# rotator/rotator/__init__.py
#
# 207 lines
# 6.1 KiB
# Python

import datetime
import os
import re
import logging
# Name passed to logging.getLogger() by the functions in this module.
_logger = 'rotator'
def parse_filename(name, regex, timeformat):
    """Split a file name into its prefix and its embedded timestamp.

    Args:
        name: File name to inspect.
        regex: Object whose ``search(name)`` returns a match with exactly
            two groups (prefix first, date string second) or a falsy value.
        timeformat: ``strptime`` format used to parse the date group.

    Returns:
        ``(prefix, datetime)`` when *regex* matches, ``(None, None)``
        otherwise.

    Raises:
        ValueError: If the date group does not fit *timeformat*, or the
            match does not yield exactly two groups.
    """
    logger = logging.getLogger(_logger)
    found = regex.search(name)
    if not found:
        return (None, None)

    prefix, stamp = found.groups()
    logger.debug('successfully parsed filename: {}'.format(name))
    parsed = datetime.datetime.strptime(stamp, timeformat)
    return (prefix, parsed)
def get_files_in_dir(path, regex, timeformat, recurse=False):
    """Collect the parseable files below *path*, grouped by prefix.

    Args:
        path: Directory to scan.
        regex: Pattern object handed to ``parse_filename``.
        timeformat: ``strptime`` format handed to ``parse_filename``.
        recurse: When true, every subdirectory is scanned as well; when
            false, only the files directly inside *path* are considered.

    Returns:
        A dict mapping each prefix to a set of ``(full_path, timestamp)``
        tuples. Files for which ``parse_filename`` yields a falsy prefix
        are skipped.
    """
    res = {}
    # Walk top-down so that the first triple is *path* itself. A bottom-up
    # walk yields the deepest subdirectory first, which would make the
    # non-recursive mode scan a leaf directory instead of *path*.
    for root, _subdirs, files in os.walk(path):
        for f in files:
            prefix, timestamp = parse_filename(f, regex, timeformat)
            if not prefix:
                continue
            res.setdefault(prefix, set()).add((os.path.join(root, f), timestamp))
        if not recurse:
            # Only the top-level directory is wanted.
            break
    return res
def rotate(
        dirs,
        regex,
        timeformat,
        recurse = False,
        keep_yearly = 0,
        keep_monthly = 0,
        keep_weekly = 0,
        keep_daily = 0,
        keep_hourly = 0,
        keep_30min = 0,
        keep_15min = 0,
        keep_5min = 0,
        keep_1min = 0):
    """Delete files that fall outside the configured retention windows.

    Files from all directories in *dirs* are gathered with
    ``get_files_in_dir`` and merged per prefix. For each prefix,
    ``get_deprecated_files`` selects the paths to drop, and each one is
    logged at INFO level and removed with ``os.remove``.

    Args:
        dirs: Iterable of directories to scan.
        regex: Pattern object forwarded to ``get_files_in_dir``.
        timeformat: ``strptime`` format forwarded to ``get_files_in_dir``.
        recurse: Forwarded to ``get_files_in_dir``.
        keep_yearly, keep_monthly, keep_weekly, keep_daily, keep_hourly,
        keep_30min, keep_15min, keep_5min, keep_1min: Retention counts
            forwarded to ``get_deprecated_files``; all default to 0.
    """
    logger = logging.getLogger(_logger)

    # Merge the per-directory results so that files sharing a prefix are
    # rotated together even when they live in different directories.
    by_prefix = {}
    for directory in dirs:
        found = get_files_in_dir(directory, regex, timeformat, recurse)
        for prefix, entries in found.items():
            by_prefix.setdefault(prefix, set()).update(entries)

    for entries in by_prefix.values():
        obsolete = get_deprecated_files(
            entries,
            keep_yearly=keep_yearly,
            keep_monthly=keep_monthly,
            keep_weekly=keep_weekly,
            keep_daily=keep_daily,
            keep_hourly=keep_hourly,
            keep_30min=keep_30min,
            keep_15min=keep_15min,
            keep_5min=keep_5min,
            keep_1min=keep_1min)
        for target in obsolete:
            logger.info("removing: {}".format(target))
            os.remove(target)
def _floor_minutes(date, step):
    """Round *date* down to the previous multiple of *step* minutes in its hour."""
    return date - datetime.timedelta(
        minutes=date.minute % step,
        seconds=date.second,
        microseconds=date.microsecond)


def _period_starts(date):
    """Map each retention period name to the start of the period holding *date*."""
    day = date.date()
    midnight = datetime.time.min
    return {
        'year': datetime.datetime(year=date.year, month=1, day=1),
        'month': datetime.datetime(year=date.year, month=date.month, day=1),
        # weekday() is 0 on Monday, so weeks are counted from Monday.
        'week': datetime.datetime.combine(
            day - datetime.timedelta(days=date.weekday()), midnight),
        'day': datetime.datetime.combine(day, midnight),
        'hour': _floor_minutes(date, 60),
        'min30': _floor_minutes(date, 30),
        'min15': _floor_minutes(date, 15),
        'min5': _floor_minutes(date, 5),
        'min1': _floor_minutes(date, 1),
    }


def get_deprecated_files(
        filelist,
        keep_yearly,
        keep_monthly,
        keep_weekly,
        keep_daily,
        keep_hourly,
        keep_30min,
        keep_15min,
        keep_5min,
        keep_1min):
    """Return the paths in *filelist* that no retention rule protects.

    For every period type (year, month, week, day, hour, 30/15/5/1 minutes)
    the oldest timestamp inside each distinct period is a candidate to keep.
    Of those candidates, the ``keep_*`` most recent ones are retained. A
    file is kept if any period type retains its timestamp.

    Args:
        filelist: Iterable of ``(path, datetime)`` tuples. It is iterated
            twice, so it must not be a one-shot iterator.
        keep_yearly, keep_monthly, keep_weekly, keep_daily, keep_hourly,
        keep_30min, keep_15min, keep_5min, keep_1min: Number of most
            recent periods of that type to retain. A falsy value disables
            the period type. Values are used as ``[-count:]`` slices and
            are not validated.

    Returns:
        A list of the paths whose timestamp is not retained, in the
        iteration order of *filelist*.
    """
    limits = (
        ('year', keep_yearly),
        ('month', keep_monthly),
        ('week', keep_weekly),
        ('day', keep_daily),
        ('hour', keep_hourly),
        ('min30', keep_30min),
        ('min15', keep_15min),
        ('min5', keep_5min),
        ('min1', keep_1min),
    )
    # Period types with a falsy count keep nothing, so skip them entirely.
    active = [(period, count) for period, count in limits if count]

    seen = {period: set() for period, _ in active}
    firsts = {period: [] for period, _ in active}

    # Ascending order makes the first timestamp seen in a period the oldest.
    for date in sorted(date for _, date in filelist):
        starts = _period_starts(date)
        for period, _ in active:
            start = starts[period]
            if start not in seen[period]:
                seen[period].add(start)
                firsts[period].append(date)

    all_keep = set()
    for period, count in active:
        all_keep.update(firsts[period][-count:])

    return [path for path, date in filelist if date not in all_keep]