207 lines
6.1 KiB
Python
207 lines
6.1 KiB
Python
import datetime
|
|
import os
|
|
import re
|
|
import logging
|
|
|
|
# Name of the logger that the functions in this module obtain through
# logging.getLogger().
_logger = 'rotator'
|
|
|
|
def parse_filename(name, regex, timeformat):
    """Split a filename into its prefix and its embedded timestamp.

    Args:
        name: Filename to inspect.
        regex: Compiled pattern (any object with a ``search`` method) whose
            match yields exactly two groups: the prefix and the date text.
        timeformat: ``strptime`` format string describing the date group.

    Returns:
        ``(prefix, datetime)`` when *regex* matches *name*, otherwise
        ``(None, None)``.

    Raises:
        ValueError: If the match does not consist of exactly two groups, or
            if the date group does not conform to *timeformat*.
    """
    match = regex.search(name)
    if not match:
        return (None, None)

    prefix, date = match.groups()
    timestamp = datetime.datetime.strptime(date, timeformat)

    # Report success only after the date has really been parsed; strptime
    # raises on malformed dates and the message would otherwise be a lie.
    # Lazy %-style arguments avoid formatting when DEBUG is disabled.
    log = logging.getLogger(_logger)
    log.debug('successfully parsed filename: %s', name)
    return (prefix, timestamp)
|
|
|
|
def get_files_in_dir(path, regex, timeformat, recurse=False):
    """Collect the parseable files below *path*, grouped by filename prefix.

    Every filename is handed to ``parse_filename``; files for which no
    prefix is returned are ignored.

    Args:
        path: Directory to scan.
        regex: Pattern forwarded to ``parse_filename``.
        timeformat: ``strptime`` format forwarded to ``parse_filename``.
        recurse: When true, descend into all subdirectories; otherwise only
            the files directly inside *path* are considered.

    Returns:
        A dict mapping each prefix to a set of ``(full_path, timestamp)``
        tuples. The dict is empty when nothing matches or *path* cannot be
        walked.
    """
    res = {}
    # Walk top-down so the first directory yielded is *path* itself. A
    # bottom-up walk yields the deepest subdirectory first, which made the
    # non-recursive early exit below scan the wrong directory.
    for root, _dirs, files in os.walk(path):
        for f in files:
            prefix, timestamp = parse_filename(f, regex, timeformat)
            if not prefix:
                # No match (or an empty prefix group): not a managed file.
                continue
            res.setdefault(prefix, set()).add(
                (os.path.join(root, f), timestamp))
        if not recurse:
            # Only the top-level directory was requested.
            break
    return res
|
|
|
|
def rotate(
        dirs,
        regex,
        timeformat,
        recurse = False,
        keep_yearly = 0,
        keep_monthly = 0,
        keep_weekly = 0,
        keep_daily = 0,
        keep_hourly = 0,
        keep_30min = 0,
        keep_15min = 0,
        keep_5min = 0,
        keep_1min = 0):
    """Delete the files in *dirs* that the retention settings do not cover.

    All directories are scanned with ``get_files_in_dir`` and the results
    are pooled per filename prefix, so files sharing a prefix are rotated
    together even when they live in different directories. For each prefix
    ``get_deprecated_files`` selects the paths to drop, and each of them is
    logged at INFO level and removed with ``os.remove``.

    Args:
        dirs: Iterable of directory paths to scan.
        regex: Pattern forwarded to ``get_files_in_dir``.
        timeformat: ``strptime`` format forwarded to ``get_files_in_dir``.
        recurse: Forwarded to ``get_files_in_dir``.
        keep_yearly, keep_monthly, keep_weekly, keep_daily, keep_hourly,
        keep_30min, keep_15min, keep_5min, keep_1min: Retention counts
            forwarded, in this order, to ``get_deprecated_files``.
    """
    logger = logging.getLogger(_logger)

    # Pool the scan results of every directory under their prefix.
    by_prefix = {}
    for directory in dirs:
        found = get_files_in_dir(directory, regex, timeformat, recurse)
        for prefix, entries in found.items():
            by_prefix.setdefault(prefix, set()).update(entries)

    retention = (
        keep_yearly,
        keep_monthly,
        keep_weekly,
        keep_daily,
        keep_hourly,
        keep_30min,
        keep_15min,
        keep_5min,
        keep_1min,
    )

    for candidates in by_prefix.values():
        for stale in get_deprecated_files(candidates, *retention):
            logger.info("removing: {}".format(stale))
            os.remove(stale)
|
|
|
|
def _floor_minutes(date, step):
    """Return *date* rounded down to a multiple of *step* minutes in its hour."""
    return date - datetime.timedelta(
        minutes=date.minute % step,
        seconds=date.second,
        microseconds=date.microsecond)


def _period_firsts(ordered_dates, period_start):
    """Return the first date of every distinct period, keeping input order."""
    seen = set()
    firsts = []
    for date in ordered_dates:
        start = period_start(date)
        if start not in seen:
            seen.add(start)
            firsts.append(date)
    return firsts


def get_deprecated_files(
        filelist,
        keep_yearly,
        keep_monthly,
        keep_weekly,
        keep_daily,
        keep_hourly,
        keep_30min,
        keep_15min,
        keep_5min,
        keep_1min):
    """Return the paths in *filelist* that no retention rule protects.

    For every granularity (year, month, week starting on Monday, day, hour,
    30/15/5/1 minutes) the oldest timestamp inside each period acts as that
    period's representative. Of those representatives the newest
    ``keep_<granularity>`` ones are retained; a falsy count disables the
    granularity. A file survives when its timestamp is retained by at
    least one granularity.

    Args:
        filelist: Iterable of ``(path, datetime)`` tuples. It is iterated
            twice, so it must be re-iterable (e.g. a set or list).
        keep_yearly, keep_monthly, keep_weekly, keep_daily, keep_hourly,
        keep_30min, keep_15min, keep_5min, keep_1min: Number of period
            representatives to retain for the respective granularity.

    Returns:
        A list with the paths of all files whose timestamp is not retained,
        in the iteration order of *filelist*.
    """
    midnight = datetime.time.min

    # One (retention count, "start of the period containing d") pair per
    # granularity; the period start only serves as a grouping key.
    rules = (
        (keep_yearly,
         lambda d: datetime.datetime(year=d.year, month=1, day=1)),
        (keep_monthly,
         lambda d: datetime.datetime(year=d.year, month=d.month, day=1)),
        (keep_weekly,
         lambda d: datetime.datetime.combine(
             d.date() - datetime.timedelta(days=d.weekday()), midnight)),
        (keep_daily,
         lambda d: datetime.datetime.combine(d.date(), midnight)),
        (keep_hourly, lambda d: _floor_minutes(d, 60)),
        (keep_30min, lambda d: _floor_minutes(d, 30)),
        (keep_15min, lambda d: _floor_minutes(d, 15)),
        (keep_5min, lambda d: _floor_minutes(d, 5)),
        (keep_1min, lambda d: _floor_minutes(d, 1)),
    )

    # Oldest first, so the first date seen in a period is its oldest one.
    ordered = sorted(date for _path, date in filelist)

    all_keep = set()
    for count, period_start in rules:
        if count:
            # The tail of the list holds the most recent representatives.
            all_keep.update(_period_firsts(ordered, period_start)[-count:])

    return [path for path, date in filelist if date not in all_keep]
|