"""Time-based rotation of timestamped files (backups, logs, ...).

File names are parsed with a caller-supplied regular expression into a
``(prefix, timestamp)`` pair.  Files are grouped by prefix, and within each
group only one representative file per period (year, month, week, day, hour,
30/15/5/1 minutes) is retained, for a configurable number of periods.
"""
import datetime
import logging
import os
import re

_logger = 'rotator'

# Retention period names, from coarsest to finest.  The order matches the
# ``keep_*`` parameters of ``rotate`` and ``get_deprecated_files``.
_PERIODS = ('year', 'month', 'week', 'day', 'hour',
            'min30', 'min15', 'min5', 'min1')


def parse_filename(name, regex, timeformat):
    """Split a file name into its prefix and timestamp.

    Args:
        name: File name (without directory) to parse.
        regex: Compiled pattern with exactly two groups: the prefix and the
            date string, in that order.
        timeformat: ``strptime`` format used to parse the date group.

    Returns:
        ``(prefix, datetime)`` when ``regex`` matches, ``(None, None)``
        otherwise.

    Raises:
        ValueError: If the date group does not match ``timeformat`` (or the
            pattern does not define exactly two groups).
    """
    log = logging.getLogger(_logger)
    match = regex.search(name)
    if not match:
        return (None, None)
    prefix, date = match.groups()
    log.debug('successfully parsed filename: %s', name)
    return (prefix, datetime.datetime.strptime(date, timeformat))


def get_files_in_dir(path, regex, timeformat, recurse=False):
    """Collect the parseable files below ``path``, grouped by prefix.

    Args:
        path: Directory to scan.
        regex: Compiled pattern passed to ``parse_filename``.
        timeformat: ``strptime`` format passed to ``parse_filename``.
        recurse: When false only ``path`` itself is scanned; when true all
            of its subdirectories are scanned as well.

    Returns:
        Dict mapping each prefix to a set of ``(full_path, datetime)``
        tuples.  Files that do not match ``regex`` (or whose prefix is
        empty) are ignored.
    """
    res = {}
    # Walk top-down so that the first directory yielded is ``path`` itself.
    # A bottom-up walk yields the deepest subdirectory first, which would
    # make the non-recursive case scan the wrong directory.
    for root, _dirs, files in os.walk(path):
        for fname in files:
            prefix, timestamp = parse_filename(fname, regex, timeformat)
            if not prefix:
                continue
            res.setdefault(prefix, set()).add(
                (os.path.join(root, fname), timestamp))
        if not recurse:
            break
    return res


def rotate(
        dirs,
        regex,
        timeformat,
        recurse=False,
        keep_yearly=0,
        keep_monthly=0,
        keep_weekly=0,
        keep_daily=0,
        keep_hourly=0,
        keep_30min=0,
        keep_15min=0,
        keep_5min=0,
        keep_1min=0):
    """Delete the files in ``dirs`` that no retention rule wants to keep.

    Files from all directories are merged and grouped by prefix; every
    prefix group is rotated independently through ``get_deprecated_files``.

    Args:
        dirs: Iterable of directories to scan.
        regex: Compiled pattern passed to ``parse_filename``.
        timeformat: ``strptime`` format passed to ``parse_filename``.
        recurse: Whether subdirectories are scanned as well.
        keep_yearly .. keep_1min: Number of most recent periods of each
            granularity for which one file is kept (0 disables the rule).
            With every rule left at 0, all matching files are removed.

    Raises:
        OSError: If a file cannot be removed.
    """
    log = logging.getLogger(_logger)

    merged_filelist = {}
    for directory in dirs:
        found = get_files_in_dir(directory, regex, timeformat, recurse)
        for prefix, entries in found.items():
            merged_filelist.setdefault(prefix, set()).update(entries)

    for filelist in merged_filelist.values():
        to_remove = get_deprecated_files(
            filelist,
            keep_yearly, keep_monthly, keep_weekly, keep_daily, keep_hourly,
            keep_30min, keep_15min, keep_5min, keep_1min)
        for path in to_remove:
            log.info("removing: {}".format(path))
            os.remove(path)


def _floor_minutes(date, step):
    """Round ``date`` down to a multiple of ``step`` minutes within its hour."""
    return date - datetime.timedelta(
        minutes=date.minute % step,
        seconds=date.second,
        microseconds=date.microsecond)


def _period_starts(date):
    """Return the start of each retention period that contains ``date``."""
    day = date.date()
    midnight = datetime.time.min
    return {
        'year': datetime.datetime(year=date.year, month=1, day=1),
        'month': datetime.datetime(year=date.year, month=date.month, day=1),
        # Weeks start on Monday (``weekday() == 0``).
        'week': datetime.datetime.combine(
            day - datetime.timedelta(days=date.weekday()), midnight),
        'day': datetime.datetime.combine(day, midnight),
        'hour': _floor_minutes(date, 60),
        'min30': _floor_minutes(date, 30),
        'min15': _floor_minutes(date, 15),
        'min5': _floor_minutes(date, 5),
        'min1': _floor_minutes(date, 1),
    }


def get_deprecated_files(
        filelist,
        keep_yearly,
        keep_monthly,
        keep_weekly,
        keep_daily,
        keep_hourly,
        keep_30min,
        keep_15min,
        keep_5min,
        keep_1min):
    """Return the paths in ``filelist`` that no retention rule keeps.

    For every granularity the oldest file of each period represents that
    period, and the representatives of the ``keep_*`` most recent periods
    are kept.  A file is deprecated when no granularity keeps it.

    Args:
        filelist: Iterable of ``(path, datetime)`` tuples.
        keep_yearly .. keep_1min: Number of most recent periods to keep per
            granularity; a falsy value disables that granularity.

    Returns:
        List of paths to remove, in the iteration order of ``filelist``.
    """
    quotas = dict(zip(_PERIODS, (
        keep_yearly, keep_monthly, keep_weekly, keep_daily, keep_hourly,
        keep_30min, keep_15min, keep_5min, keep_1min)))

    seen = {name: set() for name in _PERIODS}   # period starts already met
    kept = {name: [] for name in _PERIODS}      # representative timestamps

    # Oldest first, so the first file met in a period is its representative.
    for date in sorted(date for _path, date in filelist):
        for name, start in _period_starts(date).items():
            if start not in seen[name]:
                seen[name].add(start)
                kept[name].append(date)

    all_keep = set()
    for name in _PERIODS:
        quota = quotas[name]
        if quota:
            # ``kept`` is in ascending order: the tail holds the newest periods.
            all_keep.update(kept[name][-quota:])

    return [path for path, date in filelist if date not in all_keep]