From 863d7e77f80d18b3a5f4d3948d42d0efcf6505d1 Mon Sep 17 00:00:00 2001
From: Fredrik Eriksson
Date: Thu, 17 Dec 2020 10:06:30 +0100
Subject: [PATCH] first version of rotator

---
 rotator/__init__.py | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 rotator/__main__.py | 108 ++++++++++++++++++++++++++++++++++++
 setup.py            |  29 ++++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 rotator/__init__.py
 create mode 100644 rotator/__main__.py
 create mode 100644 setup.py

diff --git a/rotator/__init__.py b/rotator/__init__.py
new file mode 100644
index 0000000..1218ac7
--- /dev/null
+++ b/rotator/__init__.py
@@ -0,0 +1,157 @@
+import datetime
+import os
+import re
+import logging
+
+_logger = 'rotator'
+
+def parse_filename(name, regex, timeformat):
+    """Split a file name into its backup-set prefix and timestamp.
+
+    regex is a compiled pattern whose first group is the backup-set prefix
+    and whose second group is the timestamp text; timeformat is the
+    strptime() format of that text.
+
+    Returns (prefix, datetime), or (None, None) when the name does not
+    match or its timestamp cannot be parsed.
+    """
+    log = logging.getLogger(_logger)
+    match = regex.search(name)
+    if not match:
+        return (None, None)
+    # Index the groups explicitly: the regex may define more than two.
+    prefix, date = match.group(1), match.group(2)
+    try:
+        timestamp = datetime.datetime.strptime(date, timeformat)
+    except (TypeError, ValueError):
+        # One badly named file must not abort the whole rotation.
+        log.warning('ignoring {}: cannot parse timestamp {!r}'.format(name, date))
+        return (None, None)
+    log.debug('successfully parsed filename: {}'.format(name))
+    return (prefix, timestamp)
+
+def get_files_in_dir(path, regex, timeformat, recurse=False):
+    """Collect the backup files below path, grouped by prefix.
+
+    Returns a dict mapping each prefix to a set of (file path, datetime)
+    tuples. Files whose names cannot be parsed are skipped. Sub-directories
+    are only searched when recurse is true.
+    """
+    res = {}
+    # Walk top-down so that the first directory yielded is path itself.
+    for root, dirs, files in os.walk(path):
+        for f in files:
+            prefix, timestamp = parse_filename(f, regex, timeformat)
+            if timestamp is None:
+                continue
+            res.setdefault(prefix, set()).add((os.path.join(root, f), timestamp))
+        if not recurse:
+            break
+    return res
+
+def rotate(
+        dirs,
+        regex,
+        timeformat,
+        recurse = False,
+        keep_yearly = 0,
+        keep_monthly = 0,
+        keep_weekly = 0,
+        keep_daily = 0,
+        keep_hourly = 0,
+        keep_30min = 0,
+        keep_15min = 0,
+        keep_5min = 0,
+        keep_1min = 0):
+    """Delete the backups in dirs that no keep_* setting retains.
+
+    Files from all directories are pooled per prefix before the retention
+    rules are applied. The keep_* arguments are passed on unchanged to
+    get_deprecated_files().
+    """
+    log = logging.getLogger(_logger)
+
+    merged_filelist = {}
+    for d in dirs:
+        files = get_files_in_dir(d, regex, timeformat, recurse)
+        for prefix, found in files.items():
+            merged_filelist.setdefault(prefix, set()).update(found)
+
+    for prefix, filelist in merged_filelist.items():
+        to_remove = get_deprecated_files(
+                filelist,
+                keep_yearly,
+                keep_monthly,
+                keep_weekly,
+                keep_daily,
+                keep_hourly,
+                keep_30min,
+                keep_15min,
+                keep_5min,
+                keep_1min)
+        for f in to_remove:
+            log.info("removing: {}".format(f))
+            os.remove(f)
+
+def _interval_starts(date):
+    """Return the start of every rotation interval that contains date.
+
+    The order matches the keep_* parameters of get_deprecated_files():
+    year, month, week (starting Monday), day, hour, 30, 15, 5 and 1 minute.
+    """
+    minute = date.replace(second=0, microsecond=0)
+    hour = minute.replace(minute=0)
+    day = hour.replace(hour=0)
+    return (
+        day.replace(month=1, day=1),
+        day.replace(day=1),
+        day - datetime.timedelta(days=day.weekday()),
+        day,
+        hour,
+        hour.replace(minute=minute.minute - minute.minute % 30),
+        hour.replace(minute=minute.minute - minute.minute % 15),
+        hour.replace(minute=minute.minute - minute.minute % 5),
+        minute)
+
+def get_deprecated_files(
+        filelist,
+        keep_yearly,
+        keep_monthly,
+        keep_weekly,
+        keep_daily,
+        keep_hourly,
+        keep_30min,
+        keep_15min,
+        keep_5min,
+        keep_1min):
+    """Return the paths in filelist that the retention rules do not keep.
+
+    filelist is a collection of (path, datetime) tuples. For every interval
+    length the oldest backup of each interval is a candidate, and the
+    newest keep_* candidates are retained. A path is returned for removal
+    only when no interval length retains its timestamp.
+
+    Raises ValueError if any keep_* count is negative.
+    """
+    limits = (
+        keep_yearly, keep_monthly, keep_weekly, keep_daily, keep_hourly,
+        keep_30min, keep_15min, keep_5min, keep_1min)
+    if any(limit and limit < 0 for limit in limits):
+        # A negative count would turn the slice below into "drop the
+        # oldest N candidates", which silently keeps far too much.
+        raise ValueError('keep counts must not be negative')
+
+    seen = [set() for _ in limits]
+    candidates = [[] for _ in limits]
+    for date in sorted({date for path, date in filelist}):
+        for i, start in enumerate(_interval_starts(date)):
+            if start not in seen[i]:
+                seen[i].add(start)
+                candidates[i].append(date)
+
+    all_keep = set()
+    for limit, dates in zip(limits, candidates):
+        if limit:
+            all_keep.update(dates[-limit:])
+
+    return [path for path, date in filelist if date not in all_keep]
diff --git a/rotator/__main__.py b/rotator/__main__.py
new file mode 100644
index 0000000..c754585
--- /dev/null
+++ b/rotator/__main__.py
@@ -0,0 +1,108 @@
+import argparse
+import logging
+import logging.handlers
+import re
+import sys
+
+import rotator
+
+def _keep_count(value):
+    """argparse type for the keep options: a non-negative integer."""
+    try:
+        count = int(value)
+    except ValueError:
+        raise argparse.ArgumentTypeError('invalid int value: {!r}'.format(value))
+    if count < 0:
+        raise argparse.ArgumentTypeError('must not be negative: {}'.format(value))
+    return count
+
+def parse_args():
+    """Parse the command line and return the resulting namespace."""
+    parser = argparse.ArgumentParser(description="Rotate backups based on timestamp in names")
+    keep_options = (
+        (('-y', '--yearly'), 'yearly'),
+        (('-m', '--monthly'), 'monthly'),
+        (('-w', '--weekly'), 'weekly'),
+        (('-d', '--daily'), 'daily'),
+        (('-o', '--hourly'), 'hourly'),
+        (('--min30',), 'half-hourly'),
+        (('--min15',), 'quarterly-hourly'),
+        (('--min5',), '5-minutely'),
+        (('--min1',), 'minutely'),
+    )
+    for flags, label in keep_options:
+        parser.add_argument(
+            *flags,
+            help='number of {} backups to keep'.format(label),
+            type=_keep_count,
+            default=0)
+
+    parser.add_argument(
+        '-r', '--regex',
+        help='''only rotate backups matching the given regex. Regex must
+        contain at least two match groups, the first being identifier for
+        the backup set and the second being the date.''',
+        default=r'^(.*)[-_.]([0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{4})')
+    parser.add_argument(
+        '-t', '--time-format',
+        help='time format of the timestamp (python format)',
+        default='%Y-%m-%d_%H%M')
+
+    parser.add_argument(
+        '-R', '--recurse',
+        action='store_true',
+        help='process paths recursively')
+    parser.add_argument(
+        'path',
+        help='Full path to the directory containing the backup files',
+        nargs='+')
+    return parser.parse_args()
+
+def main():
+    """Set up logging, validate the options and rotate the given paths."""
+    log = logging.getLogger(rotator._logger)
+    log.setLevel(logging.INFO)
+
+    handler = logging.StreamHandler()
+    handler.setLevel(logging.WARNING)
+    log.addHandler(handler)
+
+    handler = logging.handlers.SysLogHandler(address='/dev/log')
+    formatter = logging.Formatter(fmt='rotator[%(process)s] %(message)s')
+    handler.setFormatter(formatter)
+    handler.setLevel(logging.INFO)
+    log.addHandler(handler)
+
+    args = parse_args()
+
+    keep_counts = [
+        args.yearly,
+        args.monthly,
+        args.weekly,
+        args.daily,
+        args.hourly,
+        args.min30,
+        args.min15,
+        args.min5,
+        args.min1]
+    if not any(count > 0 for count in keep_counts):
+        log.error('All time intervals set to 0, this would remove all backups, refusing to run')
+        sys.exit(2)
+
+    rotator.rotate(
+            args.path,
+            re.compile(args.regex),
+            args.time_format,
+            recurse=args.recurse,
+            keep_yearly=args.yearly,
+            keep_monthly=args.monthly,
+            keep_weekly=args.weekly,
+            keep_daily=args.daily,
+            keep_hourly=args.hourly,
+            keep_30min=args.min30,
+            keep_15min=args.min15,
+            keep_5min=args.min5,
+            keep_1min=args.min1)
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e9be6f9
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,29 @@
+import setuptools
+
+with open('README.md', 'r', encoding='utf-8') as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name='rotator',
+    version='0.1.0',
+    author='Fredrik Eriksson',
+    author_email='feffe@fulh.ax',
+    description='Simple script for rotating backups',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://gitea.fulh.ax/feffe/rotator',
+    packages=setuptools.find_packages(),
+    license='BSD 3-Clause',
+    entry_points = {
+        'console_scripts': [
+            'rotator = rotator.__main__:main'
+        ]
+    },
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Environment :: Console',
+        'Intended Audience :: System Administrators',
+        'License :: OSI Approved :: BSD License',
+        'Operating System :: POSIX',
+        ],
+    )