From 8f9abbd5ed6d75ef6fdb0708f8a47f6dba00da5d Mon Sep 17 00:00:00 2001
From: Fredrik Eriksson
Date: Sun, 19 May 2019 13:56:01 +0200
Subject: [PATCH] cronwrapper!

---
 .gitignore              |   4 +
 cronwrapper/__init__.py | 284 ++++++++++++++++++++++++++++++++++++++++
 cronwrapper/__main__.py |  14 ++
 setup.py                |  31 +++++
 4 files changed, 333 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 cronwrapper/__init__.py
 create mode 100644 cronwrapper/__main__.py
 create mode 100644 setup.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cb2f099
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+build/
+*.pyc
+*.egg-info
+dist/
diff --git a/cronwrapper/__init__.py b/cronwrapper/__init__.py
new file mode 100755
index 0000000..cda5b1c
--- /dev/null
+++ b/cronwrapper/__init__.py
@@ -0,0 +1,284 @@
+#!/usr/bin/python3.6
+"""Run a cron job and rate-limit the reports about its failures."""
+
+import argparse
+import datetime
+import os
+import re
+import shutil
+import subprocess
+import sys
+
+
+def parse_args():
+    """Parse the command line and return the argparse namespace.
+
+    name, warn_interval, cachedir and lockdir are one-element lists (the
+    rest of the module reads them with ``[0]``), for defaults and for
+    values given on the command line alike.
+    """
+    parser = argparse.ArgumentParser(description='Wrapper for cronjobs')
+    parser.add_argument(
+        '-e', '--execution-time-limit',
+        help='max execution time (in seconds) for cron job',
+        type=int,
+        metavar='timeout',
+        default=None)
+    parser.add_argument(
+        '-n', '--name',
+        help='unique identifier for this cron job, defaults to executable name',
+        type=str,
+        metavar='name',
+        required=False,
+        # nargs=1 yields a list; a plain string would make args.name[0]
+        # the first character of the name.
+        nargs=1)
+    parser.add_argument(
+        '-s', '--success-exit-codes',
+        help='Return code of cron job that should be considered a successful run',
+        type=int,
+        metavar='code',
+        default=[0],
+        nargs='+')
+    parser.add_argument(
+        '-i', '--ignore-error',
+        help='Regex to ignore if cronjob writes to stderr',
+        type=str,
+        metavar='err_re',
+        default=[],
+        nargs='+')
+    parser.add_argument(
+        '-w', '--warn-interval',
+        help='Warn to stderr (as in "send mail") at most once within this time interval (seconds)',
+        type=int,
+        metavar='warn_interval',
+        default=[86400],
+        # nargs=1 keeps a supplied value subscriptable like the default.
+        nargs=1)
+
+    parser.add_argument(
+        '-L', '--no-lock',
+        help='Allow multiple simultanious executions of this cron job',
+        action='store_true')
+
+    parser.add_argument(
+        '-c', '--cachedir',
+        help="Where to store output of failed runs",
+        type=str,
+        metavar='cachedir',
+        default=['/var/lib/cronwrapper'],
+        nargs=1)
+
+    parser.add_argument(
+        '-l', '--lockdir',
+        help="Where to write lock-files",
+        type=str,
+        metavar='lockdir',
+        default=['/var/lock'],
+        nargs=1)
+
+    parser.add_argument(
+        'command',
+        help='Full path to cron-script to execute',
+        type=str,
+        nargs=1)
+    parser.add_argument(
+        'arg',
+        help='Optional arguments to cron script',
+        type=str,
+        nargs='*')
+
+    args = parser.parse_args()
+    if not args.name:
+        args.name = [os.path.basename(args.command[0])]
+
+    return args
+
+
+def exec_command(args, outfile, errfile, resfile):
+    """Run the wrapped command and record when and how it ended.
+
+    The command's stdout and stderr go to outfile and errfile. The UTC end
+    time and the return code are written to resfile on two lines.
+    Returns the return code.
+    """
+    proc = subprocess.Popen(
+        args.command + args.arg,
+        stdout=outfile,
+        stderr=errfile)
+
+    # A falsy limit (option not given, or 0) means "no timeout".
+    timeout = args.execution_time_limit or None
+
+    try:
+        proc.communicate(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        # Ask politely first and escalate to SIGKILL only if that fails;
+        # SIGTERM after SIGKILL would never have any effect.
+        proc.terminate()
+        try:
+            proc.communicate(timeout=30)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            proc.communicate()
+
+    now = datetime.datetime.utcnow()
+    nowstr = now.strftime('%Y-%m-%d_%H%M.%S')
+    resfile.write('{}\n{}'.format(nowstr, proc.returncode))
+    return proc.returncode
+
+
+def aquire_lock(lockfile):
+    """Try to take the pid lock at lockfile and return True on success.
+
+    A lock whose recorded pid no longer exists is removed and retaken.
+    """
+    me = "{}\n".format(os.getpid())
+    try:
+        with open(lockfile, 'x') as lock:
+            lock.write(me)
+        # we got the lock!
+        return True
+    except FileExistsError:
+        # some other process has this lock
+        with open(lockfile, 'r') as lock:
+            owner = lock.read()
+    owner_pid = int(owner)
+    try:
+        os.kill(owner_pid, 0)
+        # owner is still running
+        return False
+    except PermissionError:
+        # the pid exists but belongs to another user: still running
+        return False
+    except OSError:
+        os.remove(lockfile)
+    # owner is gone, we can make another attempt to lock
+    try:
+        with open(lockfile, 'x') as lock:
+            lock.write(me)
+        return True
+    except FileExistsError:
+        # someone stole our lock anyway :(
+        return False
+
+
+def release_lock(lockfile):
+    """Remove lockfile if it exists."""
+    if os.path.isfile(lockfile):
+        os.remove(lockfile)
+
+
+def print_runs(runs, clean=True):
+    """Print return code, stdout and stderr of every run directory in runs.
+
+    If clean is true, the run directories are deleted afterwards.
+    """
+    for run in sorted(runs):
+        with open(os.path.join(run, 'result'), 'r') as f:
+            try:
+                retcode = f.read().splitlines()[-1]
+            except IndexError:
+                retcode = 'unknown'
+        print("{} returncode {}".format(run, retcode))
+        print("STDOUT:")
+        with open(os.path.join(run, 'stdout'), 'r') as f:
+            for line in f:
+                # lines keep their own newline, print must not add another
+                print(line, end='')
+        print("\nSTDERR:")
+        with open(os.path.join(run, 'stderr'), 'r') as f:
+            for line in f:
+                print(line, end='')
+        print("\n\n")
+    if clean:
+        for run in runs:
+            shutil.rmtree(run)
+
+
+def main():
+    """Run the wrapped cron job and report failures at a limited rate.
+
+    The run's output is stored in <cachedir>/<name>/<UTC timestamp>. A
+    successful run prints and deletes the stored failed runs. A failed
+    run is reported when it is the first one, or when the oldest stored
+    failure is older than the warn interval; otherwise it is only stored.
+
+    Returns 0 after a successful run and None after a failed one.
+    """
+    time_format = '%Y-%m-%d_%H%M'
+    args = parse_args()
+    now = datetime.datetime.utcnow()
+    nowstr = now.strftime(time_format)
+    jobdir = os.path.join(args.cachedir[0], args.name[0])
+    libdir = os.path.join(jobdir, nowstr)
+    lckdir = os.path.join(args.lockdir[0], args.name[0])
+
+    os.makedirs(lckdir, exist_ok=True)
+    os.makedirs(libdir)
+    lckfile = os.path.join(lckdir, args.name[0])
+    outfile = os.path.join(libdir, 'stdout')
+    errfile = os.path.join(libdir, 'stderr')
+    resfile = os.path.join(libdir, 'result')
+
+    # Remember whether this process took the lock: releasing a lock that
+    # belongs to a still-running instance would let yet another instance
+    # start alongside it.
+    locked = not args.no_lock and aquire_lock(lckfile)
+    try:
+        with open(outfile, 'w') as o, open(errfile, 'w') as e, open(resfile, 'w') as r:
+            if args.no_lock or locked:
+                res = exec_command(args, o, e, r)
+            else:
+                e.write("CRONWRAPPER: Unable to aquire lock, previous instance still running?\n")
+                r.write("\nFalse\n")
+                # None is never a success code; False would equal 0.
+                res = None
+
+        this_run = datetime.datetime.strptime(nowstr, time_format)
+        previous_runs = {}
+        for root, dirs, files in os.walk(jobdir):
+            # only the first level holds run directories
+            for d in dirs:
+                started = datetime.datetime.strptime(d, time_format)
+                if started < this_run:
+                    previous_runs[os.path.join(root, d)] = started
+            break
+
+        if res in args.success_exit_codes:
+            # Possible success, check error output: every stderr line
+            # has to match one of the ignore patterns.
+            re_checks = [re.compile(pattern) for pattern in args.ignore_error]
+            with open(errfile, 'r') as f:
+                ok = all(
+                    any(check.match(line) for check in re_checks)
+                    for line in f)
+            if ok:
+                # Yes! Success! report any errors until now
+                if previous_runs:
+                    print("Success after {} failed runs\n".format(len(previous_runs)))
+                    print_runs(previous_runs.keys())
+                shutil.rmtree(libdir)
+                return 0
+
+        # Failure
+        if previous_runs:
+            # Not the first failure...
+            oldest = min(previous_runs.values())
+            if now - oldest > datetime.timedelta(seconds=args.warn_interval[0]):
+                # we have failed for a long time, send a report
+                print("Cronjob is still failing after {} seconds ({} failures)\n".format(args.warn_interval[0], len(previous_runs)))
+                print_runs(previous_runs.keys())
+            # else: within limits, save the output for a later instance
+        else:
+            # First failure, send a report, but keep the output
+            print("Cronjob failed\n")
+            print_runs([libdir], clean=False)
+    finally:
+        # runs on the success return as well
+        if locked:
+            release_lock(lckfile)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/cronwrapper/__main__.py b/cronwrapper/__main__.py
new file mode 100644
index 0000000..d74bba8
--- /dev/null
+++ b/cronwrapper/__main__.py
@@ -0,0 +1,14 @@
+
+import sys
+
+import cronwrapper
+
+
+def main():
+    """Console-script entry point; returns cronwrapper.main()'s result."""
+    return cronwrapper.main()
+
+
+if __name__ == '__main__':
+    # needed for ``python -m cronwrapper`` to actually run the wrapper
+    sys.exit(main())
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..0599e2c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,31 @@
+import setuptools
+
+with open('README.md', 'r') as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name='cronwrapper',
+    version='0.1.0',
+    author='Fredrik Eriksson',
+    author_email='feffe@fulh.ax',
+    description='A small wrapper to handle cronjob failures',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://fulh.ax/feffe/cronwrapper',
+    packages=setuptools.find_packages(),
+    #scripts=['bin/cronwrapper'],
+    license='BSD 3-Clause',
+    entry_points = {
+        'console_scripts': [
+            'cronwrapper = cronwrapper.__main__:main'
+        ]
+    },
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Environment :: Console',
+        'Intended Audience :: System Administrators',
+        'License :: OSI Approved :: BSD License',
+        'Operating System :: POSIX',
+        'Topic :: System :: Monitoring',
+    ],
+    )