cronwrapper!
This commit is contained in:
parent
b252943096
commit
8f9abbd5ed
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
build/
|
||||
*.pyc
|
||||
*.egg-info
|
||||
dist/
|
235
cronwrapper/__init__.py
Executable file
235
cronwrapper/__init__.py
Executable file
@ -0,0 +1,235 @@
|
||||
#!/usr/bin/python3.6
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def parse_args():
    """Parse the cronwrapper command line from ``sys.argv``.

    Returns:
        argparse.Namespace with these attributes:

        * ``execution_time_limit``: int number of seconds, or None when no
          limit was requested.
        * ``name``, ``warn_interval``, ``cachedir``, ``lockdir``, ``command``:
          one-element lists (callers index them with ``[0]``).
        * ``success_exit_codes``, ``ignore_error``, ``arg``: lists.
        * ``no_lock``: bool.
    """
    parser = argparse.ArgumentParser(description='Wrapper for cronjobs')
    parser.add_argument(
        '-e', '--execution-time-limit',
        help='max execution time (in seconds) for cron job',
        type=int,
        metavar='timeout',
        # None (rather than an empty list) is the conventional "not set"
        # value; it is falsy just like the previous default.
        default=None)
    parser.add_argument(
        '-n', '--name',
        help='unique identifier for this cron job, defaults to executable name',
        type=str,
        metavar='name',
        required=False,
        # nargs=1 makes an explicitly given name a one-element list, the same
        # shape as the fallback assigned below.  Without it, indexing the
        # value with [0] would yield only the first character of the name.
        nargs=1)
    parser.add_argument(
        '-s', '--success-exit-codes',
        help='Return code of cron job that should be considered a successful run',
        type=int,
        metavar='code',
        default=[0],
        nargs='+')
    parser.add_argument(
        '-i', '--ignore-error',
        help='Regex to ignore if cronjob writes to stderr',
        type=str,
        metavar='err_re',
        default=[],
        nargs='+')
    parser.add_argument(
        '-w', '--warn-interval',
        help='Warn to stderr (as in "send mail") at most once within this time interval (seconds)',
        type=int,
        metavar='warn_interval',
        default=[86400],
        # nargs=1 keeps a user supplied value the same shape (one-element
        # list) as the default, so indexing with [0] works in both cases.
        nargs=1)

    parser.add_argument(
        '-L', '--no-lock',
        help='Allow multiple simultanious executions of this cron job',
        action='store_true')

    parser.add_argument(
        '-c', '--cachedir',
        help="Where to store output of failed runs",
        type=str,
        metavar='cachedir',
        default=['/var/lib/cronwrapper'],
        nargs=1)

    parser.add_argument(
        '-l', '--lockdir',
        help="Where to write lock-files",
        type=str,
        metavar='lockdir',
        default=['/var/lock'],
        nargs=1)

    parser.add_argument(
        'command',
        help='Full path to cron-script to execute',
        type=str,
        nargs=1)
    parser.add_argument(
        'arg',
        help='Optional arguments to cron script',
        type=str,
        nargs='*')

    args = parser.parse_args()
    if not args.name:
        # Fall back to the executable's file name as the job identifier.
        args.name = [os.path.basename(args.command[0])]

    return args
|
||||
|
||||
def exec_command(args, outfile, errfile, resfile):
    """Run the wrapped command and record when and how it finished.

    Args:
        args: parsed arguments; ``args.command + args.arg`` is the argv to
            execute and ``args.execution_time_limit`` is the optional
            timeout in seconds (any falsy value means no limit).
        outfile: open file that receives the command's stdout.
        errfile: open file that receives the command's stderr.
        resfile: open text file; a UTC timestamp line followed by the return
            code is written to it.

    Returns:
        The command's return code (negative signal number if it was killed
        by a signal), or None if the process could not be reaped even after
        SIGKILL.
    """
    proc = subprocess.Popen(
        args.command + args.arg,
        stdout=outfile,
        stderr=errfile)

    # A falsy limit (None, 0, empty list) means "wait forever".
    timeout = args.execution_time_limit or None

    try:
        proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # Escalate gently: SIGTERM first so the job gets a chance to clean
        # up, and SIGKILL only if it ignores that.
        proc.terminate()
        try:
            proc.communicate(timeout=30)
        except subprocess.TimeoutExpired:
            proc.kill()
            try:
                proc.communicate(timeout=10)
            except subprocess.TimeoutExpired:
                # Not even SIGKILL reaped it in time; returncode stays None
                # and is recorded as such instead of crashing the wrapper.
                pass

    now = datetime.datetime.utcnow()
    nowstr = now.strftime('%Y-%m-%d_%H%M.%S')
    resfile.write('{}\n{}'.format(nowstr, proc.returncode))
    return proc.returncode
|
||||
|
||||
def _create_lockfile(lockfile, content):
    """Exclusively create *lockfile* holding *content*; False if it exists."""
    try:
        with open(lockfile, 'x') as lock:
            lock.write(content)
    except FileExistsError:
        return False
    return True


def aquire_lock(lockfile):
    """Try to take the PID lock file at *lockfile* for the current process.

    The lock file contains the owner's PID.  A lock whose owner no longer
    exists is considered stale, removed and re-taken.

    Args:
        lockfile: path of the lock file.

    Returns:
        True if this process now owns the lock, False if another live
        process holds it (or the lock file's content cannot be interpreted).
    """
    me = "{}\n".format(os.getpid())
    if _create_lockfile(lockfile, me):
        # we got the lock!
        return True

    # some other process has this lock
    try:
        with open(lockfile, 'r') as lock:
            owner_pid = int(lock.read())
    except FileNotFoundError:
        # The owner released it between our two attempts; try once more.
        return _create_lockfile(lockfile, me)
    except ValueError:
        # Empty or corrupt content: the owner may be in the middle of
        # writing its PID, so be conservative and treat the lock as held.
        return False

    try:
        os.kill(owner_pid, 0)
    except ProcessLookupError:
        # owner is gone, we can make another attempt to lock
        pass
    except PermissionError:
        # The process exists but belongs to another user: still running.
        return False
    else:
        # owner is still running
        return False

    try:
        os.remove(lockfile)
    except FileNotFoundError:
        # Somebody else already cleaned up the stale lock.
        pass
    # If this fails, someone stole our lock anyway :(
    return _create_lockfile(lockfile, me)
|
||||
|
||||
def release_lock(lockfile):
    """Remove *lockfile* if it is owned by the current process.

    The lock file is expected to contain the owner's PID.  A missing lock
    file, or one that records a different owner, is left untouched so that
    calling this without holding the lock cannot release another running
    instance's lock.

    Args:
        lockfile: path of the lock file.
    """
    try:
        with open(lockfile, 'r') as lock:
            owner = lock.read().strip()
    except (FileNotFoundError, IsADirectoryError):
        # Nothing to release.
        return

    if owner != str(os.getpid()):
        # Held by someone else (or unreadable content): not ours to remove.
        return

    try:
        os.remove(lockfile)
    except FileNotFoundError:
        # Already gone; the goal is achieved either way.
        pass
|
||||
|
||||
def print_runs(runs, clean=True):
    """Print the recorded result, stdout and stderr of each run directory.

    Args:
        runs: iterable of run directory paths.  Each directory must contain
            the files ``result``, ``stdout`` and ``stderr``.
        clean: when true, every directory in ``runs`` is deleted after all
            of them have been printed.
    """
    # Materialise once: the paths are walked twice (print, then clean).
    runs = list(runs)

    for run in sorted(runs):
        with open(os.path.join(run, 'result'), 'r') as f:
            result_lines = f.read().splitlines()
        # The return code is the last line of the result file; an empty
        # file means the run never got as far as recording one.
        retcode = result_lines[-1] if result_lines else 'unknown'

        print("{} returncode {}".format(run, retcode))
        print("STDOUT:")
        with open(os.path.join(run, 'stdout'), 'r') as f:
            for line in f:
                # Lines already carry their newline; do not add another.
                print(line, end='')
        print("\nSTDERR:")
        with open(os.path.join(run, 'stderr'), 'r') as f:
            for line in f:
                print(line, end='')
        print("\n\n")

    if clean:
        for run in runs:
            shutil.rmtree(run)
|
||||
|
||||
def _previous_runs(jobdir, nowstr, time_format):
    """Map every older run directory directly below *jobdir* to its timestamp."""
    current = datetime.datetime.strptime(nowstr, time_format)
    found = {}
    for root, dirs, _files in os.walk(jobdir):
        for d in dirs:
            try:
                stamp = datetime.datetime.strptime(d, time_format)
            except ValueError:
                # Not a run directory created by this wrapper; ignore it.
                continue
            if stamp < current:
                found[os.path.join(root, d)] = stamp
        # Only the first level is of interest: one directory per run.
        break
    return found


def _only_ignorable_errors(errfile, patterns):
    """Return True if every line of *errfile* matches one of *patterns*."""
    with open(errfile, 'r') as f:
        return all(any(p.match(line) for p in patterns) for line in f)


def main():
    """Run the wrapped cron job once and report failures on stdout.

    The job's stdout, stderr and result are stored in a per-run directory
    below the cache directory.  A run counts as successful when its return
    code is one of the accepted codes and every stderr line matches an
    ignore pattern.  On success the outputs of earlier failed runs are
    printed and removed together with the current run directory.  On failure
    a report is printed for the first failure, or when the oldest stored
    failure is older than the warn interval; otherwise the output is only
    kept for a later report.

    Returns:
        0 after a successful run, None otherwise.
    """
    # The ignore patterns need `re`, which is not among the module-level
    # imports, so bring it into scope here.
    import re

    time_format = '%Y-%m-%d_%H%M'
    args = parse_args()

    # Depending on how the option was given these may be a bare value or a
    # one-element list; accept both shapes.
    name = args.name if isinstance(args.name, str) else args.name[0]
    warn_interval = args.warn_interval
    if isinstance(warn_interval, (list, tuple)):
        warn_interval = warn_interval[0]

    now = datetime.datetime.utcnow()
    nowstr = now.strftime(time_format)
    jobdir = os.path.join(args.cachedir[0], name)
    libdir = os.path.join(jobdir, nowstr)
    lckdir = os.path.join(args.lockdir[0], name)

    os.makedirs(lckdir, exist_ok=True)
    os.makedirs(libdir)
    lckfile = os.path.join(lckdir, name)
    outfile = os.path.join(libdir, 'stdout')
    errfile = os.path.join(libdir, 'stderr')
    resfile = os.path.join(libdir, 'result')

    # Track ownership so that the lock is released on every exit path, and
    # only when this process actually holds it.
    locked = False
    try:
        with open(outfile, 'w') as o, open(errfile, 'w') as e, open(resfile, 'w') as r:
            if args.no_lock:
                res = exec_command(args, o, e, r)
            elif aquire_lock(lckfile):
                locked = True
                res = exec_command(args, o, e, r)
            else:
                e.write("CRONWRAPPER: Unable to aquire lock, previous instance still running?\n")
                r.write("\nFalse\n")
                # None can never be mistaken for an exit code, whereas
                # False compares equal to exit code 0.
                res = None

        previous_runs = _previous_runs(jobdir, nowstr, time_format)

        if res in args.success_exit_codes:
            # Possible success, check error output
            patterns = [re.compile(expr) for expr in args.ignore_error]
            if _only_ignorable_errors(errfile, patterns):
                # Yes! Success! report any errors until now
                if previous_runs:
                    print("Success after {} failed runs\n".format(len(previous_runs)))
                    print_runs(previous_runs.keys())
                shutil.rmtree(libdir)
                return 0

        # Failure
        if previous_runs:
            # Not the first failure...
            oldest = min(previous_runs.values())
            if now - oldest > datetime.timedelta(seconds=warn_interval):
                # we have failed for a long time, send a report
                print("Cronjob is still failing after {} seconds ({} failures)\n".format(warn_interval, len(previous_runs)))
                print_runs(previous_runs.keys())
            # Otherwise we are within limits: keep the output for a later
            # instance to report.
        else:
            # First failure, send a report, but keep the output
            print("Cronjob failed\n")
            print_runs([libdir], clean=False)
    finally:
        if locked:
            release_lock(lckfile)
|
||||
|
||||
# Script entry point: terminate with whatever status main() returns.
if __name__ == '__main__':
    raise SystemExit(main())
|
5
cronwrapper/__main__.py
Normal file
5
cronwrapper/__main__.py
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
import cronwrapper
|
||||
|
||||
def main():
    """Console-script entry point; delegates to ``cronwrapper.main``.

    Returns:
        Whatever ``cronwrapper.main()`` returns, so that the generated
        console script can pass it on as the process exit status.
    """
    return cronwrapper.main()


# Without this guard ``python -m cronwrapper`` would only define main() and
# exit without running anything.
if __name__ == '__main__':
    raise SystemExit(main())
|
31
setup.py
Normal file
31
setup.py
Normal file
@ -0,0 +1,31 @@
|
||||
import setuptools
|
||||
|
||||
# The README doubles as the long description shown on the package index.
# Read it as UTF-8 explicitly so the build does not depend on the locale.
with open('README.md', 'r', encoding='utf-8') as fh:
    long_description = fh.read()

setuptools.setup(
    name='cronwrapper',
    version='0.1.0',
    author='Fredrik Eriksson',
    author_email='feffe@fulh.ax',
    description='A small wrapper to handle cronjob failures',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://fulh.ax/feffe/cronwrapper',
    packages=setuptools.find_packages(),
    #scripts=['bin/cronwrapper'],
    license='BSD 3-Clause',
    # Installs a `cronwrapper` executable that calls cronwrapper.__main__.main.
    entry_points={
        'console_scripts': [
            'cronwrapper = cronwrapper.__main__:main',
        ],
    },
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Environment :: Console',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: BSD License',
        'Operating System :: POSIX',
        'Topic :: System :: Monitoring',
    ],
)
|
Loading…
Reference in New Issue
Block a user