cronwrapper!
This commit is contained in:
parent
b252943096
commit
8f9abbd5ed
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
build/
|
||||||
|
*.pyc
|
||||||
|
*.egg-info
|
||||||
|
dist/
|
235
cronwrapper/__init__.py
Executable file
235
cronwrapper/__init__.py
Executable file
@ -0,0 +1,235 @@
|
|||||||
|
#!/usr/bin/python3.6
|
||||||
|
|
||||||
|
import argparse
import datetime
import os
import re
import shutil
import subprocess
import sys
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the cron wrapper's command line (taken from ``sys.argv``).

    Returns:
        argparse.Namespace with these attributes:

        * ``execution_time_limit`` - int, or ``None`` when no limit was given.
        * ``name``, ``warn_interval``, ``cachedir``, ``lockdir``, ``command``
          - one-element lists; callers read them with ``[0]``.
        * ``success_exit_codes``, ``ignore_error``, ``arg`` - lists.
        * ``no_lock`` - bool.

        When ``--name`` is omitted, ``name`` defaults to the basename of the
        command.
    """
    parser = argparse.ArgumentParser(description='Wrapper for cronjobs')
    parser.add_argument(
        '-e', '--execution-time-limit',
        help='max execution time (in seconds) for cron job',
        type=int,
        metavar='timeout',
        # None (instead of a list) is the conventional "not set" value; it is
        # just as falsy for callers that only test truthiness.
        default=None)
    parser.add_argument(
        '-n', '--name',
        help='unique identifier for this cron job, defaults to executable name',
        type=str,
        metavar='name',
        required=False,
        # nargs=1 keeps a user-supplied name a one-element list, the same
        # shape as the default assigned below.
        nargs=1)
    parser.add_argument(
        '-s', '--success-exit-codes',
        help='Return code of cron job that should be considered a successful run',
        type=int,
        metavar='code',
        default=[0],
        nargs='+')
    parser.add_argument(
        '-i', '--ignore-error',
        help='Regex to ignore if cronjob writes to stderr',
        type=str,
        metavar='err_re',
        default=[],
        nargs='+')
    parser.add_argument(
        '-w', '--warn-interval',
        help='Warn to stderr (as in "send mail") at most once within this time interval (seconds)',
        type=int,
        metavar='warn_interval',
        default=[86400],
        # nargs=1 so an explicit value has the same list shape as the default.
        nargs=1)

    parser.add_argument(
        '-L', '--no-lock',
        help='Allow multiple simultanious executions of this cron job',
        action='store_true')

    parser.add_argument(
        '-c', '--cachedir',
        help="Where to store output of failed runs",
        type=str,
        metavar='cachedir',
        default=['/var/lib/cronwrapper'],
        nargs=1)

    parser.add_argument(
        '-l', '--lockdir',
        help="Where to write lock-files",
        type=str,
        metavar='lockdir',
        default=['/var/lock'],
        nargs=1)

    parser.add_argument(
        'command',
        help='Full path to cron-script to execute',
        type=str,
        nargs=1)
    parser.add_argument(
        'arg',
        help='Optional arguments to cron script',
        type=str,
        nargs='*')

    args = parser.parse_args()
    if not args.name:
        args.name = [os.path.basename(args.command[0])]

    return args
|
||||||
|
|
||||||
|
def exec_command(args, outfile, errfile, resfile):
    """Run the wrapped command and record when it finished and how.

    Args:
        args: parsed command line. ``args.command + args.arg`` is the argv to
            execute; ``args.execution_time_limit`` is the maximum run time in
            seconds (any falsy value means "no limit").
        outfile: passed to ``subprocess.Popen`` as the child's stdout.
        errfile: passed to ``subprocess.Popen`` as the child's stderr.
        resfile: writable text file; receives a UTC timestamp line followed
            by the return code.

    Returns:
        The child's return code as reported by ``Popen.returncode`` (negative
        signal number when it was killed by a signal), or ``None`` if the
        child could not be reaped even after SIGKILL.
    """
    proc = subprocess.Popen(
        args.command + args.arg,
        stdout=outfile,
        stderr=errfile)

    timeout = args.execution_time_limit or None

    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        # Escalate gently: ask the job to stop (SIGTERM) and only force it
        # (SIGKILL) if it has not exited after the grace period.
        proc.terminate()
        try:
            proc.wait(timeout=30)
        except subprocess.TimeoutExpired:
            proc.kill()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Still not reaped; report it with returncode None rather
                # than crash the wrapper with a traceback.
                pass

    # Same text as the former utcnow() call, without the deprecated API.
    now = datetime.datetime.now(datetime.timezone.utc)
    nowstr = now.strftime('%Y-%m-%d_%H%M.%S')
    resfile.write('{}\n{}'.format(nowstr, proc.returncode))
    return proc.returncode
|
||||||
|
|
||||||
|
def _try_create_lock(lockfile, content):
    """Exclusively create ``lockfile`` containing ``content``; False if it exists."""
    try:
        with open(lockfile, 'x') as lock:
            lock.write(content)
    except FileExistsError:
        return False
    return True


def _lock_owner_alive(lockfile):
    """Return True if the PID recorded in ``lockfile`` names a live process."""
    try:
        with open(lockfile, 'r') as lock:
            owner_pid = int(lock.read())
    except (FileNotFoundError, ValueError):
        # The lock vanished, or it holds no parsable PID (e.g. the writer
        # died before writing): nobody can be identified as owner.
        return False
    if owner_pid <= 0:
        # os.kill() treats 0 and negative values as process groups, which
        # would make a corrupt lock look alive forever.
        return False
    try:
        os.kill(owner_pid, 0)  # signal 0 only checks for existence
    except PermissionError:
        # The process exists but belongs to another user - still alive.
        return True
    except (OSError, OverflowError):
        return False
    return True


def aquire_lock(lockfile):
    """Try to take the lock file ``lockfile`` for the current process.

    The lock is a file created exclusively and containing the owner's PID.
    If the file already exists but its recorded owner is not a live process,
    the stale file is removed and one more attempt is made.

    Args:
        lockfile: path of the lock file.

    Returns:
        True if this process now holds the lock, False if a live process
        holds it or another process won the race for a stale lock.
    """
    me = "{}\n".format(os.getpid())
    if _try_create_lock(lockfile, me):
        # we got the lock!
        return True

    # some other process has (or had) this lock
    if _lock_owner_alive(lockfile):
        return False

    # owner is gone, we can make another attempt to lock
    try:
        os.remove(lockfile)
    except FileNotFoundError:
        # someone else already cleaned up the stale lock
        pass
    # False here means someone stole our lock anyway :(
    return _try_create_lock(lockfile, me)
|
||||||
|
|
||||||
|
def release_lock(lockfile):
    """Remove ``lockfile`` if, and only if, the current process owns it.

    Ownership is decided by comparing the PID stored in the file with
    ``os.getpid()``. A missing file, a directory, or a lock recorded for
    another PID is left untouched, so calling this after a failed lock
    attempt cannot delete a lock held by a running instance.

    Args:
        lockfile: path of the lock file.
    """
    try:
        # Binary mode: a corrupt lock file must not raise a decode error.
        with open(lockfile, 'rb') as lock:
            owner = lock.read().strip()
    except (FileNotFoundError, IsADirectoryError):
        return

    if owner != str(os.getpid()).encode('ascii'):
        return

    try:
        os.remove(lockfile)
    except FileNotFoundError:
        # Already gone; nothing left to release.
        pass
|
||||||
|
|
||||||
|
def _echo_file(path):
    """Copy the text file at ``path`` to stdout line by line, unchanged."""
    with open(path, 'r') as f:
        for line in f:
            # Lines keep their own newline; adding another would double-space
            # the replayed output.
            print(line, end='')


def print_runs(runs, clean=True):
    """Print the saved output of cron job runs, in sorted order.

    Each run is a directory holding the files ``result``, ``stdout`` and
    ``stderr``. The last line of ``result`` is reported as the return code
    (``unknown`` if the file is empty).

    Args:
        runs: iterable of run directory paths.
        clean: when true, delete every run directory after printing.
    """
    # Materialise once: ``runs`` is iterated twice and may be any iterable.
    runs = list(runs)

    for run in sorted(runs):
        with open(os.path.join(run, 'result'), 'r') as f:
            result_lines = f.read().splitlines()
        retcode = result_lines[-1] if result_lines else 'unknown'

        print("{} returncode {}".format(run, retcode))
        print("STDOUT:")
        _echo_file(os.path.join(run, 'stdout'))
        print("\nSTDERR:")
        _echo_file(os.path.join(run, 'stderr'))
        print("\n\n")

    if clean:
        for run in runs:
            shutil.rmtree(run)
|
||||||
|
|
||||||
|
def _previous_runs(jobdir, nowstr, time_format):
    """Map each saved run directory older than ``nowstr`` to its timestamp.

    Only the direct sub-directories of ``jobdir`` are considered; names that
    do not parse with ``time_format`` are skipped.
    """
    cutoff = datetime.datetime.strptime(nowstr, time_format)
    root, dirs, _files = next(os.walk(jobdir), (jobdir, [], []))
    found = {}
    for d in dirs:
        try:
            stamp = datetime.datetime.strptime(d, time_format)
        except ValueError:
            # Not one of our run directories; leave it alone.
            continue
        if stamp < cutoff:
            found[os.path.join(root, d)] = stamp
    return found


def _stderr_is_clean(errfile, ignore_patterns):
    """Return True if every line of ``errfile`` matches one of ``ignore_patterns``."""
    checks = [re.compile(p) for p in ignore_patterns]
    with open(errfile, 'r') as f:
        return all(any(c.match(line) for c in checks) for line in f)


def main():
    """Run the wrapped cron job and decide what, if anything, to report.

    The job's stdout, stderr and result are stored in a per-run directory
    under ``<cachedir>/<name>/<UTC timestamp>``.

    * Success (exit code in ``--success-exit-codes`` and no stderr line left
      after applying ``--ignore-error``): saved output of earlier failed runs
      is printed and deleted, the current run directory is deleted.
    * First failure: the run's output is printed and kept.
    * Repeated failure: output stays saved; the earlier runs are printed (and
      deleted) only once the oldest of them is more than ``--warn-interval``
      seconds old.

    Unless ``--no-lock`` is given, a lock file prevents concurrent runs of the
    same job; it is released on every exit path, and only if this process
    actually acquired it.

    Returns:
        0 on success, ``None`` otherwise.
    """
    time_format = '%Y-%m-%d_%H%M'
    args = parse_args()
    # Naive UTC timestamp: it is compared below with naive datetimes parsed
    # from directory names. Avoids the deprecated utcnow().
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    nowstr = now.strftime(time_format)
    jobdir = os.path.join(args.cachedir[0], args.name[0])
    libdir = os.path.join(jobdir, nowstr)
    lckdir = os.path.join(args.lockdir[0], args.name[0])

    os.makedirs(lckdir, exist_ok=True)
    os.makedirs(libdir)
    lckfile = os.path.join(lckdir, args.name[0])
    outfile = os.path.join(libdir, 'stdout')
    errfile = os.path.join(libdir, 'stderr')
    resfile = os.path.join(libdir, 'result')

    locked = False
    try:
        with open(outfile, 'w') as o, open(errfile, 'w') as e, open(resfile, 'w') as r:
            if not args.no_lock:
                locked = aquire_lock(lckfile)
            if args.no_lock or locked:
                res = exec_command(args, o, e, r)
            else:
                e.write("CRONWRAPPER: Unable to aquire lock, previous instance still running?\n")
                r.write("\nFalse\n")
                # None can never equal an exit code (False would equal 0).
                res = None

        previous_runs = _previous_runs(jobdir, nowstr, time_format)

        if res in args.success_exit_codes and _stderr_is_clean(errfile, args.ignore_error):
            # Yes! Success! report any errors until now
            if previous_runs:
                print("Success after {} failed runs\n".format(len(previous_runs)))
                print_runs(previous_runs.keys())
            shutil.rmtree(libdir)
            return 0

        # Failure
        if previous_runs:
            # Not the first failure...
            oldest = min(previous_runs.values())
            if now - oldest > datetime.timedelta(seconds=args.warn_interval[0]):
                # we have failed for a long time, send a report
                print("Cronjob is still failing after {} seconds ({} failures)\n".format(args.warn_interval[0], len(previous_runs)))
                print_runs(previous_runs.keys())
            # else: within limits, save the output for a later instance
        else:
            # First failure, send a report, but keep the output
            print("Cronjob failed\n")
            print_runs([libdir], clean=False)
        return None
    finally:
        # Release on success and failure alike, but never remove a lock that
        # belongs to another running instance.
        if locked:
            release_lock(lckfile)
|
||||||
|
|
||||||
|
# Script entry point: when this file is executed directly, run main() and use
# its return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
5
cronwrapper/__main__.py
Normal file
5
cronwrapper/__main__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
import cronwrapper
|
||||||
|
|
||||||
|
def main():
    """Console-script entry point; delegates to ``cronwrapper.main``.

    Returns:
        Whatever ``cronwrapper.main()`` returns, so the caller can use it as
        the process exit status.
    """
    return cronwrapper.main()


# ``python -m cronwrapper`` executes this module as ``__main__``; without this
# guard the module would only define main() and exit without running the job.
if __name__ == '__main__':
    raise SystemExit(main())
|
31
setup.py
Normal file
31
setup.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import setuptools

# The README doubles as the long description of the package. Read it as
# UTF-8 explicitly so the build does not depend on the locale of the machine.
with open('README.md', 'r', encoding='utf-8') as fh:
    long_description = fh.read()

setuptools.setup(
    name='cronwrapper',
    version='0.1.0',
    author='Fredrik Eriksson',
    author_email='feffe@fulh.ax',
    description='A small wrapper to handle cronjob failures',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://fulh.ax/feffe/cronwrapper',
    packages=setuptools.find_packages(),
    license='BSD 3-Clause',
    # Installs a ``cronwrapper`` executable that calls cronwrapper.__main__.main.
    entry_points={
        'console_scripts': [
            'cronwrapper = cronwrapper.__main__:main',
        ],
    },
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Environment :: Console',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: BSD License',
        'Operating System :: POSIX',
        'Topic :: System :: Monitoring',
    ],
)
|
Loading…
Reference in New Issue
Block a user