another attempt...
This commit is contained in:
parent
7833254c33
commit
8bd3148a66
136
sau/services.py
136
sau/services.py
@ -74,7 +74,7 @@ def restart_services():
|
|||||||
|
|
||||||
# perform a second check to remove potential false positives
|
# perform a second check to remove potential false positives
|
||||||
service_procs = set()
|
service_procs = set()
|
||||||
retest_procs = {}
|
retest_procs = set()
|
||||||
for proc in check_procs:
|
for proc in check_procs:
|
||||||
files = _get_deleted_open_files(proc)
|
files = _get_deleted_open_files(proc)
|
||||||
if not files:
|
if not files:
|
||||||
@ -82,90 +82,71 @@ def restart_services():
|
|||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
exe = proc.exe()
|
exe = proc.exe()
|
||||||
parents = proc.parents()
|
|
||||||
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
||||||
# either of the above exceptions means the process has quit
|
# either of the above exceptions means the process has quit
|
||||||
continue
|
continue
|
||||||
|
parent = _get_top_parent(proc)
|
||||||
|
|
||||||
log.debug('will attempt to restart parent of {}'.format(proc))
|
|
||||||
if len(parents) < 2:
|
|
||||||
log.debug('{} is its own top parent'.format(proc))
|
|
||||||
parent = proc
|
|
||||||
else:
|
|
||||||
log.debug('{} has top parent {}'.format(proc, parents[-2]))
|
|
||||||
parent = parents[-2]
|
|
||||||
service_procs.add(parent)
|
service_procs.add(parent)
|
||||||
retest_procs[proc] = parent
|
retest_procs.add(proc)
|
||||||
|
|
||||||
processes = {}
|
processes = {}
|
||||||
services = {}
|
services = {}
|
||||||
for proc in service_procs:
|
for proc in service_procs:
|
||||||
try:
|
try:
|
||||||
service_exe = proc.exe()
|
service_exe = proc.exe()
|
||||||
proc_name = proc.name()
|
|
||||||
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
||||||
log.debug('{} died before it could be restarted'.format(proc))
|
log.debug('{} died before it could be restarted'.format(proc))
|
||||||
continue
|
continue
|
||||||
|
service_name = _get_service_from_proc(proc)
|
||||||
if proc_name in services:
|
|
||||||
processes[services[proc_name]].append(proc)
|
|
||||||
# we have already checked a process with this name
|
|
||||||
continue
|
|
||||||
|
|
||||||
service_name = conf.get('processes', proc_name, fallback=None)
|
|
||||||
if service_name == '':
|
|
||||||
log.debug('Ignoring process {}'.format(proc))
|
|
||||||
del retest_procs[proc]
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not service_name:
|
if not service_name:
|
||||||
# if the exe file has been deleted since started, service_exe will be empty
|
log.debug('no service for process {}'.format(proc))
|
||||||
# and we'll have to guess
|
continue
|
||||||
if not service_exe:
|
|
||||||
log.debug('Could not get full path to executable for process {}, will attempt to guess'.format(proc))
|
|
||||||
service_exe = get_exe_file(service_name)
|
|
||||||
if not service_exe:
|
|
||||||
log.error('Failed to find executable for process {}'.format(proc))
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
service_name = platform.identify_service_from_bin(service_exe)
|
|
||||||
except sau.errors.UnknownServiceError:
|
|
||||||
log.warning('Could not find service for process {}'.format(proc))
|
|
||||||
continue
|
|
||||||
|
|
||||||
services[proc_name] = service_name
|
services[proc_name] = service_name
|
||||||
processes[service_name] = [proc]
|
processes[service_name] = [proc]
|
||||||
|
|
||||||
for service_all in [x for x in services.values() if x]:
|
for service in set([x for x in services.values() if x]):
|
||||||
for service in service_all.split():
|
policy = _get_service_restart_policy(service)
|
||||||
policy = _get_service_restart_policy(service)
|
if policy == 'ignore':
|
||||||
if policy == 'ignore':
|
log.info('Service "{}" ignored by configuration'.format(service))
|
||||||
log.info('Service "{}" ignored by configuration'.format(service))
|
continue
|
||||||
continue
|
elif policy == 'warn':
|
||||||
elif policy == 'warn':
|
log.warning('Service "{}" has open deleted files and should be restarted'.format(service))
|
||||||
log.warning('Service "{}" has open deleted files and should be restarted'.format(service))
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
if not policy.startswith('silent'):
|
if not policy.startswith('silent'):
|
||||||
log.warning('Restarting service {}'.format(service))
|
log.warning('Restarting service {}'.format(service))
|
||||||
platform.restart_service(service)
|
platform.restart_service(service)
|
||||||
|
|
||||||
recommend_restart = False
|
recommend_restart = False
|
||||||
for proc, parent in retest_procs.items():
|
tested_parents = set()
|
||||||
|
for proc in retest_procs:
|
||||||
try:
|
try:
|
||||||
name = proc.name()
|
name = proc.name()
|
||||||
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
||||||
log.debug('{} was successfully killed'.format(proc))
|
log.debug('{} was successfully killed'.format(proc))
|
||||||
continue
|
continue
|
||||||
|
parent = _get_top_parent(proc)
|
||||||
|
if parent in tested_parents:
|
||||||
|
log.debug('{} belongs to already tested parent {}'.format(proc, parent))
|
||||||
|
continue
|
||||||
|
|
||||||
if _get_deleted_open_files(proc):
|
if _get_deleted_open_files(proc):
|
||||||
if name in services and not services[name]:
|
tested_parents.add(parent)
|
||||||
|
service = _get_service_from_proc(parent)
|
||||||
|
if not service:
|
||||||
|
log.warning('could not re-check process {} - failed to identify service'.format(proc))
|
||||||
|
recommend_restart = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
if service in services and not services[service]:
|
||||||
log.warning('{} (parent {}) does not belong to a service and could not be restarted'.format(proc, parent))
|
log.warning('{} (parent {}) does not belong to a service and could not be restarted'.format(proc, parent))
|
||||||
recommend_restart = True
|
recommend_restart = True
|
||||||
continue
|
continue
|
||||||
elif name in services:
|
elif service in services:
|
||||||
policy = _get_service_restart_policy(services[parent.name()])
|
policy = _get_service_restart_policy(service)
|
||||||
if policy in ('ignore', 'warn'):
|
if policy in ('ignore', 'warn'):
|
||||||
continue
|
continue
|
||||||
log.warning('{} (parent {}) still has deleted files open'.format(proc, parent))
|
log.warning('{} (parent {}) still has deleted files open'.format(proc, parent))
|
||||||
@ -186,3 +167,54 @@ def _get_service_restart_policy(service):
|
|||||||
return default_policy.lower()
|
return default_policy.lower()
|
||||||
log.warning('default service policy {} is invalid'.format(default_policy))
|
log.warning('default service policy {} is invalid'.format(default_policy))
|
||||||
return 'warn'
|
return 'warn'
|
||||||
|
|
||||||
|
def _get_service_from_proc(proc):
    """Identify the service responsible for ``proc``.

    The lookup is performed on the top parent of ``proc`` (as returned by
    ``_get_top_parent``), not on ``proc`` itself.  The ``processes`` section
    of the sau configuration is consulted first, keyed by process name; when
    it has no entry the platform is asked to identify the service from the
    path of the executable.

    Returns the service name, or None when:
      * the process (or its top parent) is no longer accessible,
      * the configuration maps the process name to an empty value, which
        marks the process as ignored,
      * no executable or no service could be determined.
    """
    conf = sau.config
    platform = sau.platforms.get_platform()
    log = logging.getLogger(sau.LOGNAME)

    top_parent = _get_top_parent(proc)
    if top_parent is None:
        # _get_top_parent returns None when the process has gone away;
        # calling .name() on it would raise an uncaught AttributeError.
        log.debug('{} died'.format(proc))
        return None
    proc = top_parent

    try:
        proc_name = proc.name()
        service_exe = proc.exe()
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        log.debug('{} died'.format(proc))
        return None

    service_name = conf.get('processes', proc_name, fallback=None)
    if service_name == '':
        # an explicitly empty entry means "do not handle this process"
        log.debug('Ignoring process {}'.format(proc))
        return None
    if service_name:
        # configuration names the service directly, nothing to detect
        return service_name

    # if the exe file has been deleted since started, service_exe will be empty
    # and we'll have to guess
    if not service_exe:
        log.debug('Could not get full path to executable for process {}, will attempt to guess'.format(proc))
        # service_name is always None on this path, so the guess has to be
        # based on the process name instead.
        service_exe = get_exe_file(proc_name)
        if not service_exe:
            log.error('Failed to find executable for process {}'.format(proc))
            return None

    try:
        return platform.identify_service_from_bin(service_exe)
    except sau.errors.UnknownServiceError:
        log.warning('Could not find service for process {}'.format(proc))
        return None
|
||||||
|
|
||||||
|
def _get_top_parent(proc):
    """Return the top-level ancestor of ``proc``.

    The ancestor chosen is the second-to-last entry of ``proc.parents()``
    (presumably the process directly below the root of the process tree --
    confirm against psutil's ordering of ``parents()``).  When fewer than two
    parents are reported, ``proc`` itself is returned.

    Returns None if the parents cannot be read because the process no longer
    exists, is a zombie, or access is denied.
    """
    logger = logging.getLogger(sau.LOGNAME)

    try:
        ancestors = proc.parents()
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        # either of the above exceptions means the process has quit
        return None

    if len(ancestors) >= 2:
        logger.debug('{} has top parent {}'.format(proc, ancestors[-2]))
        return ancestors[-2]

    logger.debug('{} is its own top parent'.format(proc))
    return proc
|
||||||
|
Loading…
Reference in New Issue
Block a user