diff --git a/sau/services.py b/sau/services.py
index 2966572..7aa8532 100644
--- a/sau/services.py
+++ b/sau/services.py
@@ -7,6 +7,7 @@ import psutil
 
 import sau
 import sau.errors
+import sau.helpers
 import sau.platforms
 
 proc_fd_map_re = re.compile(r'^.*(/[^\(]*) \(deleted\)$')
@@ -16,7 +17,7 @@ def _warn(policy, msg):
     if not policy.startswith('silent'):
         log.warning(msg)
 
-def _get_deleted_open_files(proc):
+def get_deleted_open_files(proc):
     log = logging.getLogger(sau.LOGNAME)
     files = set()
 
@@ -62,25 +63,40 @@ def get_exe_file(name):
             log.debug('Found binary for {} at {}'.format(name, root))
             return os.path.join(root, name)
 
+# return all processes that still hold deleted files open
+def _get_processes():
+    log = logging.getLogger(sau.LOGNAME)
+    check_procs = set()
+    for proc in psutil.process_iter():
+        files = get_deleted_open_files(proc)
+        if files:
+            log.debug('{} has open deleted files'.format(proc))
+            check_procs.add(proc)
+
+    return check_procs
+
 def restart_services():
     log = logging.getLogger(sau.LOGNAME)
     platform = sau.platforms.get_platform()
     conf = sau.config
-    check_procs = set()
-    for proc in psutil.process_iter():
-        files = _get_deleted_open_files(proc)
-        if files:
-            log.info('{} has open deleted files'.format(proc))
-            check_procs.add(proc)
+    check_procs = _get_processes()
 
     # wait before the second test
-    time.sleep(1)
+    time.sleep(5)
+
+    on_systemd = False
+    try:
+        init_proc = psutil.Process(pid=1)
+        if init_proc.name() == 'systemd':
+            on_systemd = True
+    except psutil.NoSuchProcess:
+        pass
 
     # perform a second check to remove potential false positives
     service_procs = set()
     retest_procs = set()
     for proc in check_procs:
-        files = _get_deleted_open_files(proc)
+        files = get_deleted_open_files(proc)
         if not files:
             # no deleted open files for this process any longer
             continue
@@ -89,32 +105,55 @@ def restart_services():
         except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
             # either of the above exceptions means the process has quit
             continue
-        parent = _get_top_parent(proc)
+        if on_systemd:
+            service_procs.add(proc)
+        else:
+            parent = _get_top_parent(proc)
+            service_procs.add(parent)
 
-        service_procs.add(parent)
         retest_procs.add(proc)
 
+    recommend_restart = False
     processes = {}
     services = {}
     for proc in service_procs:
         if not proc:
             continue
+
+        service_name = None
         try:
             service_exe = proc.exe()
             proc_name = proc.name()
         except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
             log.debug('{} died before it could be restarted'.format(proc))
             continue
-        service_name = _get_service_from_proc(proc)
+
+        if on_systemd:
+            ret, unit, err = sau.helpers.exec_cmd([ '/usr/bin/systemctl', 'whoami', f'{proc.pid}' ])
+            name, _sep, unit_type = unit.strip().rpartition('.')
+            if ret != 0:
+                log.debug(f'Non-success ({ret}) when checking unit for process: {err}')
+                continue
+            elif unit_type != 'service':
+                log.warning(f'not restarting non-service unit "{unit}"; owner of {proc}')
+            else:
+                _ret, enabled, _err = sau.helpers.exec_cmd([ '/usr/bin/systemctl', 'is-enabled', unit.strip() ])
+                enabled = enabled.strip()
+                if enabled != 'enabled':
+                    log.warning(f'Unit {name}.service has enable status: {enabled} - will only restart "enabled" services')
+                else:
+                    service_name = name
+        else:
+            service_name = _get_service_from_proc(proc)
         if not service_name:
-            log.debug('no service for process {}'.format(proc))
+            log.warning('no service for process {}'.format(proc))
+            recommend_restart = True
             continue
 
         services[proc_name] = service_name
         processes[service_name] = [proc]
 
-    recommend_restart = False
     for service in set([x for x in services.values() if x]):
         policy = _get_service_restart_policy(service)
         if policy == 'ignore':
@@ -131,34 +170,17 @@ def restart_services():
 
     tested_parents = set()
     for proc in retest_procs:
-        parent = _get_top_parent(proc)
-        if not parent:
-            continue
-        parent_name = parent.name()
-        if parent in tested_parents:
-            log.debug('{} belongs to already tested parent {}'.format(proc, parent))
+        try:
+            proc_name = proc.name()
+            if proc_name not in services:
+                continue
+        except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
             continue
 
-        if _get_deleted_open_files(proc):
-            tested_parents.add(parent)
-            service = _get_service_from_proc(parent)
-            if not service:
-                log.warning('could not re-check process {} - failed to identify service'.format(proc))
-                recommend_restart = True
-                continue
+        if get_deleted_open_files(proc):
+            service = services[proc_name]
             policy = _get_service_restart_policy(service)
-
-            log.debug('{} is in service {}'.format(proc, service))
-            if parent_name in services and not services[parent_name]:
-                _warn(policy, '{} (parent {}) does not belong to a service and could not be restarted'.format(proc, parent))
-                recommend_restart = True
-                continue
-            elif parent_name in services:
-                policy = _get_service_restart_policy(service)
-                log.debug('service {} has policy {}'.format(service, policy))
-                if policy in ('ignore', 'warn'):
-                    continue
-            _warn(policy, '{} (parent {}) still has deleted files open'.format(proc, parent))
+            _warn(policy, '{} still has deleted files open'.format(proc))
             recommend_restart = True
 
     return recommend_restart