"""Find processes that hold deleted files open and restart their services."""

import logging
import os
import re
import time

import psutil

import sau
import sau.errors
import sau.helpers
import sau.platforms

# Matches a line of /proc/<pid>/maps that refers to a deleted file below one
# of the listed system directories; group 1 is the path of that file.
proc_fd_map_re = re.compile(r'^.*(/(?:usr|lib|opt|etc|s?bin)[^\(]*) \(deleted\)$')

# Every policy that may be configured for a service (or as the default).
valid_service_policies = (
    'restart',
    'warn',
    'ignore',
    'silent-restart',
    'reboot',
    'silent-reboot',
)

# psutil errors that this module treats as "the process is gone/unusable".
_PROC_ERRORS = (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied)


def _warn(policy, msg):
    """Log ``msg`` as a warning unless ``policy`` is one of the silent ones."""
    log = logging.getLogger(sau.LOGNAME)
    if not policy.startswith('silent'):
        log.warning(msg)


def get_deleted_open_files(proc):
    """Return the set of deleted files that ``proc`` still has open.

    If ``/proc/<pid>/maps`` exists, it is parsed and the set contains the
    paths (strings) matched by ``proc_fd_map_re``.  Otherwise psutil's
    ``open_files()`` is used and the set contains the psutil entries whose
    path is empty or no longer exists.  Callers in this module only test
    the result for emptiness.

    A process that disappears or cannot be inspected yields whatever was
    collected so far (usually an empty set) instead of raising.
    """
    log = logging.getLogger(sau.LOGNAME)
    files = set()

    # try the linux-way first
    maps_file = '/proc/{}/maps'.format(proc.pid)
    if os.path.isfile(maps_file):
        try:
            with open(maps_file, 'r') as f:
                for line in f:
                    match = proc_fd_map_re.match(line)
                    if match:
                        files.add(match.group(1))
        except OSError as err:
            # The process may exit between the isfile() check and the read,
            # or the file may be unreadable; treat it like the psutil errors
            # handled below rather than aborting the whole scan.
            log.debug('Could not read {}: {}'.format(maps_file, err))
        return files

    # on FreeBSD psutils open_files() helpfully returns a null path if a file
    # has been deleted.
    try:
        for f in proc.open_files():
            if f.path and os.path.exists(f.path):
                continue
            files.add(f)
    except _PROC_ERRORS:
        pass
    return files


def get_exe_file(name):
    """Search the usual binary directories for an executable called ``name``.

    Returns the full path of the first match, or ``None`` if nothing was
    found.  Each directory is walked recursively.
    """
    log = logging.getLogger(sau.LOGNAME)
    # NOTE: every entry needs its own trailing comma; adjacent string
    # literals are silently concatenated into a single (bogus) path.
    search_paths = [
        '/bin',
        '/sbin',
        '/usr/bin',
        '/usr/sbin',
        '/usr/local/bin',
        '/usr/local/sbin',
        '/libexec',
        '/usr/libexec',
        '/usr/local/libexec',
    ]
    for path in search_paths:
        if not os.path.isdir(path):
            continue
        for root, _dirs, files in os.walk(path):
            if name in files:
                log.debug('Found binary for {} at {}'.format(name, root))
                return os.path.join(root, name)
    return None


# return all processes with open deleted files
def _get_processes():
    """Return the set of running processes that have deleted files open."""
    log = logging.getLogger(sau.LOGNAME)
    check_procs = set()
    for proc in psutil.process_iter():
        if get_deleted_open_files(proc):
            log.debug('{} has open deleted files'.format(proc))
            check_procs.add(proc)
    return check_procs


# Just return True if system is running on systemd
def on_systemd():
    """Return True if the process with pid 1 is named ``systemd``."""
    try:
        return psutil.Process(pid=1).name() == 'systemd'
    except psutil.NoSuchProcess:
        return False


def restart_services():
    """Restart services whose processes still use deleted files.

    Processes are checked twice, five seconds apart, to filter out false
    positives.  Each remaining process is mapped to a service, and the
    service is handled according to its configured policy.  Afterwards the
    affected processes are checked once more.

    Returns True when restarting services was not (or may not have been)
    sufficient: a process could not be mapped to a service, a service has a
    reboot policy, or a process still has deleted files open at the end.
    """
    log = logging.getLogger(sau.LOGNAME)
    platform = sau.platforms.get_platform()
    check_procs = _get_processes()

    # wait before the second test
    time.sleep(5)

    # perform a second check to remove potential false positives
    service_procs = set()
    retest_procs = set()
    for proc in check_procs:
        if not get_deleted_open_files(proc):
            # no deleted open files for this process any longer
            continue
        try:
            # Only used as a probe that the process is still around.
            proc.exe()
        except _PROC_ERRORS:
            # either of the above exceptions means the process has quit
            continue
        if on_systemd():
            service_procs.add(proc)
        else:
            service_procs.add(_get_top_parent(proc))
        retest_procs.add(proc)

    recommend_restart = False
    services = {}  # process name -> service name
    for proc in service_procs:
        if not proc:
            # _get_top_parent() returns None for a process that has quit
            continue
        try:
            proc.exe()
            proc_name = proc.name()
        except _PROC_ERRORS:
            log.debug('{} died before it could be restarted'.format(proc))
            continue

        service_name = _get_service_from_proc(proc)
        if not service_name:
            log.warning('no service for process {}'.format(proc))
            recommend_restart = True
            continue
        if service_name == 'systemd':
            log.info("Upgrade of systemd detected; doing daemon-reexec")
            sau.helpers.exec_cmd(['/usr/bin/systemctl', 'daemon-reexec'])
            continue
        if service_name == '@ignore':
            log.info(f"Process {proc} ignored by configuration")
            retest_procs.discard(proc)
            continue
        services[proc_name] = service_name

    # Services with policy "ignore".  Their processes must be skipped in the
    # final re-test; retest_procs holds Process objects while ``services`` is
    # keyed by process name, so they cannot simply be discarded by name.
    ignored_services = set()
    for service in sorted(set(services.values())):
        policy = _get_service_restart_policy(service)
        if policy == 'ignore':
            log.info('Service "{}" ignored by configuration'.format(service))
            ignored_services.add(service)
            continue
        if policy == 'warn':
            log.warning('Service "{}" has open deleted files and should be restarted'.format(service))
            continue
        if 'reboot' in policy:
            _warn(policy, 'Rebooting because {} has opened files'.format(service))
            recommend_restart = True
        # Reached for the restart policies and, as before, the reboot ones.
        _warn(policy, 'Restarting service {}'.format(service))
        platform.restart_service(service)

    for proc in retest_procs:
        try:
            proc_name = proc.name()
        except _PROC_ERRORS:
            continue
        service = services.get(proc_name)
        if service is None or service in ignored_services:
            continue
        if get_deleted_open_files(proc):
            policy = _get_service_restart_policy(service)
            _warn(policy, f'{proc} still has deleted files open')
            recommend_restart = True
    return recommend_restart


def _get_service_restart_policy(service):
    """Return the lower-cased restart policy configured for ``service``.

    Looks up ``service`` in the ``services`` section; an unset or invalid
    value falls back to ``default_service_policy`` from the ``default``
    section, and an invalid default falls back to ``'warn'``.
    """
    log = logging.getLogger(sau.LOGNAME)
    conf = sau.config

    policy = conf.get('services', service, fallback=None)
    if policy:
        if policy.lower() in valid_service_policies:
            return policy.lower()
        log.warning('service policy {} for {} is invalid'.format(policy, service))

    default_policy = conf.get('default', 'default_service_policy', fallback='warn')
    # Validate against the same tuple as per-service policies so that every
    # valid policy (including 'silent-reboot') is accepted as a default.
    if default_policy.lower() in valid_service_policies:
        return default_policy.lower()
    log.warning('default service policy {} is invalid'.format(default_policy))
    return 'warn'


def _get_service_from_proc(proc):
    """Map ``proc`` to the name of the service that owns it.

    Returns:
        * ``'@ignore'`` if the process has died or is configured with an
          empty service name in the ``processes`` section,
        * ``'systemd'`` for pid 1 on a systemd host,
        * the service name (configured, reported by ``systemctl whoami`` or
          identified by the platform from the executable), or
        * ``None`` if no restartable service could be determined.
    """
    log = logging.getLogger(sau.LOGNAME)
    conf = sau.config
    platform = sau.platforms.get_platform()

    if not on_systemd():
        proc = _get_top_parent(proc)
        if proc is None:
            # _get_top_parent() returns None when the process has quit
            log.debug('process died before its top parent could be determined')
            return '@ignore'

    try:
        proc_name = proc.name()
        service_exe = proc.exe()
    except _PROC_ERRORS:
        log.debug('{} died'.format(proc))
        return '@ignore'

    service_name = conf.get('processes', proc_name, fallback=None)
    log.debug(f'configuration of process "{proc_name}" in config: "{service_name}"')
    if service_name == '':
        log.debug('Ignoring process {}'.format(proc))
        return '@ignore'
    if service_name:
        # Explicitly configured; nothing to detect.
        return service_name

    # Systemd has its own way...
    if on_systemd():
        if proc.pid == 1:
            return 'systemd'
        ret, unit, err = sau.helpers.exec_cmd(
            ['/usr/bin/systemctl', 'whoami', f'{proc.pid}']
        )
        if ret != 0:
            log.debug(f'Non-success ({ret}) when checking unit for process: {err}')
            return None
        unit = unit.strip()
        # Split on the last dot only: the part before the type suffix may
        # itself contain dots, and the output may be empty.
        name, _sep, unit_type = unit.rpartition('.')
        if unit_type != 'service':
            log.warning(f'not restarting non-service unit "{unit}"; owner of {proc}')
            return None
        if name.startswith('user@'):
            log.warning(f'Not restarting user service {unit}; please log out and log in again')
            return None

        # A service with an explicit, valid policy is accepted as-is.
        policy = conf.get('services', name, fallback=None)
        if policy and policy.lower() in valid_service_policies:
            return name

        _ret, enabled, _err = sau.helpers.exec_cmd(
            ['/usr/bin/systemctl', 'is-enabled', unit]
        )
        enabled = enabled.strip()
        if enabled not in ('enabled', 'static'):
            log.warning(f'Unit {name}.service has enable status: {enabled} - will only restart "enabled" services')
            return None
        return name

    # if the exe file has been deleted since started, service_exe will be empty
    # and we'll have to guess
    if not service_exe:
        log.debug('Could not get full path to executable for process {}, will attempt to guess'.format(proc))
        # service_name is always unset on this path, so search by the
        # process name instead.
        service_exe = get_exe_file(proc_name)
        if not service_exe:
            log.error('Failed to find executable for process {}'.format(proc))
            return None

    try:
        return platform.identify_service_from_bin(service_exe)
    except sau.errors.UnknownServiceError:
        log.warning('Could not find service for process {}'.format(proc))
        return None


def _get_top_parent(proc):
    """Return the second-to-last entry of ``proc.parents()``.

    When ``proc`` has fewer than two parents, ``proc`` itself is returned.
    Returns ``None`` if the process can no longer be inspected.
    """
    log = logging.getLogger(sau.LOGNAME)
    try:
        parents = proc.parents()
    except _PROC_ERRORS:
        # either of the above exceptions means the process has quit
        return None

    if len(parents) < 2:
        log.debug('{} is its own top parent'.format(proc))
        return proc

    # presumably the last entry is the init process, so [-2] is the ancestor
    # directly below it -- confirm against the psutil documentation
    top_parent = parents[-2]
    log.debug('{} has top parent {}'.format(proc, top_parent))
    return top_parent