sau/sau/services.py

281 lines
9.6 KiB
Python

import logging
import os
import re
import time
import psutil
import sau
import sau.errors
import sau.helpers
import sau.platforms
# Matches a line of /proc/<pid>/maps whose mapped file is marked "(deleted)"
# and whose path contains one of the system directory prefixes listed below.
# Because the leading ``.*`` is greedy, group 1 starts at the LAST path
# component that begins with one of those prefixes (e.g. "/bin/python3" for
# "/usr/bin/python3"), not necessarily at the start of the full path.
proc_fd_map_re = re.compile(
    r'^.*(/(?:usr|lib|opt|etc|s?bin)[^\(]*) \(deleted\)$'
)

# Every per-service policy value accepted from the configuration.
valid_service_policies = (
    'restart',
    'warn',
    'ignore',
    'silent-restart',
    'reboot',
    'silent-reboot',
)
def _warn(policy, msg):
    """Emit ``msg`` as a warning unless ``policy`` is a silent policy.

    policy: service policy name; any value starting with 'silent'
        suppresses the message.
    msg: the text passed to the logger.
    """
    logger = logging.getLogger(sau.LOGNAME)
    if policy.startswith('silent'):
        return
    logger.warning(msg)
def get_deleted_open_files(proc):
    """Return the deleted files that ``proc`` still has mapped or open.

    proc: a process object exposing ``pid`` and ``open_files()``.

    Returns a set. When /proc/<pid>/maps exists, the set holds the path
    strings captured by ``proc_fd_map_re``. Otherwise it holds the entries
    returned by ``proc.open_files()`` whose path is empty or no longer exists
    on disk. An empty set means nothing was found, or that the process could
    not be inspected (for instance because it has already exited).
    """
    files = set()
    # try the linux-way first
    maps_file = '/proc/{}/maps'.format(proc.pid)
    if os.path.isfile(maps_file):
        try:
            with open(maps_file, 'r') as f:
                for line in f:
                    match = proc_fd_map_re.match(line)
                    if match:
                        files.add(match.group(1))
        except OSError:
            # The process may exit (or become unreadable) between the
            # isfile() check and the read. Treat that the same way as the
            # psutil errors below: report whatever was collected so far.
            pass
        return files
    # on FreeBSD psutils open_files() helpfully returns a null path if a file
    # has been deleted.
    try:
        for f in proc.open_files():
            if not (f.path and os.path.exists(f.path)):
                # The whole entry is stored because its path may be empty.
                files.add(f)
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        # The process is gone or cannot be inspected; nothing more to add.
        pass
    return files
def get_exe_file(name):
    """Search the standard binary directories for a file called ``name``.

    name: file name (not a path) of the executable to look for.

    Each search directory is walked recursively, in the order listed below.
    Returns the full path of the first match, or None when no directory
    contains a file with that name.
    """
    log = logging.getLogger(sau.LOGNAME)
    # Every entry needs its own comma: adjacent string literals are
    # concatenated, which would silently merge several directories into one
    # non-existent path.
    search_paths = (
        '/bin',
        '/sbin',
        '/usr/bin',
        '/usr/sbin',
        '/usr/local/bin',
        '/usr/local/sbin',
        '/libexec',
        '/usr/libexec',
        '/usr/local/libexec',
    )
    for path in search_paths:
        if not os.path.isdir(path):
            continue
        for root, _dirs, files in os.walk(path):
            if name in files:
                log.debug('Found binary for {} at {}'.format(name, root))
                return os.path.join(root, name)
    return None
def _get_processes():
    """Return the set of processes for which deleted open files were found.

    Iterates over ``psutil.process_iter()`` and keeps every process for which
    ``get_deleted_open_files`` returns a non-empty result.
    """
    logger = logging.getLogger(sau.LOGNAME)
    flagged = set()
    for candidate in psutil.process_iter():
        if not get_deleted_open_files(candidate):
            continue
        logger.debug('{} has open deleted files'.format(candidate))
        flagged.add(candidate)
    return flagged
def on_systemd():
    """Return True when the process with PID 1 is named 'systemd'.

    Returns False for any other name, or when PID 1 cannot be found.
    """
    try:
        return psutil.Process(pid=1).name() == 'systemd'
    except psutil.NoSuchProcess:
        return False
def restart_services():
    """Restart services whose processes still use deleted files.

    Steps:
      1. Collect processes with deleted open files, wait five seconds and
         check again to drop false positives.
      2. Map each remaining process (or, off systemd, its top parent) to a
         service name.
      3. Apply the configured policy to every service found: ignore it,
         warn about it, or restart it (announcing a reboot first for the
         reboot policies).
      4. Re-check the processes afterwards and warn about those that still
         hold deleted files.

    Returns True when at least one of these happened: a process could not be
    mapped to a service, a service has a reboot policy, or a non-ignored
    process still has deleted files open after the restarts. Returns False
    otherwise.
    """
    log = logging.getLogger(sau.LOGNAME)
    platform = sau.platforms.get_platform()
    # psutil errors that mean the process cannot be inspected any more.
    proc_errors = (psutil.NoSuchProcess, psutil.ZombieProcess,
                   psutil.AccessDenied)

    check_procs = _get_processes()
    # wait before the second test
    time.sleep(5)

    # The init system does not change while this runs; ask only once.
    systemd = on_systemd()

    # perform a second check to remove potential false positives
    service_procs = set()
    retest_procs = set()
    for proc in check_procs:
        if not get_deleted_open_files(proc):
            # no deleted open files for this process any longer
            continue
        try:
            # Only used as a liveness probe; the value itself is not needed.
            proc.exe()
        except proc_errors:
            # either of the above exceptions means the process has quit
            continue
        if systemd:
            service_procs.add(proc)
        else:
            # May add None when the process died; it is skipped below.
            service_procs.add(_get_top_parent(proc))
        retest_procs.add(proc)

    recommend_restart = False
    services = {}  # process name -> service name
    for proc in service_procs:
        if not proc:
            continue
        try:
            proc.exe()
            proc_name = proc.name()
        except proc_errors:
            log.debug('{} died before it could be restarted'.format(proc))
            continue
        service_name = _get_service_from_proc(proc)
        if not service_name:
            log.warning('no service for process {}'.format(proc))
            recommend_restart = True
            continue
        if service_name == 'systemd':
            log.info("Upgrade of systemd detected; doing daemon-reexec")
            sau.helpers.exec_cmd([ '/usr/bin/systemctl', 'daemon-reexec' ])
            continue
        if service_name == '@ignore':
            log.info(f"Process {proc} ignored by configuration")
            retest_procs.discard(proc)
            continue
        services[proc_name] = service_name

    # Services whose policy is 'ignore'; their processes are not retested.
    # They are tracked by service name because ``retest_procs`` holds process
    # objects while ``services`` is keyed by process name, so discarding the
    # keys of ``services`` from ``retest_procs`` would remove nothing.
    ignored_services = set()
    for service in {name for name in services.values() if name}:
        policy = _get_service_restart_policy(service)
        if policy == 'ignore':
            log.info('Service "{}" ignored by configuration'.format(service))
            ignored_services.add(service)
            continue
        if policy == 'warn':
            log.warning('Service "{}" has open deleted files and should be restarted'.format(service))
            continue
        if 'reboot' in policy:
            _warn(policy, 'Rebooting because {} has opened files'.format(service))
            recommend_restart = True
        _warn(policy, 'Restarting service {}'.format(service))
        platform.restart_service(service)

    for proc in retest_procs:
        try:
            proc_name = proc.name()
        except proc_errors:
            continue
        service = services.get(proc_name)
        if service is None or service in ignored_services:
            continue
        if get_deleted_open_files(proc):
            policy = _get_service_restart_policy(service)
            _warn(policy, f'{proc} still has deleted files open')
            recommend_restart = True
    return recommend_restart
def _get_service_restart_policy(service):
    """Return the lower-cased restart policy configured for ``service``.

    Lookup order:
      1. the entry for ``service`` in the ``services`` config section;
      2. ``default_service_policy`` in the ``default`` section;
      3. the hard-coded fallback ``'warn'``.

    A value is accepted only if its lower-cased form is listed in
    ``valid_service_policies``. An invalid value is reported with a warning
    and the next step is tried. The same list is used for both lookups, so
    every policy valid for a service is also valid as the default.
    """
    log = logging.getLogger(sau.LOGNAME)
    conf = sau.config
    policy = conf.get('services', service, fallback=None)
    if policy:
        if policy.lower() in valid_service_policies:
            return policy.lower()
        log.warning('service policy {} for {} is invalid'.format(policy, service))
    default_policy = conf.get('default', 'default_service_policy', fallback='warn')
    if default_policy.lower() in valid_service_policies:
        return default_policy.lower()
    log.warning('default service policy {} is invalid'.format(default_policy))
    return 'warn'
def _get_service_from_proc(proc):
    """Work out which service owns ``proc``.

    proc: the process to identify. When the system is not running systemd,
        its top parent is inspected instead.

    Returns one of:
      * ``'@ignore'`` - the process has died, or the ``processes`` config
        section maps its name to an empty value;
      * ``'systemd'`` - the process is PID 1 on a systemd system;
      * a service name - taken from the ``processes`` config section, from
        ``systemctl whoami`` on systemd, or from the platform's
        ``identify_service_from_bin`` otherwise;
      * ``None`` - no service that may be restarted could be determined.
    """
    conf = sau.config
    platform = sau.platforms.get_platform()
    log = logging.getLogger(sau.LOGNAME)
    systemd = on_systemd()
    if not systemd:
        proc = _get_top_parent(proc)
        if proc is None:
            # _get_top_parent returns None when the process has quit.
            log.debug('process died before its top parent could be determined')
            return '@ignore'
    try:
        proc_name = proc.name()
        service_exe = proc.exe()
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        log.debug('{} died'.format(proc))
        return '@ignore'

    service_name = conf.get('processes', proc_name, fallback=None)
    log.debug(f'configuration of process "{proc_name}" in config: "{service_name}"')
    if service_name == '':
        log.debug('Ignoring process {}'.format(proc))
        return '@ignore'
    if service_name:
        # Explicit process -> service mapping from the configuration.
        return service_name

    # Systemd has its own way...
    if systemd:
        if proc.pid == 1:
            return 'systemd'
        ret, unit, err = sau.helpers.exec_cmd([ '/usr/bin/systemctl', 'whoami', f'{proc.pid}' ])
        # Check the exit status before touching the output: on failure the
        # output cannot be relied on to contain a unit name.
        if ret != 0:
            log.debug(f'Non-success ({ret}) when checking unit for process: {err}')
            return None
        unit = unit.strip()
        # Split on the LAST dot only; unit names may contain dots themselves.
        name, _sep, unit_type = unit.rpartition('.')
        if unit_type != 'service':
            log.warning(f'not restarting non-service unit "{unit}"; owner of {proc}')
            return None
        if name.startswith('user@'):
            log.warning(f'Not restarting user service {unit}; please log out and log in again')
            return None
        # A unit with an explicit, valid policy is accepted regardless of
        # its enablement state.
        policy = conf.get('services', name, fallback=None)
        if policy and policy.lower() in valid_service_policies:
            return name
        _ret, enabled, _err = sau.helpers.exec_cmd([ '/usr/bin/systemctl', 'is-enabled', unit ])
        enabled = enabled.strip()
        if enabled not in ('enabled', 'static'):
            log.warning(f'Unit {name}.service has enable status: {enabled} - will only restart "enabled" services')
            return None
        return name

    # if the exe file has been deleted since started, service_exe will be empty
    # and we'll have to guess
    if not service_exe:
        log.debug('Could not get full path to executable for process {}, will attempt to guess'.format(proc))
        # Guess from the process name; no service name is known at this point.
        service_exe = get_exe_file(proc_name)
        if not service_exe:
            log.error('Failed to find executable for process {}'.format(proc))
            return None
    try:
        return platform.identify_service_from_bin(service_exe)
    except sau.errors.UnknownServiceError:
        log.warning('Could not find service for process {}'.format(proc))
        return None
def _get_top_parent(proc):
    """Return the ancestor of ``proc`` that sits second from the end of its
    parent chain, or ``proc`` itself when the chain has fewer than two entries.

    Returns None when the parents cannot be read because the process is gone
    or inaccessible.

    NOTE(review): assumes ``proc.parents()`` lists the nearest ancestor first,
    so the second-to-last entry is the one directly below the root process.
    Confirm against the psutil documentation.
    """
    logger = logging.getLogger(sau.LOGNAME)
    try:
        ancestors = proc.parents()
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        # any of these exceptions means the process has quit
        return None
    if len(ancestors) >= 2:
        logger.debug('{} has top parent {}'.format(proc, ancestors[-2]))
        return ancestors[-2]
    logger.debug('{} is its own top parent'.format(proc))
    return proc