250 lines
8.3 KiB
Python
250 lines
8.3 KiB
Python
import logging
|
|
import os
|
|
import re
|
|
import time
|
|
|
|
import psutil
|
|
|
|
import sau
|
|
import sau.errors
|
|
import sau.helpers
|
|
import sau.platforms
|
|
|
|
# Matches a line that ends in "<path> (deleted)" (the form searched for in
# /proc/<pid>/maps) and captures the path in group 1.
# The leading wildcard is non-greedy on purpose: a greedy `.*` swallows
# everything up to the last '/', so group 1 would only hold "/<basename>"
# rather than the full path.
proc_fd_map_re = re.compile(r'^.*?(/[^\(]*) \(deleted\)$')
|
|
|
|
def _warn(policy, msg):
    """Log *msg* as a warning unless *policy* starts with ``silent``.

    :param policy: restart policy string, e.g. ``restart`` or ``silent-restart``
    :param msg: message passed to the logger's ``warning()``
    """
    logger = logging.getLogger(sau.LOGNAME)
    if policy.startswith('silent'):
        # silent-* policies suppress the warning entirely
        return
    logger.warning(msg)
|
|
|
|
def get_deleted_open_files(proc):
    """Return the set of deleted files that *proc* still holds open.

    If ``/proc/<pid>/maps`` exists it is scanned with ``proc_fd_map_re`` and
    the set contains the captured path strings. Otherwise ``proc.open_files()``
    is used and the set contains the entries whose path is empty or no longer
    exists on disk (the entry objects themselves, since the path may be empty).

    :param proc: object with a ``pid`` attribute and an ``open_files()``
        method (a ``psutil.Process`` in this module)
    :returns: a possibly empty set; empty also when the process could not be
        inspected
    """
    files = set()

    # try the linux-way first
    maps_file = '/proc/{}/maps'.format(proc.pid)
    if os.path.isfile(maps_file):
        try:
            # surrogateescape keeps odd bytes in mapped file names from
            # aborting the scan with a UnicodeDecodeError
            with open(maps_file, 'r', errors='surrogateescape') as f:
                for line in f:
                    match = proc_fd_map_re.match(line)
                    if match:
                        files.add(match.group(1))
        except OSError as err:
            # The process may exit, or be unreadable for us, between the
            # isfile() check and the read. Treat it as having nothing to
            # report instead of crashing the whole scan.
            logging.getLogger(sau.LOGNAME).debug(
                'Could not read {}: {}'.format(maps_file, err))
            return set()
        return files

    # on FreeBSD psutils open_files() helpfully returns a null path if a file
    # has been deleted.
    try:
        for f in proc.open_files():
            if not (f.path and os.path.exists(f.path)):
                files.add(f)
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        # best effort: a vanished or inaccessible process reports whatever
        # was collected so far
        pass
    return files
|
|
|
|
def get_exe_file(name):
    """Search the well-known binary directories for a file called *name*.

    Each existing directory is walked recursively, in the order listed, and
    the first match wins.

    :param name: file name of the executable to look for
    :returns: full path to the first match, or ``None`` if nothing was found
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # silently concatenated, which used to turn the first three entries into
    # the single bogus path '/bin/sbin/usr/bin'.
    search_paths = (
        '/bin',
        '/sbin',
        '/usr/bin',
        '/usr/sbin',
        '/usr/local/bin',
        '/usr/local/sbin',
        '/libexec',
        '/usr/libexec',
        '/usr/local/libexec',
    )
    for path in search_paths:
        if not os.path.isdir(path):
            continue
        for root, _dirs, files in os.walk(path):
            if name in files:
                logging.getLogger(sau.LOGNAME).debug(
                    'Found binary for {} at {}'.format(name, root))
                return os.path.join(root, name)
    return None
|
|
|
|
# return all processes that still hold deleted files open
|
|
def _get_processes():
    """Collect every process for which get_deleted_open_files() reports files.

    :returns: set of the process objects yielded by ``psutil.process_iter()``
        that have at least one deleted file open
    """
    logger = logging.getLogger(sau.LOGNAME)
    candidates = set()
    for candidate in psutil.process_iter():
        if not get_deleted_open_files(candidate):
            continue
        logger.debug('{} has open deleted files'.format(candidate))
        candidates.add(candidate)

    return candidates
|
|
|
|
def _init_is_systemd():
    """Return True if PID 1 reports the process name ``systemd``."""
    try:
        return psutil.Process(pid=1).name() == 'systemd'
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        # PID 1 could not be inspected; use the non-systemd code path
        return False


def restart_services():
    """Restart services whose processes still hold deleted files open.

    Steps:

    1. Collect processes with deleted open files, wait five seconds and check
       them again to drop false positives.
    2. Map each remaining process to a service, either via ``systemctl``
       when PID 1 is systemd or via ``_get_service_from_proc()`` otherwise.
    3. Apply each service's restart policy (ignore / warn / restart / reboot,
       optionally ``silent-``).
    4. Re-check the processes; anything that still has deleted files open
       causes a restart recommendation.

    :returns: ``True`` if something could not be fixed by restarting services
        (no service found, a reboot policy, or files still open afterwards),
        otherwise ``False``
    """
    log = logging.getLogger(sau.LOGNAME)
    platform = sau.platforms.get_platform()
    # any of these means the process is gone or cannot be inspected
    proc_gone = (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied)

    check_procs = _get_processes()
    # wait before the second test
    time.sleep(5)

    on_systemd = _init_is_systemd()

    # perform a second check to remove potential false positives
    service_procs = set()
    retest_procs = set()
    for proc in check_procs:
        if not get_deleted_open_files(proc):
            # no deleted open files for this process any longer
            continue
        try:
            # only used as a liveness probe
            proc.exe()
        except proc_gone:
            continue

        if on_systemd:
            service_procs.add(proc)
        else:
            parent = _get_top_parent(proc)
            if parent is not None:
                service_procs.add(parent)

        retest_procs.add(proc)

    recommend_restart = False
    # process name -> service name
    services = {}
    for proc in service_procs:
        service_name = None
        try:
            proc.exe()
            proc_name = proc.name()
        except proc_gone:
            log.debug('{} died before it could be restarted'.format(proc))
            continue

        if on_systemd:
            ret, unit, err = sau.helpers.exec_cmd(
                ['/usr/bin/systemctl', 'whoami', str(proc.pid)])
            if ret != 0:
                # check the exit status before touching the output, which may
                # be empty or not a unit name at all
                log.debug(f'Non-success ({ret}) when checking unit for process: {err}')
                continue

            unit = unit.strip()
            # rpartition copes with unit names that contain several dots
            name, _, unit_type = unit.rpartition('.')
            if unit_type != 'service' or not name:
                log.warning(f'not restarting non-service unit "{unit}"; owner of {proc}')
            else:
                _ret, enabled, _err = sau.helpers.exec_cmd(
                    ['/usr/bin/systemctl', 'is-enabled', unit])
                enabled = enabled.strip()
                if enabled != 'enabled':
                    log.warning(
                        f'Unit {name}.service has enable status: {enabled}'
                        ' - will only restart "enabled" services')
                else:
                    service_name = name
        else:
            service_name = _get_service_from_proc(proc)

        if not service_name:
            log.warning('no service for process {}'.format(proc))
            recommend_restart = True
            continue

        services[proc_name] = service_name

    for service in set(services.values()):
        policy = _get_service_restart_policy(service)
        if policy == 'ignore':
            log.info('Service "{}" ignored by configuration'.format(service))
            continue
        if policy == 'warn':
            log.warning('Service "{}" has open deleted files and should be restarted'.format(service))
            continue
        if 'reboot' in policy:
            _warn(policy, 'Rebooting because {} has opened files'.format(service))
            recommend_restart = True
        # NOTE(review): reboot policies fall through and also restart the
        # service, exactly as before this change - confirm this is intended.
        _warn(policy, 'Restarting service {}'.format(service))
        platform.restart_service(service)

    for proc in retest_procs:
        try:
            proc_name = proc.name()
        except proc_gone:
            continue

        if not get_deleted_open_files(proc):
            continue

        # The process may have no entry in `services`: no service was found
        # for it, or (off systemd) the mapping was recorded under its top
        # parent's name.
        service = services.get(proc_name)
        message = '{} still has deleted files open'.format(proc)
        if service is None:
            log.warning(message)
        else:
            _warn(_get_service_restart_policy(service), message)
        recommend_restart = True
    return recommend_restart
|
|
|
|
def _get_service_restart_policy(service):
    """Return the lower-cased restart policy that applies to *service*.

    Lookup order:

    1. option *service* in the ``services`` config section,
    2. option ``default_service_policy`` in the ``default`` section,
    3. the hard-coded fallback ``warn``.

    Invalid values are reported with a warning and the next step is tried.

    :param service: service name used as the option key
    :returns: one of ``restart``, ``warn``, ``ignore``, ``silent-restart``,
        ``reboot`` or ``silent-reboot``
    """
    log = logging.getLogger(sau.LOGNAME)
    conf = sau.config
    # One whitelist for both lookups: the default used to reject
    # 'silent-reboot' although it is accepted as a per-service policy.
    valid_policies = (
        'restart', 'warn', 'ignore',
        'silent-restart', 'reboot', 'silent-reboot',
    )

    policy = conf.get('services', service, fallback=None)
    if policy:
        if policy.lower() in valid_policies:
            return policy.lower()
        log.warning('service policy {} for {} is invalid'.format(policy, service))

    default_policy = conf.get('default', 'default_service_policy', fallback='warn')
    if default_policy.lower() in valid_policies:
        return default_policy.lower()
    log.warning('default service policy {} is invalid'.format(default_policy))
    return 'warn'
|
|
|
|
def _get_service_from_proc(proc):
    """Return the name of the service that owns *proc*, or ``None``.

    The lookup is done for the top parent of *proc*. An entry for the process
    name in the ``processes`` config section takes precedence, and an empty
    value there means "ignore this process". Without an entry the platform is
    asked to identify the service from the executable path. If that path is
    unavailable, ``get_exe_file()`` is used to guess it from the process name.

    :param proc: process to resolve (a ``psutil.Process`` in this module)
    :returns: service name, or ``None`` if the process is gone, ignored, or no
        service could be determined
    """
    log = logging.getLogger(sau.LOGNAME)
    conf = sau.config
    platform = sau.platforms.get_platform()

    top = _get_top_parent(proc)
    if top is None:
        # _get_top_parent() returns None when the process has quit
        log.debug('{} died'.format(proc))
        return None
    proc = top

    try:
        proc_name = proc.name()
        service_exe = proc.exe()
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        log.debug('{} died'.format(proc))
        return None

    service_name = conf.get('processes', proc_name, fallback=None)
    if service_name == '':
        log.debug('Ignoring process {}'.format(proc))
        return None

    if not service_name:
        # if the exe file has been deleted since started, service_exe will be empty
        # and we'll have to guess
        if not service_exe:
            log.debug('Could not get full path to executable for process {}, will attempt to guess'.format(proc))
            # search by process name; service_name is always empty here
            service_exe = get_exe_file(proc_name)
            if not service_exe:
                log.error('Failed to find executable for process {}'.format(proc))
                return None

        try:
            service_name = platform.identify_service_from_bin(service_exe)
        except sau.errors.UnknownServiceError:
            log.warning('Could not find service for process {}'.format(proc))
            return None
    return service_name
|
|
|
|
def _get_top_parent(proc):
    """Return the top-level ancestor of *proc*.

    That is the second-to-last entry of ``proc.parents()``, presumably the
    ancestor directly below init. When fewer than two parents are reported,
    *proc* itself is returned.

    :param proc: process to inspect (a ``psutil.Process`` in this module)
    :returns: the selected process object, or ``None`` if *proc* could not be
        inspected
    """
    logger = logging.getLogger(sau.LOGNAME)
    try:
        ancestors = proc.parents()
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        # either of the above exceptions means the process has quit
        return None

    if len(ancestors) >= 2:
        top = ancestors[-2]
        logger.debug('{} has top parent {}'.format(proc, top))
        return top

    logger.debug('{} is its own top parent'.format(proc))
    return proc
|