better support for service restarts on systemd
This commit is contained in:
parent
22a2b4557b
commit
b355a6ceb2
100
sau/services.py
100
sau/services.py
@ -7,6 +7,7 @@ import psutil
|
|||||||
|
|
||||||
import sau
|
import sau
|
||||||
import sau.errors
|
import sau.errors
|
||||||
|
import sau.helpers
|
||||||
import sau.platforms
|
import sau.platforms
|
||||||
|
|
||||||
proc_fd_map_re = re.compile(r'^.*(/[^\(]*) \(deleted\)$')
|
proc_fd_map_re = re.compile(r'^.*(/[^\(]*) \(deleted\)$')
|
||||||
@ -16,7 +17,7 @@ def _warn(policy, msg):
|
|||||||
if not policy.startswith('silent'):
|
if not policy.startswith('silent'):
|
||||||
log.warning(msg)
|
log.warning(msg)
|
||||||
|
|
||||||
def _get_deleted_open_files(proc):
|
def get_deleted_open_files(proc):
|
||||||
log = logging.getLogger(sau.LOGNAME)
|
log = logging.getLogger(sau.LOGNAME)
|
||||||
files = set()
|
files = set()
|
||||||
|
|
||||||
@ -62,25 +63,40 @@ def get_exe_file(name):
|
|||||||
log.debug('Found binary for {} at {}'.format(name, root))
|
log.debug('Found binary for {} at {}'.format(name, root))
|
||||||
return os.path.join(root, name)
|
return os.path.join(root, name)
|
||||||
|
|
||||||
|
def _get_processes():
    """Collect the running processes that still hold deleted files open.

    Walks every process yielded by ``psutil.process_iter()`` and keeps the
    ones for which ``get_deleted_open_files`` reports something truthy.
    Each match is logged at debug level.

    Returns:
        set: the matching process objects, as yielded by psutil.
    """
    logger = logging.getLogger(sau.LOGNAME)
    flagged = set()
    for candidate in psutil.process_iter():
        # Guard clause: nothing deleted-but-open means nothing to report.
        if not get_deleted_open_files(candidate):
            continue
        logger.debug('{} has open deleted files'.format(candidate))
        flagged.add(candidate)

    return flagged
|
||||||
|
|
||||||
def restart_services():
|
def restart_services():
|
||||||
log = logging.getLogger(sau.LOGNAME)
|
log = logging.getLogger(sau.LOGNAME)
|
||||||
platform = sau.platforms.get_platform()
|
platform = sau.platforms.get_platform()
|
||||||
conf = sau.config
|
conf = sau.config
|
||||||
check_procs = set()
|
|
||||||
for proc in psutil.process_iter():
|
|
||||||
files = _get_deleted_open_files(proc)
|
|
||||||
if files:
|
|
||||||
log.info('{} has open deleted files'.format(proc))
|
|
||||||
check_procs.add(proc)
|
|
||||||
|
|
||||||
|
check_procs = _get_processes()
|
||||||
# wait before the second test
|
# wait before the second test
|
||||||
time.sleep(1)
|
time.sleep(5)
|
||||||
|
|
||||||
|
on_systemd = False
|
||||||
|
try:
|
||||||
|
init_proc = psutil.Process(pid=1)
|
||||||
|
if init_proc.name() == 'systemd':
|
||||||
|
on_systemd = True
|
||||||
|
except psutil.NoSuchProcess:
|
||||||
|
pass
|
||||||
|
|
||||||
# perform a second check to remove potential false positives
|
# perform a second check to remove potential false positives
|
||||||
service_procs = set()
|
service_procs = set()
|
||||||
retest_procs = set()
|
retest_procs = set()
|
||||||
for proc in check_procs:
|
for proc in check_procs:
|
||||||
files = _get_deleted_open_files(proc)
|
files = get_deleted_open_files(proc)
|
||||||
if not files:
|
if not files:
|
||||||
# no deleted open files for this process any longer
|
# no deleted open files for this process any longer
|
||||||
continue
|
continue
|
||||||
@ -89,32 +105,55 @@ def restart_services():
|
|||||||
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
||||||
# either of the above exceptions means the process has quit
|
# either of the above exceptions means the process has quit
|
||||||
continue
|
continue
|
||||||
parent = _get_top_parent(proc)
|
if on_systemd:
|
||||||
|
service_procs.add(proc)
|
||||||
|
else:
|
||||||
|
parent = _get_top_parent(proc)
|
||||||
|
service_procs.add(parent)
|
||||||
|
|
||||||
service_procs.add(parent)
|
|
||||||
retest_procs.add(proc)
|
retest_procs.add(proc)
|
||||||
|
|
||||||
|
recommend_restart = False
|
||||||
processes = {}
|
processes = {}
|
||||||
services = {}
|
services = {}
|
||||||
for proc in service_procs:
|
for proc in service_procs:
|
||||||
if not proc:
|
if not proc:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
service_name = None
|
||||||
try:
|
try:
|
||||||
service_exe = proc.exe()
|
service_exe = proc.exe()
|
||||||
proc_name = proc.name()
|
proc_name = proc.name()
|
||||||
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
||||||
log.debug('{} died before it could be restarted'.format(proc))
|
log.debug('{} died before it could be restarted'.format(proc))
|
||||||
continue
|
continue
|
||||||
service_name = _get_service_from_proc(proc)
|
|
||||||
|
if on_systemd:
|
||||||
|
ret, unit, err = sau.helpers.exec_cmd([ '/usr/bin/systemctl', 'whoami', f'{proc.pid}' ])
|
||||||
|
name, unit_type = unit.strip().split('.')
|
||||||
|
if ret != 0:
|
||||||
|
log.debug(f'Non-success ({ret}) when checking unit for process: {err}')
|
||||||
|
continue
|
||||||
|
elif unit_type != 'service':
|
||||||
|
log.warning(f'not restarting non-service unit "{unit}"; owner of {proc}')
|
||||||
|
else:
|
||||||
|
_ret, enabled, _err = sau.helpers.exec_cmd([ '/usr/bin/systemctl', 'is-enabled', unit ])
|
||||||
|
enabled = enabled.strip()
|
||||||
|
if enabled != 'enabled':
|
||||||
|
log.warning(f'Unit {name}.service has enable status: {enabled} - will only restart "enabled" services')
|
||||||
|
else:
|
||||||
|
service_name = name
|
||||||
|
else:
|
||||||
|
service_name = _get_service_from_proc(proc)
|
||||||
|
|
||||||
if not service_name:
|
if not service_name:
|
||||||
log.debug('no service for process {}'.format(proc))
|
log.warning('no service for process {}'.format(proc))
|
||||||
|
recommend_restart = True
|
||||||
continue
|
continue
|
||||||
|
|
||||||
services[proc_name] = service_name
|
services[proc_name] = service_name
|
||||||
processes[service_name] = [proc]
|
processes[service_name] = [proc]
|
||||||
|
|
||||||
recommend_restart = False
|
|
||||||
for service in set([x for x in services.values() if x]):
|
for service in set([x for x in services.values() if x]):
|
||||||
policy = _get_service_restart_policy(service)
|
policy = _get_service_restart_policy(service)
|
||||||
if policy == 'ignore':
|
if policy == 'ignore':
|
||||||
@ -131,34 +170,17 @@ def restart_services():
|
|||||||
|
|
||||||
tested_parents = set()
|
tested_parents = set()
|
||||||
for proc in retest_procs:
|
for proc in retest_procs:
|
||||||
parent = _get_top_parent(proc)
|
try:
|
||||||
if not parent:
|
proc_name = proc.name()
|
||||||
continue
|
if proc_name not in services:
|
||||||
parent_name = parent.name()
|
continue
|
||||||
if parent in tested_parents:
|
except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
|
||||||
log.debug('{} belongs to already tested parent {}'.format(proc, parent))
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if _get_deleted_open_files(proc):
|
if get_deleted_open_files(proc):
|
||||||
tested_parents.add(parent)
|
service = services[proc_name]
|
||||||
service = _get_service_from_proc(parent)
|
|
||||||
if not service:
|
|
||||||
log.warning('could not re-check process {} - failed to identify service'.format(proc))
|
|
||||||
recommend_restart = True
|
|
||||||
continue
|
|
||||||
policy = _get_service_restart_policy(service)
|
policy = _get_service_restart_policy(service)
|
||||||
|
_warn(policy, '{} still has deleted files open'.format(proc, parent))
|
||||||
log.debug('{} is in service {}'.format(proc, service))
|
|
||||||
if parent_name in services and not services[parent_name]:
|
|
||||||
_warn(policy, '{} (parent {}) does not belong to a service and could not be restarted'.format(proc, parent))
|
|
||||||
recommend_restart = True
|
|
||||||
continue
|
|
||||||
elif parent_name in services:
|
|
||||||
policy = _get_service_restart_policy(service)
|
|
||||||
log.debug('service {} has policy {}'.format(service, policy))
|
|
||||||
if policy in ('ignore', 'warn'):
|
|
||||||
continue
|
|
||||||
_warn(policy, '{} (parent {}) still has deleted files open'.format(proc, parent))
|
|
||||||
recommend_restart = True
|
recommend_restart = True
|
||||||
return recommend_restart
|
return recommend_restart
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user