From 8abf7d89f6e437537a21524ea15326acd5701ca2 Mon Sep 17 00:00:00 2001 From: Fredrik Eriksson Date: Sun, 14 Jul 2024 12:37:46 +0200 Subject: [PATCH] update readme --- README.md | 94 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 +- 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 41c2268..ae01cba 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,95 @@ # sysalert +Generic OnFailure= and OnSuccess= handler for systemd -Utility to send notifications from systemd using OnFailure= and OnSuccess= hooks. \ No newline at end of file +## Purpose +This tool is intended to be used to send notifications when a systemd service fails. It is installed by setting `sysalert-failure@%n.service` and `sysalert-success@%n.service` as OnFailure= and OnSuccess=-handlers in the systemd service files. + +The primary purpose is to keep track of services triggered by timers and paths and similar, but it +can be used to monitor any systemd service. + +## Features and inner workings + - ignore X failures before sending notification + - do not send repeated notifications of the same problem + - send recovery notifications + - flexible alert mechanism + +On a high level sysalert works like this: + +When sysalert-failure is triggered the triggering service's exit status, invocation ID and a timestamp +are saved to a sqlite database. Based on previous results and configuration in `/etc/sysalert.ini` a +notification is sent using the configured alert method. + +When sysalert-success is triggered sysalert will send a notification about service +recovery (if enabled) as well as clear the sqlite database from any failures from the triggering service.
+ + +## Installation +[Build and install](https://packaging.python.org/en/latest/tutorials/packaging-projects/) the python +package and install the configuration file and systemd services: +``` +cp config/sysalert.ini /etc/ +cp systemd/sysalert-failure@.service systemd/sysalert-success@.service /etc/systemd/system/ +mkdir /etc/systemd/system/sysalert-.service.d +cp systemd/overrides/sysalert-.service.d.conf /etc/systemd/system/sysalert-.service.d/10-sysalert.conf +systemctl daemon-reload +``` + +Once everything is installed you can set `sysalert-failure@%n.service` and `sysalert-success@%n.service` as OnFailure= and OnSuccess=-handlers in any service unit to get an email notification on failure. +It is also possible to set this system-wide by creating +`/etc/systemd/system/service.d/10-sysalert.conf` like so: +``` +[Unit] +OnFailure=sysalert-failure@%n.service +OnSuccess=sysalert-success@%n.service +``` +**WARNING:** setting a system-wide handler like this will override any OnFailure= or OnSuccess= set +in service files, and modifying dependencies for sysalert may cause the system to fail at boot. Only +do this if you're sure it works on your system or are ready to troubleshoot boot failures. + + +There is also a [Gentoo ebuild](https://gitea.fulh.ax/feffe/feffe-portage-overlay/src/branch/master/sys-apps/sysalert) +I made for my own convenience, but beware as the ebuild installs sysalert as a system-wide handler +as described above. + +## Configuration +sysalert searches /etc/sysalert.ini for configuration; see example configuration in repo. + +Note that by default the sysalert-services depend on network.target; depending on your alert-methods +you may need to override this. + +## Alert methods +At the moment the only implemented alert method is 'sysalert.email' which uses smtp to send an email +about service problems. Currently the email content is not templated, but it does include the +journal log for the failed service as well as other nice-to-know information.
+ +sysalert uses dynamic imports to import the alert methods. sysalert.email is a python module +implemented in this package, but it can be any python module on your system that implements the +`success()` and `failure()` methods. + +### `success()` and `failure()` +Any module that implements these methods can be used as an alert-method. These methods take three arguments: + + - **service_name** - name of the service + - **failures** - list of dicts containing data about previous (and current) failures. The list is + sorted on time with the first failure first and latest failure at the end. Currently the dicts include: + - `service_result` + - `exit_code` + - `exit_status` + - `invocation_id` + - `timestamp` + - `alert_method` + + - **config** - a dict containing all key-values defined in the configuration section for the + alert-method. For example 'sysalert.email'-section for 'sysalert.email' alert method. + +## Stuff to fix +This was a weekend project and is not very polished. Here are a few things that could probably be +improved: + - Fix hardcoded paths (config-file and database location) + - Implement command line tool (running `sysalert` manually should make it possible to update/clear + database entries, maybe reconfigure and see alert status) + - Proper packaging and maybe publish in pip + - Implement more handlers (maybe `sysalert.syslog`) + - Find a method to detect if a failed service was triggered manually or by a timer/path/other + service etc. Would be nice to be able to set this as default only on services triggered by + timers...
diff --git a/pyproject.toml b/pyproject.toml index b48cb64..5cd6721 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ {name = "Fredrik Eriksson", email = "sysalert@fulh.ax"} ] dependencies = [ "systemd-python" ] -description = "generic OnFailure= and OnSuccess= handler for systemd" +description = "Generic OnFailure= and OnSuccess= handler for systemd" readme = "README.md" license = { file = "LICENSE" } keywords = [ "systemd" ]