diff options
author | Petri Latvala <petri.latvala@intel.com> | 2018-04-26 16:54:12 +0300 |
---|---|---|
committer | Petri Latvala <petri.latvala@intel.com> | 2018-04-26 16:54:12 +0300 |
commit | a04b8fd868a0998fbc25f3dbc7038dae3fdffdc8 (patch) | |
tree | 688df16cf5936ef52c32a0f5f0e942013cdb72da | |
parent | a6759b0d36fa7c48a84c8bea9fc5c443df1b9ad3 (diff) |
Watchdog support
-rw-r--r-- | runner/executor.c | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/runner/executor.c b/runner/executor.c
index 0b7d6608..c42cbe93 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,9 +1,11 @@
 #include <errno.h>
 #include <fcntl.h>
+#include <linux/watchdog.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/ioctl.h>
 #include <sys/select.h>
 #include <sys/signalfd.h>
 #include <sys/stat.h>
@@ -64,6 +66,161 @@ error:
 	return -errno;
 }
 
+/*
+ * Watchdog devices opened by init_watchdogs(). A slot holds -1 once its
+ * device has been released; num_dogs counts slots, not live descriptors.
+ */
+struct watchdogs
+{
+	int *fds;
+	size_t num_dogs;
+} watchdogs;
+
+/*
+ * Release slot i: write the 'V' magic-close character, which asks drivers
+ * that support it to disarm on close, close the descriptor and mark the
+ * slot unused. Does nothing for a slot that was already released.
+ */
+static void release_watchdog(size_t i)
+{
+	if (watchdogs.fds[i] < 0)
+		return;
+
+	write(watchdogs.fds[i], "V", 1);
+	close(watchdogs.fds[i]);
+	watchdogs.fds[i] = -1;
+}
+
+/*
+ * Disarm and close every watchdog that is still open, then drop the table.
+ *
+ * This runs more than once per process: execute() calls it directly and
+ * close_watchdogs_atexit() calls it again at exit. Emptying the table makes
+ * the later calls no-ops, so they cannot write to or close descriptor
+ * numbers that have been reused for something else in the meantime.
+ *
+ * settings may be NULL, in which case nothing is logged.
+ */
+static void close_watchdogs(struct settings *settings)
+{
+	size_t i;
+
+	if (settings && settings->log_level >= LOG_LEVEL_VERBOSE)
+		printf("Closing watchdogs\n");
+
+	for (i = 0; i < watchdogs.num_dogs; i++)
+		release_watchdog(i);
+
+	free(watchdogs.fds);
+	watchdogs.fds = NULL;
+	watchdogs.num_dogs = 0;
+}
+
+/* atexit() handler: disarm the watchdogs without logging. */
+static void close_watchdogs_atexit(void)
+{
+	close_watchdogs(NULL);
+}
+
+/*
+ * Open /dev/watchdog0, /dev/watchdog1, ... until an open fails, and arrange
+ * for them to be disarmed at exit. Does nothing beyond resetting the table
+ * unless settings->use_watchdog is set and settings->inactivity_timeout is
+ * positive.
+ */
+static void init_watchdogs(struct settings *settings)
+{
+	int i;
+	char name[32];
+	int fd;
+
+	memset(&watchdogs, 0, sizeof(watchdogs));
+
+	if (!settings->use_watchdog || settings->inactivity_timeout <= 0)
+		return;
+
+	if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+		printf("Initializing watchdogs\n");
+	}
+
+	atexit(close_watchdogs_atexit);
+
+	for (i = 0; ; i++) {
+		int *fds;
+
+		snprintf(name, sizeof(name), "/dev/watchdog%d", i);
+		if ((fd = open(name, O_RDWR | O_CLOEXEC)) < 0)
+			break;
+
+		fds = realloc(watchdogs.fds,
+			      (watchdogs.num_dogs + 1) * sizeof(*fds));
+		if (!fds) {
+			/*
+			 * No room to track this device, so it could never
+			 * be pinged. Release it again and stop probing.
+			 */
+			write(fd, "V", 1);
+			close(fd);
+			break;
+		}
+
+		watchdogs.fds = fds;
+		watchdogs.fds[watchdogs.num_dogs++] = fd;
+
+		if (settings->log_level >= LOG_LEVEL_VERBOSE)
+			printf(" %s\n", name);
+	}
+}
+
+/*
+ * Request the given timeout, in seconds, from every open watchdog.
+ *
+ * WDIOC_SETTIMEOUT stores the timeout the device actually applied back
+ * into its argument. If a device settles for less than was asked, the pass
+ * restarts with that smaller value so all devices share one timeout.
+ * Devices for which the ioctl fails are released.
+ *
+ * Returns the timeout left in effect; with no open watchdog that is the
+ * requested value unchanged.
+ */
+static int watchdogs_set_timeout(int timeout)
+{
+	size_t i;
+	int orig_timeout = timeout;
+
+	for (i = 0; i < watchdogs.num_dogs; i++) {
+		/* Released on an earlier pass; nothing to configure. */
+		if (watchdogs.fds[i] < 0)
+			continue;
+
+		if (ioctl(watchdogs.fds[i], WDIOC_SETTIMEOUT, &timeout)) {
+			release_watchdog(i);
+			continue;
+		}
+
+		if (timeout < orig_timeout) {
+			/*
+			 * Timeout of this caliber refused. We want to
+			 * use the same timeout for all devices.
+			 */
+			return watchdogs_set_timeout(timeout);
+		}
+	}
+
+	return timeout;
+}
+
+/* Send a keepalive to every watchdog that is still open. */
+static void ping_watchdogs(void)
+{
+	size_t i;
+
+	for (i = 0; i < watchdogs.num_dogs; i++) {
+		if (watchdogs.fds[i] >= 0)
+			ioctl(watchdogs.fds[i], WDIOC_KEEPALIVE, 0);
+	}
+}
+
 static void prune_subtest(struct job_list_entry *entry, char *subtest)
 {
 	char *excl;
@@ -289,6 +446,8 @@ static int monitor_output(pid_t child,
 	int n, status;
 	int nfds = outfd;
 	int timeout = settings->inactivity_timeout;
+	int timeout_intervals = 1, intervals_left = 1;
+	int wd_extra = 10;
 	int killed = 0; /* 1 = sigterm sent, 2 = sigkill sent */
 	struct timespec time_beg, time_end;
 	bool aborting = false;
@@ -303,6 +462,43 @@ static int monitor_output(pid_t child,
 		nfds = sigfd;
 	nfds++;
 
+	if (timeout > 0) {
+		/*
+		 * Use original timeout plus some leeway. If we're still
+		 * alive, we want to kill the test process instead of cutting
+		 * power.
+		 */
+		int wd_timeout = watchdogs_set_timeout(timeout + wd_extra);
+
+		if (wd_timeout < timeout + wd_extra) {
+			int ping_interval;
+
+			/* Watchdog timeout smaller, so ping it more often */
+			if (wd_timeout - wd_extra <= 0)
+				wd_extra = wd_timeout / 2;
+
+			/*
+			 * Both values below are used as divisors; keep
+			 * them at one or more.
+			 */
+			ping_interval = wd_timeout - wd_extra;
+			if (ping_interval < 1)
+				ping_interval = 1;
+			timeout_intervals = timeout / ping_interval;
+			if (timeout_intervals < 1)
+				timeout_intervals = 1;
+
+			intervals_left = timeout_intervals;
+			timeout /= timeout_intervals;
+
+			if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+				printf("Watchdog doesn't support the timeout we requested (shortened to %d seconds).\n"
+				       "Using %d intervals of %d seconds.\n",
+				       wd_timeout, timeout_intervals, timeout);
+			}
+		}
+	}
+
 	while (outfd >= 0 || errfd >= 0 || sigfd >= 0) {
 		struct timeval tv = { .tv_sec = timeout };
 
@@ -323,6 +519,14 @@ static int monitor_output(pid_t child,
 		}
 
 		if (n == 0) {
+			/* Still alive here, so feed the watchdogs on every interval. */
+			ping_watchdogs();
+
+			intervals_left--;
+			if (intervals_left) {
+				continue;
+			}
+
 			switch (killed) {
 			case 0:
 				if (settings->log_level >= LOG_LEVEL_NORMAL) {
@@ -358,6 +562,9 @@ static int monitor_output(pid_t child,
 			continue;
 		}
 
+		intervals_left = timeout_intervals;
+		ping_watchdogs();
+
 		if (outfd >= 0 && FD_ISSET(outfd, &set)) {
 			char *newline;
 
@@ -889,6 +1096,8 @@ bool execute(struct execute_state *state,
 		return false;
 	}
 
+	init_watchdogs(settings);
+
 	if (!uname(&unamebuf)) {
 		dprintf(unamefd, "%s %s %s %s %s\n",
 			unamebuf.sysname,
@@ -911,6 +1120,7 @@ bool execute(struct execute_state *state,
 		if (result <= 0) {
 			close(testdirfd);
 			close(resdirfd);
+			close_watchdogs(settings);
 			if (result < 0) {
 				memset(state, 0, sizeof(*state));
 				initialize_execute_from_resume(state, settings, job_list);
@@ -920,5 +1130,8 @@ bool execute(struct execute_state *state,
 		}
 	}
 
+	close(testdirfd);
+	close(resdirfd);
+	close_watchdogs(settings);
 	return true;
 }