summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Petri Latvala <petri.latvala@intel.com> 2018-04-26 16:54:12 +0300
committer Petri Latvala <petri.latvala@intel.com> 2018-04-26 16:54:12 +0300
commit a04b8fd868a0998fbc25f3dbc7038dae3fdffdc8 (patch)
tree 688df16cf5936ef52c32a0f5f0e942013cdb72da
parent a6759b0d36fa7c48a84c8bea9fc5c443df1b9ad3 (diff)
Watchdog support
-rw-r--r-- runner/executor.c 133
1 files changed, 133 insertions, 0 deletions
diff --git a/runner/executor.c b/runner/executor.c
index 0b7d6608..c42cbe93 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -1,9 +1,11 @@
#include <errno.h>
#include <fcntl.h>
+#include <linux/watchdog.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/ioctl.h>
#include <sys/select.h>
#include <sys/signalfd.h>
#include <sys/stat.h>
@@ -64,6 +66,95 @@ error:
return -errno;
}
+/*
+ * Bookkeeping for the watchdog devices the executor holds open.
+ *
+ * fds is a heap array (grown with realloc() in init_watchdogs()) holding
+ * num_dogs descriptors. An entry is set to -1 by watchdogs_set_timeout()
+ * once that device has been closed.
+ */
+struct watchdogs
+{
+	int *fds;
+	size_t num_dogs;
+};
+
+/* The single, file-level instance used by all watchdog helpers. */
+struct watchdogs watchdogs;
+
+/*
+ * Stop and close every watchdog device recorded in the global watchdogs
+ * state.
+ *
+ * Each open descriptor is sent the character 'V' before close(); per the
+ * Linux watchdog API this "magic close" asks the driver to disarm the
+ * timer. Entries already marked -1 are skipped.
+ *
+ * Afterwards the descriptor array is freed and the state reset to empty, so
+ * calling this function again (e.g. from the atexit handler after execute()
+ * has already closed the watchdogs) is a no-op instead of a write()/close()
+ * on stale descriptor numbers that may have been reused for other files.
+ *
+ * settings may be NULL, in which case nothing is logged to stdout.
+ */
+static void close_watchdogs(struct settings *settings)
+{
+	size_t i;
+
+	if (settings && settings->log_level >= LOG_LEVEL_VERBOSE)
+		printf("Closing watchdogs\n");
+
+	for (i = 0; i < watchdogs.num_dogs; i++) {
+		int fd = watchdogs.fds[i];
+
+		/* Already closed by watchdogs_set_timeout() */
+		if (fd < 0)
+			continue;
+
+		/* Best effort: report the failure but still close the device */
+		if (write(fd, "V", 1) < 0)
+			fprintf(stderr, "Failed to stop a watchdog: %s\n",
+				strerror(errno));
+
+		close(fd);
+	}
+
+	free(watchdogs.fds);
+	watchdogs.fds = NULL;
+	watchdogs.num_dogs = 0;
+}
+
+/*
+ * atexit() hook: closes the watchdogs without logging, since no settings
+ * are available at exit time. Declared with a (void) prototype so its type
+ * matches the void (*)(void) callback that atexit() expects.
+ */
+static void close_watchdogs_atexit(void)
+{
+	close_watchdogs(NULL);
+}
+
+/*
+ * Open the watchdog devices /dev/watchdog0, /dev/watchdog1, ... until the
+ * first one that cannot be opened, recording each descriptor in the global
+ * watchdogs state.
+ *
+ * The state is always reset first. Nothing is opened unless
+ * settings->use_watchdog is set and settings->inactivity_timeout is
+ * positive. An atexit handler that closes the watchdogs is registered the
+ * first time devices are about to be opened; it is not registered again on
+ * later calls.
+ *
+ * If the descriptor array cannot be grown, the device that was just opened
+ * is stopped and closed again (so it is not left running untracked) and
+ * enumeration ends; devices recorded so far stay in use.
+ */
+static void init_watchdogs(struct settings *settings)
+{
+	static bool atexit_registered;
+	char name[32];
+	int fd;
+	int i;
+
+	memset(&watchdogs, 0, sizeof(watchdogs));
+
+	if (!settings->use_watchdog || settings->inactivity_timeout <= 0)
+		return;
+
+	if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+		printf("Initializing watchdogs\n");
+	}
+
+	/* execute() can run more than once per process; register only once */
+	if (!atexit_registered && atexit(close_watchdogs_atexit) == 0)
+		atexit_registered = true;
+
+	for (i = 0; ; i++) {
+		int *grown;
+
+		snprintf(name, sizeof(name), "/dev/watchdog%d", i);
+		fd = open(name, O_RDWR | O_CLOEXEC);
+		if (fd < 0)
+			break;
+
+		/* Keep the old array valid if realloc() fails */
+		grown = realloc(watchdogs.fds,
+				(watchdogs.num_dogs + 1) * sizeof(*grown));
+		if (!grown) {
+			fprintf(stderr, "Out of memory while registering %s\n",
+				name);
+			if (write(fd, "V", 1) < 0)
+				fprintf(stderr, "Failed to stop a watchdog: %s\n",
+					strerror(errno));
+			close(fd);
+			break;
+		}
+
+		watchdogs.fds = grown;
+		watchdogs.fds[watchdogs.num_dogs] = fd;
+		watchdogs.num_dogs++;
+
+		if (settings->log_level >= LOG_LEVEL_VERBOSE)
+			printf(" %s\n", name);
+	}
+}
+
+/*
+ * Apply the same timeout to every open watchdog device.
+ *
+ * WDIOC_SETTIMEOUT is issued on each device with a pointer to timeout, so
+ * the driver may write back the value it actually applied. If a device
+ * reports a smaller value than requested, the whole set is reprogrammed
+ * with that smaller value so that all devices share one timeout.
+ *
+ * A device whose ioctl fails is stopped ('V' written), closed and marked
+ * with -1 in the descriptor array; such entries are skipped here on this
+ * and every later pass.
+ *
+ * Returns the timeout in effect after the pass; this equals the requested
+ * value when no devices are open.
+ */
+static int watchdogs_set_timeout(int timeout)
+{
+	size_t i;
+	int orig_timeout = timeout;
+
+	for (i = 0; i < watchdogs.num_dogs; i++) {
+		int fd = watchdogs.fds[i];
+
+		/* Dropped on an earlier pass */
+		if (fd < 0)
+			continue;
+
+		if (ioctl(fd, WDIOC_SETTIMEOUT, &timeout)) {
+			/* Unusable device: stop it and forget about it */
+			if (write(fd, "V", 1) < 0)
+				fprintf(stderr, "Failed to stop a watchdog: %s\n",
+					strerror(errno));
+			close(fd);
+			watchdogs.fds[i] = -1;
+			continue;
+		}
+
+		if (timeout < orig_timeout) {
+			/*
+			 * Timeout of this caliber refused. We want to
+			 * use the same timeout for all devices.
+			 */
+			return watchdogs_set_timeout(timeout);
+		}
+	}
+
+	return timeout;
+}
+
+/*
+ * Send WDIOC_KEEPALIVE to every watchdog device that is still open.
+ * Entries marked -1 (closed by watchdogs_set_timeout()) are skipped. The
+ * ioctl result is deliberately ignored: pinging is best effort.
+ */
+static void ping_watchdogs(void)
+{
+	size_t i;
+
+	for (i = 0; i < watchdogs.num_dogs; i++) {
+		if (watchdogs.fds[i] < 0)
+			continue;
+
+		ioctl(watchdogs.fds[i], WDIOC_KEEPALIVE, 0);
+	}
+}
+
static void prune_subtest(struct job_list_entry *entry, char *subtest)
{
char *excl;
@@ -289,6 +380,8 @@ static int monitor_output(pid_t child,
int n, status;
int nfds = outfd;
int timeout = settings->inactivity_timeout;
+ int timeout_intervals = 1, intervals_left;
+ int wd_extra = 10;
int killed = 0; /* 1 = sigterm sent, 2 = sigkill sent */
struct timespec time_beg, time_end;
bool aborting = false;
@@ -303,6 +396,30 @@ static int monitor_output(pid_t child,
nfds = sigfd;
nfds++;
+	if (timeout > 0) {
+		/*
+		 * Use original timeout plus some leeway. If we're still
+		 * alive, we want to kill the test process instead of cutting
+		 * power.
+		 */
+		int wd_timeout = watchdogs_set_timeout(timeout + wd_extra);
+
+		if (wd_timeout < timeout + wd_extra) {
+			int ping_interval;
+
+			/* Watchdog timeout smaller, so ping it more often */
+			if (wd_timeout - wd_extra <= 0)
+				wd_extra = wd_timeout / 2;
+
+			/*
+			 * Both divisors below must be at least 1: a watchdog
+			 * timeout equal to the leeway, or an inactivity
+			 * timeout shorter than the ping interval, would
+			 * otherwise divide by zero.
+			 */
+			ping_interval = wd_timeout - wd_extra;
+			if (ping_interval < 1)
+				ping_interval = 1;
+
+			timeout_intervals = timeout / ping_interval;
+			if (timeout_intervals < 1)
+				timeout_intervals = 1;
+
+			timeout /= timeout_intervals;
+
+			if (settings->log_level >= LOG_LEVEL_VERBOSE) {
+				printf("Watchdog doesn't support the timeout we requested (shortened to %d seconds).\n"
+				       "Using %d intervals of %d seconds.\n",
+				       wd_timeout, timeout_intervals, timeout);
+			}
+		}
+	}
+
+	/*
+	 * Always start with a full set of intervals. Assigning this only in
+	 * the shortened-timeout branch left intervals_left uninitialized when
+	 * the first select() timed out before any activity.
+	 */
+	intervals_left = timeout_intervals;
+
while (outfd >= 0 || errfd >= 0 || sigfd >= 0) {
struct timeval tv = { .tv_sec = timeout };
@@ -323,6 +440,13 @@ static int monitor_output(pid_t child,
}
if (n == 0) {
+ intervals_left--;
+ if (intervals_left) {
+ continue;
+ }
+
+ ping_watchdogs();
+
switch (killed) {
case 0:
if (settings->log_level >= LOG_LEVEL_NORMAL) {
@@ -358,6 +482,9 @@ static int monitor_output(pid_t child,
continue;
}
+ intervals_left = timeout_intervals;
+ ping_watchdogs();
+
if (outfd >= 0 && FD_ISSET(outfd, &set)) {
char *newline;
@@ -889,6 +1016,8 @@ bool execute(struct execute_state *state,
return false;
}
+ init_watchdogs(settings);
+
if (!uname(&unamebuf)) {
dprintf(unamefd, "%s %s %s %s %s\n",
unamebuf.sysname,
@@ -911,6 +1040,7 @@ bool execute(struct execute_state *state,
if (result <= 0) {
close(testdirfd);
close(resdirfd);
+ close_watchdogs(settings);
if (result < 0) {
memset(state, 0, sizeof(*state));
initialize_execute_from_resume(state, settings, job_list);
@@ -920,5 +1050,8 @@ bool execute(struct execute_state *state,
}
}
+ close(testdirfd);
+ close(resdirfd);
+ close_watchdogs(settings);
return true;
}