summary refs log tree commit diff
path: root/scheduler
diff options
context:
space:
mode:
author showard <showard@592f7852-d20e-0410-864c-8624ca9c26a4> 2009-04-16 03:10:11 +0000
committer showard <showard@592f7852-d20e-0410-864c-8624ca9c26a4> 2009-04-16 03:10:11 +0000
commit 8666a48338bd83284f1182470c954cc1d32b1d80 (patch)
tree ae8b118eab4f4cd3ed5be27f8128cc1f1160ab3d /scheduler
parent 8f659c4c7c1b0dd6d55d0785b5f7ffbd073ac5fc (diff)
Add an information-collecting method so we can see what state the system was in when
the scheduler stalled. It records the output of uptime, iostat, ps auxwww, and mysqladmin processlist. Signed-off-by: Scott Zawalski <scottz@google.com> git-svn-id: svn://test.kernel.org/autotest/trunk@2994 592f7852-d20e-0410-864c-8624ca9c26a4
Diffstat (limited to 'scheduler')
-rw-r--r-- scheduler/monitor_db_babysitter 40
1 files changed, 39 insertions, 1 deletions
diff --git a/scheduler/monitor_db_babysitter b/scheduler/monitor_db_babysitter
index 618fa57e..0ae0a7b5 100644
--- a/scheduler/monitor_db_babysitter
+++ b/scheduler/monitor_db_babysitter
@@ -1,6 +1,8 @@
#!/usr/bin/python -u
import os, sys, signal, time, subprocess, logging, logging.config
from optparse import OptionParser
+import common
+from autotest_lib.client.common_lib import error, global_config, utils
PAUSE_LENGTH = 60
STALL_TIMEOUT = 2*60*60
@@ -32,6 +34,19 @@ if len(args) != 0:
sys.exit(1)
+def run_banner_output(cmd):
+    """Run cmd and return its output underneath a banner naming the command.
+
+    The returned string is three parts joined by newlines: cmd centered in
+    a 60-column line of dashes, the command's stdout followed by its
+    stderr, and a trailing blank line.  If utils.run() raises
+    error.CmdError, the text 'Timed out' takes the place of the output.
+    """
+    try:
+        cmd_out = utils.run(cmd, ignore_status=True, timeout=30)
+        command_output = cmd_out.stdout + cmd_out.stderr
+    except error.CmdError:
+        command_output = 'Timed out'
+
+    # Interpolate exactly once.  Turning cmd into a '%s' template and then
+    # formatting that template again breaks as soon as cmd itself contains
+    # a '%' (for example inside a database password).
+    return '%s\n%s\n\n' % (cmd.center(60, '-'), command_output)
+
+
def kill_all_monitors():
logging.info("Killing all monitor_dbs")
# try shutdown first
@@ -86,15 +101,38 @@ class MonitorProc:
self.last_log_change = time.time()
elif self.last_log_change + STALL_TIMEOUT < time.time():
logging.info("monitor_db STALLED")
+ self.collect_stalled_info()
return False
return True
+    def collect_stalled_info(self):
+        """Write a snapshot of system state to self.log_path + '.stall_info'.
+
+        Runs uptime, ps and iostat, plus mysqladmin processlist when the
+        BACKUP user and password can be read from the global config.  Each
+        command's text comes from run_banner_output().  The file is opened
+        in "w" mode, so any earlier snapshot at that path is overwritten.
+        """
+        info_to_collect = ['uptime',
+                           'ps auxwww',
+                           'iostat -k -x 2 4',
+                           ]
+        config = global_config.global_config
+        try:
+            user = config.get_config_value("BACKUP", "user")
+            password = config.get_config_value("BACKUP", "password")
+        except global_config.ConfigError:
+            # Credentials could not be read; skip the mysqladmin report.
+            pass
+        else:
+            # NOTE(review): the password is placed on the command line and
+            # is echoed into the banner written to the stall log -- confirm
+            # that exposure is acceptable.
+            info_to_collect.append(
+                '/usr/bin/mysqladmin --verbose processlist -u%s -p%s'
+                % (user, password))
+
+        stall_log_path = self.log_path + '.stall_info'
+        log = open(stall_log_path, "w")
+        try:
+            for cmd in info_to_collect:
+                log.write(run_banner_output(cmd))
+        finally:
+            # Close the log even if a command or a write raises.
+            log.close()
+
+
logging.info("initializing")
if os.getuid() == 0:
- logging.critical("run as root! aborting!")
+ logging.critical("running as root, aborting!")
sys.exit(1)
utils.write_pid("monitor_db_babysitter")