diff options
author | showard <showard@592f7852-d20e-0410-864c-8624ca9c26a4> | 2009-04-16 03:10:11 +0000 |
---|---|---|
committer | showard <showard@592f7852-d20e-0410-864c-8624ca9c26a4> | 2009-04-16 03:10:11 +0000 |
commit | 8666a48338bd83284f1182470c954cc1d32b1d80 (patch) | |
tree | ae8b118eab4f4cd3ed5be27f8128cc1f1160ab3d /scheduler | |
parent | 8f659c4c7c1b0dd6d55d0785b5f7ffbd073ac5fc (diff) |
Add an information-collecting method so we can see what state the system was in when the scheduler stalls.
Commands collected: uptime, iostat, ps auxwww, mysqladmin processlist
Signed-off-by: Scott Zawalski <scottz@google.com>
git-svn-id: svn://test.kernel.org/autotest/trunk@2994 592f7852-d20e-0410-864c-8624ca9c26a4
Diffstat (limited to 'scheduler')
-rw-r--r-- | scheduler/monitor_db_babysitter | 40 |
1 file changed, 39 insertions, 1 deletion
```diff
diff --git a/scheduler/monitor_db_babysitter b/scheduler/monitor_db_babysitter
index 618fa57e..0ae0a7b5 100644
--- a/scheduler/monitor_db_babysitter
+++ b/scheduler/monitor_db_babysitter
@@ -1,6 +1,8 @@
 #!/usr/bin/python -u
 import os, sys, signal, time, subprocess, logging, logging.config
 from optparse import OptionParser
+import common
+from autotest_lib.client.common_lib import error, global_config, utils
 
 PAUSE_LENGTH = 60
 STALL_TIMEOUT = 2*60*60
@@ -32,6 +34,19 @@ if len(args) != 0:
     sys.exit(1)
 
 
+def run_banner_output(cmd):
+    """Returns ------ CMD ------\nCMD_OUTPUT in a string"""
+    banner_output = '%s\n%%s\n\n' % cmd.center(60, '-')
+    command_output = ''
+    try:
+        cmd_out = utils.run(cmd, ignore_status=True, timeout=30)
+        command_output = cmd_out.stdout + cmd_out.stderr
+    except error.CmdError:
+        command_output = 'Timed out'
+
+    return banner_output % command_output
+
+
 def kill_all_monitors():
     logging.info("Killing all monitor_dbs")
     # try shutdown first
@@ -86,15 +101,38 @@ class MonitorProc:
             self.last_log_change = time.time()
         elif self.last_log_change + STALL_TIMEOUT < time.time():
             logging.info("monitor_db STALLED")
+            self.collect_stalled_info()
             return False
 
         return True
 
 
+    def collect_stalled_info(self):
+        INFO_TO_COLLECT = ['uptime',
+                           'ps auxwww',
+                           'iostat -k -x 2 4',
+                          ]
+        db_cmd = '/usr/bin/mysqladmin --verbose processlist -u%s -p%s'
+        config = global_config.global_config
+        try:
+            user = config.get_config_value("BACKUP", "user")
+            password = config.get_config_value("BACKUP", "password")
+            db_cmd %= (user, password)
+            INFO_TO_COLLECT.append(db_cmd)
+        except global_config.ConfigError:
+            pass
+        stall_log_path = self.log_path + '.stall_info'
+        log = open(stall_log_path, "w")
+        for cmd in INFO_TO_COLLECT:
+            log.write(run_banner_output(cmd))
+
+        log.close()
+
+
 logging.info("initializing")
 
 if os.getuid() == 0:
-    logging.critical("run as root! aborting!")
+    logging.critical("running as root, aborting!")
     sys.exit(1)
 
 utils.write_pid("monitor_db_babysitter")
```