diff options
Diffstat (limited to 'stracer.py')
-rw-r--r-- | stracer.py | 163 |
1 files changed, 163 insertions, 0 deletions
r"""Trace a command's network system calls with strace and collect payloads.

``StracerNetwork`` starts the command under ``strace -e trace=network``, reads
strace's stderr one line at a time and records, per socket file descriptor,
every ``sendto``/``recvfrom`` payload it can parse.  The module-level
``parse_*`` helpers each parse the part of an strace line that follows
``[pid N] syscall(fd,``.
"""
import re
import subprocess

try:
    from client import unspice_channels
except ImportError:
    # Only StracerNetwork.spiced() needs the spice decoder; keep the tracer
    # and the line parsers importable (and testable) without it.
    unspice_channels = None


# All patterns are raw strings (no invalid-escape warnings) compiled once.
_CONNECT_RE = re.compile(
    r'\s+\{sa_family=(?P<family>[A-Z_]+),'
    r'\s+sin_port=htons\((?P<port>[0-9]+)\),'
    r'\s+sin_addr=inet_addr\("(?P<host>[0-9.]+)"\)\},'
    r'\s+([0-9]+)\)\s+=\s+(?P<ret>[0-9]+)\n')

# Shared prefix of the message syscalls: the quoted buffer (optionally followed
# by strace's "..." truncation marker), the size argument and the flags.
_MSG_PREFIX = (r'\s+"(?P<msg>[x0-9a-f\\]+)"\.?\.?\.?,'
               r'\s+(?P<size>[0-9]+),\s+([0-9]+),')
_SENDTO_UNFINISHED_RE = re.compile(
    _MSG_PREFIX + r'\s+.+,\s+([0-9]+) <unfinished ...>\n')
_SENDTO_RE = re.compile(
    _MSG_PREFIX + r'\s+.+,\s+([0-9]+)\)\s+=\s+(?P<ret>[0-9]+)\n')
_RECVFROM_RE = re.compile(
    _MSG_PREFIX + r'.+,.+\)\s+=\s+(?P<ret>[0-9]+)\n')
_START_RE = re.compile(r'\[pid\s+([0-9]+)\]\s+([a-zA-Z0-9]+)\(([^,]+),')


def parse_connect(line):
    r"""Parse the tail of a successful AF_INET-style ``connect`` line.

    Example input (note the leading whitespace and the trailing newline)::

        ' {sa_family=AF_INET, sin_port=htons(5926), sin_addr=inet_addr("127.0.0.1")}, 16) = 0\n'

    Returns a dict with the string values ``family``, ``port``, ``host`` and
    ``ret``, or ``None`` when the line does not match (for instance when the
    return value is not a plain non-negative number).
    """
    m = _CONNECT_RE.match(line)
    if not m:
        return None
    return m.groupdict()


def from_quoted(s):
    r"""Decode a string made only of ``\xNN`` escapes into its characters.

    ``s`` is consumed four characters at a time (backslash, ``x``, two hex
    digits); each group becomes ``chr(0xNN)``.  A group without two hex digits
    makes ``int()`` raise ``ValueError``.
    """
    return ''.join(chr(int(s[i + 2:i + 4], 16)) for i in range(0, len(s), 4))


def match_message_container(line, rexp):
    """Match ``line`` against ``rexp`` and decode the captured message.

    ``rexp`` (pattern string or compiled pattern) must define the named groups
    ``msg`` and ``size`` and may define ``ret``.  Returns ``None`` on no match,
    otherwise the group dict with ``msg`` decoded by ``from_quoted`` and
    ``size``/``ret`` converted to ``int``.
    """
    m = re.match(rexp, line)
    if not m:
        return None
    d = m.groupdict()
    d['msg'] = from_quoted(d['msg'])
    d['size'] = int(d['size'])
    if 'ret' in d:
        d['ret'] = int(d['ret'])
        # A finished call must have transferred exactly the bytes strace
        # printed; a truncated ("...") buffer would trip this.
        assert len(d['msg']) == d['ret']
    # NOTE(review): the type is 'sendto' even for recvfrom lines; kept as-is
    # because nothing in this module reads it -- confirm before changing.
    d['type'] = 'sendto'
    return d


def parse_sendto(line):
    r"""Parse the tail of a ``sendto`` line, finished or ``<unfinished ...>``.

    Example input::

        ' "\x14\x00", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 2\n'

    See ``match_message_container`` for the returned dict; unfinished calls
    carry no ``ret`` key.
    """
    if 'unfinished' in line:
        return match_message_container(line, _SENDTO_UNFINISHED_RE)
    return match_message_container(line, _SENDTO_RE)


def parse_recvfrom(line):
    r"""Parse the tail of a finished ``recvfrom`` line.

    Example input::

        ' "\x00\x00\x00", 29, 0, NULL, NULL) = 3\n'

    See ``match_message_container`` for the returned dict.
    """
    return match_message_container(line, _RECVFROM_RE)


def parse_recvmsg(line):
    """Placeholder: ``recvmsg`` lines are not parsed yet; returns ``None``."""
    print("TODO - parse_recvmsg")
    return None


def parse_sendmsg(line):
    """Placeholder: ``sendmsg`` lines are not parsed yet; returns ``None``."""
    print("TODO - parse_sendmsg")
    return None


def parse_start(line):
    """Find ``[pid N] syscall(firstarg,`` in ``line``.

    Returns the match object (groups: pid, syscall name, first argument) or
    ``None``.  ``match.end()`` is where the syscall-specific parsers start.
    """
    # really lame parsing, won't deal with nested first arguments - but
    # works for all the system calls we are interested in. Actually just
    # adding nesting level to each char would solve this.
    return _START_RE.search(line)


# (substring looked for in the syscall name == tag stored with the message,
# parser for the rest of the line).  Order matters: first hit wins.
_MESSAGE_SYSCALLS = (
    ('sendto', parse_sendto),
    ('recvmsg', parse_recvmsg),
    ('recvfrom', parse_recvfrom),
    ('sendmsg', parse_sendmsg),
)


class StracerNetwork(object):
    """Run a command under strace and record its per-socket network traffic.

    Attributes:
        connections: list of dicts, one per known socket, each holding at
            least ``sock`` and ``host``.
        sockets: maps a socket fd to a list of ``((tag, line), parsed)``
            entries, where ``parsed`` is the parser's dict or ``None``.
        on_message: callable invoked with the decoded payload of every
            successfully parsed message.
    """

    # Network syscalls that are recognised but deliberately ignored.
    _unhandled_syscalls = {'socket', 'bind', 'getsockname', 'setsockopt',
                           # later
                           'getpeername', 'getsockopt', 'shutdown'}

    def __init__(self, cmd, on_message=lambda x: None):
        """Start ``cmd`` (split on whitespace) under strace.

        strace's trace goes to stderr, which is captured through a pipe.
        """
        self.max_bytes_per_message = max_bytes = 2 ** 20
        self._p = subprocess.Popen(
            ('strace -qvxx -e trace=network -s %s -Cf %s'
             % (max_bytes, cmd)).split(),
            stderr=subprocess.PIPE,
            # Text-mode pipe: the parsers use str patterns, so stderr lines
            # must be str on Python 3 as well as on Python 2.
            universal_newlines=True)
        self.connections = []
        self.sockets = {}
        self._message_count = 0
        self._lines = []
        self.on_message = on_message

    def total_length(self, n):
        """Return the number of payload characters recorded for socket ``n``.

        Unknown sockets count as 0; unparsed entries are skipped.
        """
        if n not in self.sockets:
            return 0
        return sum(len(d['msg']) for _, d in self.sockets[n]
                   if d is not None and 'msg' in d)

    def terse(self, count=10):
        """Read lines until ``count`` messages are recorded over all sockets.

        Returns ``(sorted_fds, [terse_key(fd) for fd in sorted_fds])``.
        """
        while True:
            collected = sum(len(v) for v in self.sockets.values())
            if collected >= count:
                break
            # Progress output: how many messages have been collected so far.
            print(collected)
            self.handle_line()
        sorted_keys = sorted(self.sockets)
        return sorted_keys, [self.terse_key(k) for k in sorted_keys]

    def terse_key(self, key):
        """Return ``[(direction, payload), ...]`` for socket ``key``.

        ``direction`` is ``'S'`` for sendto, ``'R'`` for recvfrom and ``None``
        for any other tag; entries that failed to parse are left out.
        """
        directions = {'recvfrom': 'R', 'sendto': 'S'}
        return [(directions.get(tag), d['msg'])
                for (tag, _line), d in self.sockets[key] if d is not None]

    def conversations(self, count=10):
        """Return ``(fds, [[sent, received], ...])``, one pair per socket.

        Each element is the concatenation of that direction's payloads.
        """
        keys, terses = self.terse(count=count)
        return keys, [[''.join(msg for t, msg in terse if t == k)
                       for k in 'SR']
                      for terse in terses]

    def spiced(self, count=10):
        """Feed ``conversations(count)`` to ``client.unspice_channels``.

        Raises ImportError when the ``client`` module could not be imported.
        """
        if unspice_channels is None:
            raise ImportError('client.unspice_channels is required by spiced()')
        keys, collecteds = self.conversations(count)
        return unspice_channels(keys, collecteds)

    def lines(self):
        """Yield strace output lines, echoing (truncated) and recording each."""
        for line in self._p.stderr:
            print(line if len(line) < 100 else line[:100] + '...')
            self._lines.append(line)
            yield line

    def wait_for_connect(self, host):
        """Process lines until a connect to ``host`` was seen.

        Returns the socket fd of the first recorded connection to ``host``.
        """
        while not self.host_connected(host):
            self.handle_line()
        return next(d['sock'] for d in self.connections if d['host'] == host)

    def host_connected(self, host):
        """Return True if any recorded connection targets ``host``."""
        return any(d['host'] == host for d in self.connections)

    def add_to_socket(self, sock, tag, line, parser):
        """Parse ``line`` with ``parser`` and record the result under ``sock``.

        Sockets never seen in a connect are registered on the fly.  The
        message counter is bumped even when parsing fails; ``on_message`` is
        called only for parsed messages.
        """
        if sock not in self.sockets:
            print("did I miss a connect?? at %s" % len(self._lines))
            self.add_socket(sock)
        parsed = parser(line)
        self.sockets[sock].append(((tag, line), parsed))
        self._message_count += 1
        if parsed:
            self.on_message(parsed['msg'])

    def add_socket(self, sock, d=None):
        """Register socket ``sock`` described by dict ``d``.

        ``d`` is updated in place with ``sock`` and, if missing, a ``host`` of
        ``'unknown'``.  Any messages previously stored for ``sock`` are reset.
        """
        # A fresh dict per call: a shared mutable default would make every
        # "missed connect" socket alias (and overwrite) the same entry.
        if d is None:
            d = {}
        d['sock'] = sock
        if 'host' not in d:
            d['host'] = 'unknown'
        self.connections.append(d)
        self.sockets[sock] = []

    def handle_line(self):
        """Read one strace line and dispatch it on its syscall name.

        Raises StopIteration when strace's output is exhausted.
        """
        line = next(self.lines())
        m = parse_start(line)
        if not m:
            return
        rest = line[m.end():]
        _pid, syscall, firstarg = m.groups()
        if 'connect' in syscall:
            d = parse_connect(rest)
            if d:
                self.add_socket(int(firstarg), d)
            return
        for name, parser in _MESSAGE_SYSCALLS:
            if name in syscall:
                self.add_to_socket(int(firstarg), name, rest, parser)
                return
        if syscall not in self._unhandled_syscalls:
            # Report and carry on; a debugger breakpoint here would stall any
            # non-interactive run.
            print("unhandled %s" % syscall)

    def wait_for(self, num):
        """Process lines until ``num`` more messages have been recorded."""
        start_count = self._message_count
        while True:
            self.handle_line()
            if self._message_count - start_count >= num:
                return


def trace_spicec(host, port, spicec='/store/upstream/bin/spicec'):
    """Trace a spicec client connecting to ``host:port``.

    Returns the ``terse_key`` listing of the socket connected to ``host``
    after at least 10 further messages have been recorded.
    """
    s = StracerNetwork('%s -h %s -p %s' % (spicec, host, port))
    sock = s.wait_for_connect(host)
    # NOTE(review): this used to call s.filter(sock, 10), which StracerNetwork
    # does not define.  Collecting 10 messages and returning this socket's
    # exchange is the closest equivalent -- confirm the intended result.
    s.wait_for(10)
    return s.terse_key(sock)


if __name__ == '__main__':
    trace_spicec('127.0.0.1', 5926)