summary refs log tree commit diff
path: root/stracer.py
diff options
context:
space:
mode:
Diffstat (limited to 'stracer.py')
-rw-r--r-- stracer.py 163
1 files changed, 163 insertions, 0 deletions
diff --git a/stracer.py b/stracer.py
new file mode 100644
index 0000000..60b735f
--- /dev/null
+++ b/stracer.py
@@ -0,0 +1,163 @@
+import subprocess
+import re
+from client import unspice_channels
+
+def parse_connect(line):
+    r"""Parse the tail of a strace ``connect`` line carrying an inet address.
+
+    `line` is the text that follows the first syscall argument, e.g.::
+
+        ' {sa_family=AF_INET, sin_port=htons(5926), sin_addr=inet_addr("127.0.0.1")}, 16) = 0\n'
+
+    Returns a dict with the string values 'family', 'port', 'host' and
+    'ret', or None when the line does not match (for instance when the
+    return value is negative, or the address is not printed in the
+    sin_port/sin_addr form).
+    """
+    # The quantifier sits inside the 'ret' group so that a multi-digit
+    # return value is captured whole instead of only its last digit.
+    m = re.match(
+        r'\s+\{sa_family=(?P<family>[A-Z_]+),'
+        r'\s+sin_port=htons\((?P<port>[0-9]+)\),'
+        r'\s+sin_addr=inet_addr\("(?P<host>[0-9.]+)"\)\},'
+        r'\s+([0-9]+)\)\s+=\s+(?P<ret>[0-9]+)\n',
+        line)
+    if not m:
+        return None
+    return m.groupdict()
+
+def from_quoted(s):
+    r"""Decode a run of ``\xNN`` escapes into the characters they stand for.
+
+    The input is consumed four characters at a time: the first two of each
+    group (expected to be ``\x``) are skipped and the remaining two are
+    read as a hexadecimal character code.
+    """
+    decoded = []
+    for start in xrange(0, len(s), 4):
+        hex_pair = s[start + 2:start + 4]
+        decoded.append(chr(int(hex_pair, 16)))
+    return ''.join(decoded)
+
+def match_message_container(line, rexp):
+    r"""Match `line` against `rexp` and decode the message it carries.
+
+    `rexp` must define the named groups 'msg' (a run of ``\xNN`` escapes)
+    and 'size'; a 'ret' group is optional.
+
+    Returns None when `line` does not match.  Otherwise returns the match's
+    group dict with 'msg' decoded by from_quoted, 'size' (and 'ret' when
+    the pattern has it) converted to int, and 'type' set to 'sendto'.
+
+    Raises AssertionError when 'ret' is present and differs from the
+    length of the decoded message.
+    """
+    m = re.match(rexp, line)
+    if not m:
+        return None
+    d = m.groupdict()
+    d['msg'] = from_quoted(d['msg'])
+    d['size'] = int(d['size'])
+    if 'ret' in d:
+        d['ret'] = int(d['ret'])
+        # Raised explicitly rather than with an assert statement so that the
+        # check still runs when Python is started with -O.
+        if len(d['msg']) != d['ret']:
+            raise AssertionError(
+                'decoded message length %d does not match syscall return value %d'
+                % (len(d['msg']), d['ret']))
+    # NOTE(review): 'type' is 'sendto' whatever pattern was used, including
+    # the recvfrom one.  The indentation of this listing was lost, so it is
+    # assumed here that the assignment is unconditional -- confirm.
+    d['type'] = 'sendto'
+    return d
+
+def parse_sendto(line):
+    r"""Parse the tail of a strace ``sendto`` line.
+
+    Example of a completed call (text after the first argument)::
+
+        "\x14\x00\x00\x00\x16\x00\x01\x03", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
+
+    A line containing 'unfinished' is matched against a pattern that ends
+    in ``<unfinished ...>`` and has no return value; any other line must
+    end in ``) = <ret>``.  The result is whatever match_message_container
+    returns for the chosen pattern.
+    """
+    if 'unfinished' not in line:
+        rexp = '\s+"(?P<msg>[x0-9a-f\\\\]+)"\.?\.?\.?,\s+(?P<size>[0-9]+),\s+([0-9]+),\s+.+,\s+([0-9]+)\)\s+=\s+(?P<ret>[0-9]+)\n'
+    else:
+        rexp = '\s+"(?P<msg>[x0-9a-f\\\\]+)"\.?\.?\.?,\s+(?P<size>[0-9]+),\s+([0-9]+),\s+.+,\s+([0-9]+) <unfinished ...>\n'
+    return match_message_container(line, rexp)
+
+def parse_recvfrom(line):
+    r"""Parse the tail of a strace ``recvfrom`` line.
+
+    Example (text after the first argument)::
+
+        "\x00\x00\x00\x00\xd2\x01\x00\x00", 29, 0, NULL, NULL) = 29
+
+    The result is whatever match_message_container returns for the
+    recvfrom pattern.
+    """
+    rexp = '\s+"(?P<msg>[x0-9a-f\\\\]+)"\.?\.?\.?,\s+(?P<size>[0-9]+),\s+([0-9]+),.+,.+\)\s+=\s+(?P<ret>[0-9]+)\n'
+    return match_message_container(line, rexp)
+
+def parse_recvmsg(line):
+    """Placeholder parser for ``recvmsg`` lines: prints a TODO, returns None."""
+    print("TODO - parse_recvmsg")
+    return None
+
+def parse_sendmsg(line):
+    """Placeholder parser for ``sendmsg`` lines: prints a TODO, returns None."""
+    print("TODO - parse_sendmsg")
+    return None
+
+def parse_start(line):
+    """Find the ``[pid N] syscall(firstarg,`` prefix of a strace line.
+
+    Returns the match object, whose groups are (pid, syscall name, first
+    argument) as strings, or None when the line has no such prefix.  Lines
+    that lack the ``[pid N]`` marker do not match.
+    """
+    # Deliberately simple: the first argument is taken to be everything up
+    # to the first comma, so a nested first argument is not handled.  That
+    # is enough for the system calls of interest here; tracking a nesting
+    # level per character would lift the restriction.
+    start_rexp = '\[pid\s+([0-9]+)\]\s+([a-zA-Z0-9]+)\(([^,]+),'
+    return re.search(start_rexp, line)
+
+class StracerNetwork(object):
+    """Run a command under strace and record its network syscalls per socket.
+
+    strace's stderr is consumed one line at a time, on demand, through
+    handle_line.  A parsed ``connect`` registers a socket; every
+    sendto/recvfrom/sendmsg/recvmsg call is appended to that socket's list
+    as ``((tag, raw_line), parsed)`` where `parsed` is the parser's result
+    (a dict, or None when the parser produced nothing).
+
+    Attributes:
+        connections: list of dicts, one per registered socket, each holding
+            at least the keys 'sock' and 'host'.
+        sockets: maps a socket number to its list of recorded calls.
+        on_message: callable invoked with the decoded message of every
+            call whose parser returned a non-empty result.
+        max_bytes_per_message: the string size limit handed to strace (-s).
+    """
+
+    # Syscalls that are recognised but intentionally not recorded.
+    _unhandled_syscalls = set(['socket', 'bind', 'getsockname', 'setsockopt',
+                               # later
+                               'getpeername', 'getsockopt', 'shutdown'])
+
+    def __init__(self, cmd, on_message = lambda x: None):
+        """Start `cmd` under strace, capturing strace's stderr.
+
+        `cmd` is a single string that is split on whitespace, so arguments
+        containing spaces are not supported.
+        """
+        self.max_bytes_per_message = max_bytes = 2**20
+        strace_cmd = 'strace -qvxx -e trace=network -s %s -Cf %s' % (max_bytes, cmd)
+        self._p = subprocess.Popen(strace_cmd.split(), stderr=subprocess.PIPE)
+        self.connections = []
+        self.sockets = {}
+        self._message_count = 0
+        self._lines = []
+        self.on_message = on_message
+
+    def total_length(self, n):
+        """Return the summed length of all parsed messages on socket `n`.
+
+        Unknown sockets count as 0.
+        """
+        if n not in self.sockets:
+            return 0
+        return sum(len(d['msg']) for _, d in self.sockets[n]
+                   if d is not None and 'msg' in d)
+
+    def terse(self, count=10):
+        """Read lines until `count` calls are recorded, then summarise them.
+
+        Returns ``(sorted socket numbers, [terse_key(sock) for each])``.
+        """
+        while True:
+            recorded = sum(len(calls) for calls in self.sockets.values())
+            if recorded >= count:
+                break
+            # Progress output: number of calls recorded so far.
+            print(recorded)
+            self.handle_line()
+        sorted_keys = sorted(self.sockets.keys())
+        return sorted_keys, [self.terse_key(k) for k in sorted_keys]
+
+    def terse_key(self, key):
+        """Return ``[(letter, msg), ...]`` for the parsed calls on socket `key`.
+
+        The letter is 'R' for recvfrom, 'S' for sendto and None for any
+        other tag; calls whose parser returned None are left out.
+        """
+        letters = dict(recvfrom='R', sendto='S')
+        return [(letters.get(tag), d['msg'])
+                for ((tag, line), d) in self.sockets[key] if d is not None]
+
+    def conversations(self, count=10):
+        """Return the socket numbers and, per socket, ``[sent, received]``.
+
+        Each of the two entries is the concatenation of that direction's
+        messages in recorded order.
+        """
+        keys, terses = self.terse(count=count)
+        return keys, [[''.join([msg for t, msg in terse if t == k]) for k in 'SR']
+                      for terse in terses]
+
+    def spiced(self, count=10):
+        """Pass the result of conversations(count) to unspice_channels."""
+        keys, collecteds = self.conversations(count)
+        return unspice_channels(keys, collecteds)
+
+    def lines(self):
+        """Yield strace output lines, echoing (truncated) and storing each."""
+        for line in self._p.stderr:
+            print(line if len(line) < 100 else line[:100] + '...')
+            self._lines.append(line)
+            yield line
+
+    def wait_for_connect(self, host):
+        """Process lines until a connection to `host` has been registered.
+
+        Returns the socket number of the first registered connection whose
+        'host' equals `host`.
+        """
+        while not self.host_connected(host):
+            self.handle_line()
+        for d in self.connections:
+            if d['host'] == host:
+                return d['sock']
+
+    def host_connected(self, host):
+        """Return True when some registered connection has this `host`."""
+        return any(d['host'] == host for d in self.connections)
+
+    def add_to_socket(self, sock, tag, line, parser):
+        """Parse `line` with `parser` and record the call on socket `sock`.
+
+        A socket that was never registered is registered on the fly with
+        host 'unknown'.  on_message is called only when the parser returned
+        a non-empty result.
+        """
+        if sock not in self.sockets:
+            print("did I miss a connect?? at %s" % len(self._lines))
+            self.add_socket(sock)
+        parsed = parser(line)
+        self.sockets[sock].append(((tag, line), parsed))
+        self._message_count += 1
+        if parsed:
+            self.on_message(parsed['msg'])
+
+    def add_socket(self, sock, d=None):
+        """Register socket `sock`, (re)starting its list of recorded calls.
+
+        `d` is the connection description; it is updated in place with
+        'sock' (and 'host' = 'unknown' when absent) and appended to
+        connections.  When omitted a fresh dict is used for each call, so
+        separately registered sockets never share one description.
+        """
+        if d is None:
+            d = {}
+        d['sock'] = sock
+        if 'host' not in d:
+            d['host'] = 'unknown'
+        self.connections.append(d)
+        self.sockets[sock] = []
+
+    def handle_line(self):
+        """Read one strace line and dispatch it on the syscall name.
+
+        Lines without a recognisable prefix, and connect lines that fail to
+        parse, are ignored.  Raises StopIteration once strace's output is
+        exhausted.
+        """
+        line = next(self.lines())
+        m = parse_start(line)
+        if not m:
+            return
+        # Everything after "syscall(firstarg," is handed to the parsers.
+        line = line[m.end():]
+        pid, syscall, firstarg = m.groups()
+        # Substring tests, checked in this order, select the handler.
+        if 'connect' in syscall:
+            d = parse_connect(line)
+            if not d:
+                return
+            self.add_socket(int(firstarg), d)
+        elif 'sendto' in syscall:
+            self.add_to_socket(int(firstarg), 'sendto', line, parse_sendto)
+        elif 'recvmsg' in syscall:
+            self.add_to_socket(int(firstarg), 'recvmsg', line, parse_recvmsg)
+        elif 'recvfrom' in syscall:
+            self.add_to_socket(int(firstarg), 'recvfrom', line, parse_recvfrom)
+        elif 'sendmsg' in syscall:
+            self.add_to_socket(int(firstarg), 'sendmsg', line, parse_sendmsg)
+        elif syscall in self._unhandled_syscalls:
+            pass
+        else:
+            # Report and carry on; stopping in a debugger here would stall
+            # any non-interactive run on the first unexpected syscall.
+            print("unhandled %s" % syscall)
+
+    def wait_for(self, num):
+        """Process lines until `num` further calls have been recorded.
+
+        At least one line is always processed, even when `num` is 0.
+        """
+        start_count = self._message_count
+        while True:
+            self.handle_line()
+            if self._message_count - start_count >= num:
+                return
+
+def trace_spicec(host, port):
+    """Trace a spicec client connecting to `host`:`port`.
+
+    Starts spicec under strace, waits for its connection to `host`, keeps
+    reading until ten calls have been recorded on that socket and returns
+    that socket's terse_key() summary.
+
+    NOTE(review): the earlier body ended in ``s.filter(sock, 10)``, but the
+    StracerNetwork class in this file defines no ``filter``; "the first ten
+    calls on the connected socket" is an interpretation -- confirm.
+    """
+    wanted_calls = 10
+    s = StracerNetwork('/store/upstream/bin/spicec -h %s -p %s' % (host, port))
+    s.wait_for_connect(host)
+    # Look the socket up in the registered connections rather than relying
+    # on a return value from wait_for_connect.
+    sock = [d['sock'] for d in s.connections if d['host'] == host][0]
+    while len(s.sockets[sock]) < wanted_calls:
+        s.handle_line()
+    return s.terse_key(sock)
+
+if __name__ == '__main__':
+    # Script entry point: trace a client talking to 127.0.0.1 on port 5926.
+    trace_spicec(host='127.0.0.1', port=5926)
+