summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Corbet <corbet@lwn.net>2009-02-10 13:52:58 -0700
committerJonathan Corbet <corbet@lwn.net>2009-02-10 13:52:58 -0700
commit50e3f2e1eeaec1793c6be8784ec79c392ff681d2 (patch)
tree97afe3c8acf8b12a4bb3ffb5592b98f50d372f96
parentf3fd6b568307871bb7213d32412afce8e6f479e2 (diff)
A quick and dirty treeplot utility
This is a tool to make a graphviz input file describing the patch flow into the mainline.
-rw-r--r--patterns.py2
-rwxr-xr-xtreeplot319
2 files changed, 320 insertions, 1 deletions
diff --git a/patterns.py b/patterns.py
index 4f878d0..1e81869 100644
--- a/patterns.py
+++ b/patterns.py
@@ -33,6 +33,6 @@ Preptest = re.compile (r'reported-and-tested-by:' + Pemail, re.I)
#
# Merges are described with a variety of lines.
#
-PExtMerge = re.compile(r'^ +Merge( branch .* of)? ([^ ]+)\n$')
+PExtMerge = re.compile(r'^ +Merge( branch .* of)? ([^ ]+:[^ ]+)\n$')
PIntMerge = re.compile(r'^ +(Merge|Pull) .* into .*$')
PIntMerge2 = re.compile(r"^ +Merge branch(es)? '.*$")
diff --git a/treeplot b/treeplot
new file mode 100755
index 0000000..f61f8a7
--- /dev/null
+++ b/treeplot
@@ -0,0 +1,319 @@
+#!/usr/bin/python
+#
+# Create a graph of patch flow into the mainline.
+#
+import sys
+import patterns
+
+#
+# The various types of commit we understand.
+#
+class Commit:
+ def __init__(self, id, parent):
+ self.id = id
+ self.parent = parent
+ self.ismerge = 0
+ self.treepriority = 0
+#
+# Merges are special
+#
+class Merge (Commit):
+ def __init__(self, id, parent):
+ Commit.__init__(self, id, parent)
+ self.ismerge = 1
+ self.internal = 1 # Two branches within a repo?
+ self.parents = [ parent ]
+
+ def addparent(self, parentid):
+ self.parents.append(parentid)
+
+ def addtree(self, tree):
+ self.tree = tree
+ self.internal = 0
+
+#
+# Trees: where the commits come from.
+#
+class Tree:
+ def __init__(self, name, url):
+ self.name = name
+ self.url = url
+ self.inputs = [ ]
+ self.commits = [ ]
+
+ def addcommit(self, id):
+ self.commits.append(id)
+
+ def addinput(self, tree):
+ if tree not in self.inputs:
+ self.inputs.append(tree)
+ # print '%s -> %s' % (tree.name, self.name)
+
+Mainline = Tree('Mainline',
+ 'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git')
+KnownTrees = { Mainline.url: Mainline }
+
+def NormalizeURL(url):
+ if url[:4] == 'git:':
+ return url
+ if url == '../net-2.6/':
+ url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6'
+ url = url.replace('master.kernel.org:', 'git://git.kernel.org')
+ if url[-18:] == 'torvalds/linux-2.6':
+ url += '.git'
+ if url[:8] == '/pub/scm':
+ url = 'git://git.kernel.org' + url
+ return url
+
+def LookupTree(url):
+ url = NormalizeURL(url)
+ try:
+ return KnownTrees[url]
+ except KeyError:
+ tree = Tree(url, url)
+ KnownTrees[url] = tree
+ return tree
+
+#
+# We track which tree every commit belongs to.
+#
+CommitTrees = { }
+class CTEntry:
+ def __init__ (self, tree, priority, path):
+ self.tree = tree
+ self.priority = priority
+ self.path = path
+
+def AddCommitTree(id, entry):
+# print 'add: ', id, '[',
+# for tree in entry.path:
+# print tree.name,
+# print ']'
+ try:
+ oldentry = CommitTrees[id]
+ if entry.priority < oldentry.priority:
+ CommitTrees[id] = entry
+ except KeyError:
+ CommitTrees[id] = entry
+
+
+def LookupCommitTree(id):
+ try:
+ return CommitTrees[id]
+ except KeyError:
+ print 'Unfound commit %s' % (id)
+ return CTEntry (Mainline, 0, [])
+
+#
+# Input handling with one-line pushback.
+#
+SavedLine = None
+Input = sys.stdin
+
+def GetLine():
+ global SavedLine
+ if SavedLine:
+ ret = SavedLine
+ SavedLine = None
+ return ret
+ return Input.readline()
+
+def SaveLine(line):
+ global SavedLine
+ SavedLine = line
+
+#
+# Pull in a commit and see what it is.
+#
+def GetCommit():
+ #
+ # Skip junk up to the next commit.
+ #
+ while 1:
+ line = GetLine()
+ if not line:
+ return None
+ m = patterns.Pcommit.match(line)
+ if m:
+ break
+
+ #
+ # Look at the commit and see how many parents we have.
+ #
+ ids = m.group(1).split()
+ if len(ids) <= 1:
+ if len(CommitTrees.values()) > 0:
+ print 'No-Parent commit:', ids[0]
+ return GetCommit()
+ print 'Did you run git with --parents?'
+ print ids
+ sys.exit(1)
+ if len(ids) == 2: # Simple commit
+ return Commit(ids[0], ids[1])
+ #
+ # OK, we have a merge.
+ #
+ merge = Merge(ids[0], ids[1])
+ for id in ids[2:]:
+ merge.addparent(id)
+ #
+ # We need to figure out what kind of merge it is, so read through the
+ # descriptive text to the merge line.
+ #
+ while 1:
+ line = GetLine()
+ if not line:
+ print 'EOF looking for merge line'
+ return None
+ #
+ # Maybe it's an external merge?
+ #
+ m = patterns.PExtMerge.match(line)
+ if m:
+ merge.addtree(LookupTree(m.group(2)))
+ return merge
+ #
+ # OK, maybe it's internal
+ #
+ if patterns.PIntMerge.match(line) or patterns.PIntMerge2.match(line):
+ #print 'Internal:', line[:-1]
+ merge.internal = 1
+ return merge
+ m = patterns.Pcommit.match(line)
+ if m:
+ print 'Hit next commit (%s) looking for merge line' % (m.group(1))
+ SaveLine(line)
+ return GetCommit()
+
+#
+# Print out a tree and its inputs
+#
+def PrintTree(tree, indent = ''):
+ print '%s%4d %s' % (indent, len(tree.commits), tree.name)
+ for input in tree.inputs:
+ PrintTree(input, indent + ' ')
+
+#
+# Let's try to build a data structure giving the patch flows.
+#
+class FlowNode:
+ def __init__(self, tree):
+ self.tree = tree
+ self.inputs = { }
+ self.commits = 0
+
+def BuildFlowTree():
+ rootnode = FlowNode(Mainline)
+ for centry in CommitTrees.values():
+ FillFlowPath(centry.path, rootnode)
+ return rootnode
+
+def FillFlowPath(path, node):
+ node.commits += 1
+ if len(path) == 0:
+ return
+ next, rest = path[0], path[1:]
+ try:
+ nextnode = node.inputs[next.name]
+ except KeyError:
+ nextnode = node.inputs[next.name] = FlowNode(next)
+ return FillFlowPath(rest, nextnode)
+
+def PrintFlowTree(ftree, indent = ''):
+ print '%s%3d %s' % (indent, ftree.commits, ftree.tree.name)
+ for input in ftree.inputs.values():
+ PrintFlowTree(input, indent + ' ')
+
+#
+# Something for graphviz
+#
+GVHeader = '''digraph "runtree" {
+graph [ label = "Patch flow into the mainline",
+ concentrate = true,
+ nodesep = 0.1,
+ rankdir = LR ];
+node [shape = polygon,
+ sides = 4,
+ height = 0.3
+ fontsize = 8];
+'''
+
+
+MainlineCommits = 0
+
+def GVTree(ftree):
+ global MainlineCommits
+ MainlineCommits = ftree.commits
+ gvf = open('runtree.gv', 'w')
+ gvf.write(GVHeader)
+ inputs = ftree.inputs.values()
+ inputs.sort(GVSort)
+ for input in inputs:
+ GVPrintNode(gvf, input, 'Mainline')
+ gvf.write('}\n')
+
+def GVNodeName(treename):
+ sname = treename.split('/')
+ if treename.find('kernel.org') >= 0:
+ return '%s/%s' % (sname[-2], sname[-1])
+ sep = treename.find ('://')
+ if sep > 0:
+ return treename[sep+3:]
+ return treename
+
+def GVSort(n1, n2):
+ return n2.commits - n1.commits
+
+def GVPrintNode(gvf, node, parent):
+ name = GVNodeName(node.tree.name)
+ gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits))
+ gvf.write (', arrowsize = 0.5')
+ if MainlineCommits/node.commits < 20:
+ gvf.write(', color = red')
+ elif MainlineCommits/node.commits < 100:
+ gvf.write(', color = orange');
+ gvf.write(']\n')
+ inputs = node.inputs.values()
+ if inputs:
+ inputs.sort(GVSort)
+ for input in inputs:
+ GVPrintNode(gvf, input, name)
+
+#
+# Main code.
+#
+commit = GetCommit()
+ncommits = 0
+while commit:
+ ncommits += 1
+ entry = LookupCommitTree(commit.id)
+ tree = entry.tree
+ priority = entry.priority
+ tree.addcommit(commit.id)
+ #
+ # For regular commits, just remember the tree involved
+ #
+ if not commit.ismerge:
+ AddCommitTree(commit.parent, entry)
+ #
+ # For merges we have to deal with all the parents.
+ #
+ else:
+ AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path))
+ if commit.internal:
+ for p in commit.parents[1:]:
+ path = entry.path + [tree]
+ AddCommitTree(p, CTEntry (tree, priority, entry.path))
+ else:
+ for p in commit.parents[1:]:
+ path = entry.path + [commit.tree]
+ AddCommitTree(p, CTEntry (commit.tree, priority + 1, path))
+ if commit.tree is not Mainline:
+ tree.addinput(commit.tree)
+ commit = GetCommit()
+
+#PrintTree(Mainline)
+ftree = BuildFlowTree()
+PrintFlowTree(ftree)
+GVTree(ftree)
+print '%d commits total, %d trees' % (MainlineCommits, len (KnownTrees.keys()))