summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Germán Póo-Caamaño <gpoo@gnome.org> 2011-06-22 18:57:02 -0700
committer: Germán Póo-Caamaño <gpoo@gnome.org> 2011-06-22 19:27:47 -0700
commit: 7b26ae210995a6a746dbfb20ede382bee7808dc9 (patch)
tree: b71ff81874ee2e15cd42b45be4fa6979158324ae
parent: efcc42015375e79d27c2a6ac9e290d16d06ccf81 (diff)
Move out the grabpatch from the parser
The class LogPatchSplitter provides an iterator per patch. This makes
the code cleaner, easier to read and more pythonic.

The class only gets each commit set as lines. It is possible to test
it separately by:

    $ git log | python logparser.py | more

Signed-off-by: Germán Póo-Caamaño <gpoo@gnome.org>
-rwxr-xr-x  gitdm         36
-rw-r--r--  logparser.py  90
2 files changed, 102 insertions, 24 deletions
diff --git a/gitdm b/gitdm
index fa7e6ba..24b5c96 100755
--- a/gitdm
+++ b/gitdm
@@ -1,11 +1,12 @@
#!/usr/bin/python
-#
+#-*- coding:utf-8 -*-
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
+# Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
@@ -15,6 +16,7 @@ import database, csvdump, ConfigFile, reports
import getopt, datetime
import os, re, sys, rfc822, string
import file_types
+import logparser
from patterns import patterns
Today = datetime.date.today()
@@ -204,29 +206,14 @@ def parse_numstat(line, file_filter):
#
# The core hack for grabbing the information about a changeset.
#
-def grabpatch():
- global NextLine
-
- while (1):
- m = patterns['commit'].match (NextLine)
- if m:
- break;
- NextLine = sys.stdin.readline ()
- if not NextLine:
- return
+def grabpatch(logpatch):
+ m = patterns['commit'].match (logpatch[0])
+ if not m:
+ return None
p = patch(m.group (1))
- NextLine = sys.stdin.readline ()
ignore = (FileFilter is not None)
- while NextLine:
- Line = NextLine
- #
- # If this line starts a new commit, drop out.
- #
- m = patterns['commit'].match (Line)
- if m:
- break
- NextLine = sys.stdin.readline ()
+ for Line in logpatch[1:]:
#
# Maybe it's an author line?
#
@@ -379,7 +366,6 @@ if AkpmOverLt == 1:
Akpm = ('akpm@linux-foundation.org',
LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org'))
-NextLine = sys.stdin.readline ()
TotalChanged = TotalAdded = TotalRemoved = 0
#
@@ -387,12 +373,14 @@ TotalChanged = TotalAdded = TotalRemoved = 0
#
print >> sys.stderr, 'Grabbing changesets...\r',
+patches = logparser.LogPatchSplitter(sys.stdin)
printcount = CSCount = 0
-while (1):
+
+for logpatch in patches:
if (printcount % 50) == 0:
print >> sys.stderr, 'Grabbing changesets...%d\r' % printcount,
printcount += 1
- p = grabpatch()
+ p = grabpatch(logpatch)
if not p:
break
# if p.added > 100000 or p.removed > 100000:
diff --git a/logparser.py b/logparser.py
new file mode 100644
index 0000000..b375034
--- /dev/null
+++ b/logparser.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+#
+# Copyright © 2009 Germán Póo-Caamaño <gpoo@gnome.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+import sys
+from patterns import patterns
+
+class LogPatchSplitter:
+ """
+ LogPatchSplitters provides a iterator to extract every
+ changeset from a git log output.
+
+ Typical use case:
+
+ patches = LogPatchSplitter(sys.stdin)
+
+ for patch in patches:
+ parse_patch(patch)
+ """
+
+ def __init__(self, fd):
+ self.fd = fd
+ self.buffer = None
+ self.patch = []
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ patch = self.__grab_patch__()
+ if not patch:
+ raise StopIteration
+ return patch
+
+ def __grab_patch__(self):
+ """
+ Extract a patch from the file descriptor and the
+ patch is returned as a list of lines.
+ """
+
+ patch = []
+ line = self.buffer or self.fd.readline()
+
+ while line:
+ m = patterns['commit'].match(line)
+ if m:
+ patch = [line]
+ break
+ line = self.fd.readline()
+
+ if not line:
+ return None
+
+ line = self.fd.readline()
+ while line:
+ # If this line starts a new commit, drop out.
+ m = patterns['commit'].match(line)
+ if m:
+ self.buffer = line
+ break
+
+ patch.append(line)
+ self.buffer = None
+ line = self.fd.readline()
+
+ return patch
+
+
+if __name__ == '__main__':
+ patches = LogPatchSplitter(sys.stdin)
+
+ for patch in patches:
+ print '---------- NEW PATCH ----------'
+ for line in patch:
+ print line,