summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Corbet <corbet@lwn.net> 2012-04-06 16:00:04 -0600
committer: Jonathan Corbet <corbet@lwn.net> 2012-04-06 16:00:04 -0600
commit: 1e293bc90a2a0d53340c96e74c9391049f4e7165 (patch)
tree: 1f2b9b585ca8bb04833a1651b17cf5c0691d4885
parent: 2797636b98f525dea6142840d2f79fb148626766 (diff)
Add version tracking support and an "unknown hackers" report
Version tracking was used to see who had contributed to the most kernel releases; not sure it's a long-term-useful feature. The unknown hackers report helps when trying to improve the database. Signed-off-by: Jonathan Corbet <corbet@lwn.net>
-rwxr-xr-x gitdm 14
-rw-r--r-- patterns.py 3
-rw-r--r-- reports.py 72
3 files changed, 82 insertions, 7 deletions
diff --git a/gitdm b/gitdm
index 1367ebd..25ffafb 100755
--- a/gitdm
+++ b/gitdm
@@ -5,8 +5,8 @@
#
# This code is part of the LWN git data miner.
#
-# Copyright 2007-11 Eklektix, Inc.
-# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
+# Copyright 2007-12 Eklektix, Inc.
+# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
# Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
#
# This file may be distributed under the terms of the GNU General
@@ -43,6 +43,7 @@ DirName = ''
Aggregate = 'month'
Numstat = 0
ReportByFileType = 0
+ReportUnknowns = False
#
# Options:
@@ -60,6 +61,7 @@ ReportByFileType = 0
# -r pattern Restrict to files matching pattern
# -s Ignore author SOB lines
# -u Map unknown employers to '(Unknown)'
+# -U Dump unknown hackers in report
# -x file.csv Export raw statistics as CSV
# -w Aggregate the raw statistics by weeks instead of months
# -y Aggregate the raw statistics by years instead of months
@@ -69,9 +71,9 @@ def ParseOpts ():
global MapUnknown, DevReports
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
- global ReportByFileType
+ global ReportByFileType, ReportUnknowns
- opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stuwx:yz')
+ opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stUuwx:yz')
for opt in opts:
if opt[0] == '-a':
AkpmOverLt = 1
@@ -102,6 +104,8 @@ def ParseOpts ():
ReportByFileType = 1
elif opt[0] == '-u':
MapUnknown = 1
+ elif opt[0] == '-U':
+ ReportUnknowns = True
elif opt[0] == '-x':
CSVFile = open (opt[1], 'w')
print "open output file " + opt[1] + "\n"
@@ -492,6 +496,8 @@ if CSVFile:
if DevReports:
reports.DevReports (hlist, TotalChanged, CSCount, TotalRemoved)
+if ReportUnknowns:
+ reports.ReportUnknowns(hlist, CSCount)
reports.EmplReports (elist, TotalChanged, CSCount)
if ReportByFileType and Numstat:
diff --git a/patterns.py b/patterns.py
index 803e532..4d4a347 100644
--- a/patterns.py
+++ b/patterns.py
@@ -21,7 +21,8 @@ import re
_pemail = r'\s+"?([^<"]+)"?\s<([^>]+)>' # just email addr + name
patterns = {
- 'commit': re.compile (r'^commit ([0-9a-f ]+)$'),
+ 'tagcommit': re.compile (r'^commit ([\da-f]+) .*tag: (v[23]\.\d(\.\d\d?)?)'),
+ 'commit': re.compile (r'^commit ([0-9a-f ]+)'),
'author': re.compile (r'^Author:' + _pemail + '$'),
'signed-off-by': re.compile (r'^\s+Signed-off-by:' + _pemail + '.*$'),
'merge': re.compile (r'^Merge:.*$'),
diff --git a/reports.py b/reports.py
index 9b8cce9..bc1e18c 100644
--- a/reports.py
+++ b/reports.py
@@ -3,8 +3,8 @@
#
# This code is part of the LWN git data miner.
#
-# Copyright 2007-11 Eklektix, Inc.
-# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
+# Copyright 2007-12 Eklektix, Inc.
+# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
@@ -58,6 +58,10 @@ TRow = ''' <tr class="%s">
<td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>
'''
+TRowStr = ''' <tr class="%s">
+<td>%s</td><td align="right">%d</td><td>%s</td></tr>
+'''
+
def ReportLine (text, count, pct):
global HTMLclass
if count == 0:
@@ -67,6 +71,15 @@ def ReportLine (text, count, pct):
HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct))
HTMLclass ^= 1
def ReportLineStr (text, count, extra):
    """Emit one report row whose third column is a free-form string.

    text  -- label for the row (written with %s)
    count -- integer shown in the second column; a count of zero
             suppresses the row entirely
    extra -- string shown in the third column

    The row always goes to Outfile as plain text.  When HTMLfile is set
    the same row is also written there using the TRowStr template, and
    the HTMLclass toggle is flipped so consecutive rows alternate
    between the two entries of HClasses.
    """
    global HTMLclass
    if count != 0:
        Outfile.write ('%-25s %4d %s\n' % (text, count, extra))
        if HTMLfile:
            row = TRowStr % (HClasses[HTMLclass], text, count, extra)
            HTMLfile.write (row)
            # NOTE(review): nesting reconstructed from an unindented diff;
            # the toggle is assumed to live under the HTMLfile check.
            HTMLclass ^= 1
+
def EndReport ():
if HTMLfile:
HTMLfile.write ('</table>\n\n')
@@ -284,6 +297,34 @@ def ReportByRepCreds (hlist):
break
EndReport ()
+#
+# Versions.
+#
def CompareVersionCounts (h1, h2):
    """cmp-style comparator ordering hackers by number of versions, most first.

    Returns len(h2.versions) - len(h1.versions) when both hackers have a
    non-empty versions attribute.  Otherwise a hacker with versions sorts
    ahead of one without (result 1 or -1), and two hackers without
    versions compare equal (0).
    """
    has1 = bool (h1.versions)
    has2 = bool (h2.versions)
    if has1 and has2:
        return len (h2.versions) - len (h1.versions)
    # At most one side has versions here: 1 if only h2, -1 if only h1, else 0.
    return int (has2) - int (has1)
+
def MissedVersions (hv, allv):
    """Return the entries of allv that are absent from hv, as one string.

    hv   -- container of versions a hacker appears in (tested with "in")
    allv -- iterable of all versions to check against

    The missing entries are listed in the reverse of their order in allv
    and joined with single spaces; the result is '' when nothing is
    missing.
    """
    absent = []
    for version in allv:
        if version not in hv:
            absent.append (version)
    return ' '.join (reversed (absent))
+
def ReportVersions (hlist):
    """Report the developers who show up in the most kernel versions.

    hlist -- list of hacker objects exposing "name" and "versions"

    hlist is sorted in place (callers see the new order) so that hackers
    with the most versions come first.  One row per hacker is then written
    through ReportLineStr -- name, number of versions, and the versions
    missed -- stopping once ListCount hackers have been processed.  The
    version list of the top-ranked hacker serves as the reference set for
    computing the "missed" column.

    An empty hlist yields an empty report rather than an IndexError, and
    a falsy versions attribute is treated as "no versions".
    """
    # Same ordering CompareVersionCounts produces (most versions first,
    # hackers without versions last, ties keep their relative order), but
    # expressed as a key function: list.sort no longer accepts a
    # positional comparison function in Python 3.
    hlist.sort (key = lambda h: len (h.versions or ()), reverse = True)
    BeginReport ('Developers represented in the most kernel versions')
    # Guard the lookup: with no hackers there is no reference version list.
    allversions = (hlist[0].versions or []) if hlist else []
    for count, h in enumerate (hlist, 1):
        versions = h.versions or []
        ReportLineStr (h.name, len (versions), MissedVersions (versions, allversions))
        if count >= ListCount:
            break
    EndReport ()
def CompareESOBs (e1, e2):
@@ -341,6 +382,33 @@ def EmplReports (elist, totalchanged, cscount):
ReportByESOBs (elist)
ReportByEHackers (elist)
+#
+# Who are the unknown hackers?
+#
def IsUnknown(h):
    """Tell whether hacker h has no known employer.

    Looks at the name of the employer object found at h.employer[0][0][1]
    and returns True when that name is either the literal '(Unknown)' or
    identical to the hacker's first email address (h.email[0]).
    """
    first_employer = h.employer[0][0][1]
    return first_employer.name in (h.email[0], '(Unknown)')
+
def ReportUnknowns(hlist, cscount):
    """Report the hackers whose employer is unknown.

    hlist   -- list of hacker objects exposing "name" and "patches"
    cscount -- total changeset count, used as the percentage denominator

    hlist is narrowed to the hackers for which IsUnknown() is true, which
    covers both the case where unknown employers are mapped to '(Unknown)'
    and the case where they are not.  That subset is sorted with
    ComparePCount, and each hacker with at least one patch is written
    through ReportLine with its patch count and share of cscount.  The
    loop stops once ListCount rows have been counted.
    """
    unknowns = list(filter(IsUnknown, hlist))
    unknowns.sort(ComparePCount)
    BeginReport('Developers with unknown affiliation')
    shown = 0
    for hacker in unknowns:
        npatches = len(hacker.patches)
        if npatches > 0:
            ReportLine(hacker.name, npatches, (npatches*100.0)/cscount)
            # NOTE(review): nesting reconstructed from an unindented diff;
            # the counter is assumed to advance only for reported hackers.
            shown += 1
        if shown >= ListCount:
            break
    EndReport()
+
+
+
def ReportByFileType (hacker_list):
total = {}
total_by_hacker = {}