diff options
author | Jonathan Corbet <corbet@lwn.net> | 2012-04-06 16:00:04 -0600 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2012-04-06 16:00:04 -0600 |
commit | 1e293bc90a2a0d53340c96e74c9391049f4e7165 (patch) | |
tree | 1f2b9b585ca8bb04833a1651b17cf5c0691d4885 | |
parent | 2797636b98f525dea6142840d2f79fb148626766 (diff) |
Add version tracking support and an "unknown hackers" report
Version tracking was used to see who had contributed to the most kernel
releases; not sure it's a long-term-useful feature. The unknown hackers
report helps when trying to improve the database.
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
-rwxr-xr-x | gitdm | 14 |
-rw-r--r-- | patterns.py | 3 |
-rw-r--r-- | reports.py | 72 |
3 files changed, 82 insertions, 7 deletions
```diff
diff --git a/gitdm b/gitdm
--- a/gitdm
+++ b/gitdm
@@ -5,8 +5,8 @@
 #
 # This code is part of the LWN git data miner.
 #
-# Copyright 2007-11 Eklektix, Inc.
-# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
+# Copyright 2007-12 Eklektix, Inc.
+# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
 # Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
 #
 # This file may be distributed under the terms of the GNU General
@@ -43,6 +43,7 @@ DirName = ''
 Aggregate = 'month'
 Numstat = 0
 ReportByFileType = 0
+ReportUnknowns = False
 
 #
 # Options:
@@ -60,6 +61,7 @@ ReportByFileType = 0
 # -r pattern Restrict to files matching pattern
 # -s Ignore author SOB lines
 # -u Map unknown employers to '(Unknown)'
+# -U Dump unknown hackers in report
 # -x file.csv Export raw statistics as CSV
 # -w Aggregrate the raw statistics by weeks instead of months
 # -y Aggregrate the raw statistics by years instead of months
@@ -69,9 +71,9 @@ def ParseOpts ():
     global MapUnknown, DevReports
     global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
     global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
-    global ReportByFileType
+    global ReportByFileType, ReportUnknowns
 
-    opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stuwx:yz')
+    opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stUuwx:yz')
     for opt in opts:
         if opt[0] == '-a':
             AkpmOverLt = 1
@@ -102,6 +104,8 @@ def ParseOpts ():
             ReportByFileType = 1
         elif opt[0] == '-u':
             MapUnknown = 1
+        elif opt[0] == '-U':
+            ReportUnknowns = True
         elif opt[0] == '-x':
             CSVFile = open (opt[1], 'w')
             print "open output file " + opt[1] + "\n"
@@ -492,6 +496,8 @@ if CSVFile:
 
 if DevReports:
     reports.DevReports (hlist, TotalChanged, CSCount, TotalRemoved)
+if ReportUnknowns:
+    reports.ReportUnknowns(hlist, CSCount)
 reports.EmplReports (elist, TotalChanged, CSCount)
 
 if ReportByFileType and Numstat:
diff --git a/patterns.py b/patterns.py
index 803e532..4d4a347 100644
--- a/patterns.py
+++ b/patterns.py
@@ -21,7 +21,8 @@ import re
 _pemail = r'\s+"?([^<"]+)"?\s<([^>]+)>' # just email addr + name
 
 patterns = {
-    'commit': re.compile (r'^commit ([0-9a-f ]+)$'),
+    'tagcommit': re.compile (r'^commit ([\da-f]+) .*tag: (v[23]\.\d(\.\d\d?)?)'),
+    'commit': re.compile (r'^commit ([0-9a-f ]+)'),
     'author': re.compile (r'^Author:' + _pemail + '$'),
     'signed-off-by': re.compile (r'^\s+Signed-off-by:' + _pemail + '.*$'),
     'merge': re.compile (r'^Merge:.*$'),
diff --git a/reports.py b/reports.py
--- a/reports.py
+++ b/reports.py
@@ -3,8 +3,8 @@
 #
 # This code is part of the LWN git data miner.
 #
-# Copyright 2007-11 Eklektix, Inc.
-# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
+# Copyright 2007-12 Eklektix, Inc.
+# Copyright 2007-12 Jonathan Corbet <corbet@lwn.net>
 #
 # This file may be distributed under the terms of the GNU General
 # Public License, version 2.
@@ -58,6 +58,10 @@ TRow = ''' <tr class="%s">
 <td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>
 '''
 
+TRowStr = ''' <tr class="%s">
+<td>%s</td><td align="right">%d</td><td>%s</td></tr>
+'''
+
 def ReportLine (text, count, pct):
     global HTMLclass
     if count == 0:
@@ -67,6 +71,15 @@ def ReportLine (text, count, pct):
         HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct))
         HTMLclass ^= 1
 
+def ReportLineStr (text, count, extra):
+    global HTMLclass
+    if count == 0:
+        return
+    Outfile.write ('%-25s %4d %s\n' % (text, count, extra))
+    if HTMLfile:
+        HTMLfile.write (TRowStr % (HClasses[HTMLclass], text, count, extra))
+        HTMLclass ^= 1
+
 def EndReport ():
     if HTMLfile:
         HTMLfile.write ('</table>\n\n')
@@ -284,6 +297,34 @@ def ReportByRepCreds (hlist):
             break
     EndReport ()
 
+#
+# Versions.
+#
+def CompareVersionCounts (h1, h2):
+    if h1.versions and h2.versions:
+        return len (h2.versions) - len (h1.versions)
+    if h2.versions:
+        return 1
+    if h1.versions:
+        return -1
+    return 0
+
+def MissedVersions (hv, allv):
+    missed = [v for v in allv if v not in hv]
+    missed.reverse ()
+    return ' '.join (missed)
+
+def ReportVersions (hlist):
+    hlist.sort (CompareVersionCounts)
+    BeginReport ('Developers represented in the most kernel versions')
+    count = 0
+    allversions = hlist[0].versions
+    for h in hlist:
+        ReportLineStr (h.name, len (h.versions), MissedVersions (h.versions, allversions))
+        count += 1
+        if count >= ListCount:
+            break
+    EndReport ()
 
 
 def CompareESOBs (e1, e2):
@@ -341,6 +382,33 @@ def EmplReports (elist, totalchanged, cscount):
     ReportByESOBs (elist)
     ReportByEHackers (elist)
 
+#
+# Who are the unknown hackers?
+#
+def IsUnknown(h):
+    empl = h.employer[0][0][1].name
+    return h.email[0] == empl or empl == '(Unknown)'
+
+def ReportUnknowns(hlist, cscount):
+    #
+    # Trim the list to just the unknowns; try to work properly whether
+    # mapping to (Unknown) is happening or not.
+    #
+    ulist = [ h for h in hlist if IsUnknown(h) ]
+    ulist.sort(ComparePCount)
+    count = 0
+    BeginReport('Developers with unknown affiliation')
+    for h in ulist:
+        pcount = len(h.patches)
+        if pcount > 0:
+            ReportLine(h.name, pcount, (pcount*100.0)/cscount)
+        count += 1
+        if count >= ListCount:
+            break
+    EndReport()
+
+
+
 def ReportByFileType (hacker_list):
     total = {}
     total_by_hacker = {}
```