summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Meeks <michael.meeks@novell.com> 2008-09-29 17:46:37 +0100
committer: Jonathan Corbet <corbet@lwn.net> 2008-10-06 14:29:27 -0600
commit: d1a8929872216089864d4f28f9c27b2a648d7302 (patch)
tree: 28bd27683bb68e6cdd85e2f31728db1eeb7673cd
parent: 558dbe1cbef23cc2ccb05e70f5b0e3db9fb497b5 (diff)
gitdm patch ...
Hi guys,

I knocked up a patch to generate some per-month, by-affiliation statistics from the gitdm output; attached for interest or merging.

A sample of the output, complete with OO.o data-pilot and pretty chart, is here:
http://www.gnome.org/~michael/data/2008-09-29-linux-stats.ods
with chart here:
http://www.gnome.org/~michael/images/2008-09-29-kernel-active.png
caption being: "Graph showing number and affiliation of active kernel developers (contributing more than 100 lines per month). Quick affiliation key, from bottom up: Unknown, No-Affiliation, IBM, RedHat, Novell, Intel ..."

These are as yet not published; I plan to use them as a comparison to OO.o's somewhat mediocre equivalents; hope to go live with them soon (and fix the horrible bugs in stacked area charts to make them actually pretty).

HTH,

Michael.

--
michael.meeks@novell.com <><, Pseudo Engineer, itinerant idiot

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
-rw-r--r-- csv.py 38
-rwxr-xr-x gitdm 17
2 files changed, 51 insertions, 4 deletions
diff --git a/csv.py b/csv.py
new file mode 100644
index 0000000..34ea10a
--- /dev/null
+++ b/csv.py
@@ -0,0 +1,38 @@
+#
+# aggregate per-month statistics for people
+#
+import sys, datetime
+
+class CSVStat:
+ def __init__ (self, name, employer, date):
+ self.name = name
+ self.employer = employer
+ self.added = self.removed = 0
+ self.date = date
+ def accumulate (self, p):
+ self.added = self.added + p.added
+ self.removed = self.removed + p.removed
+
+PeriodCommitHash = { }
+
+def AccumulatePatch (p):
+ date = "%.2d-%.2d-01"%(p.date.year, p.date.month)
+ authdatekey = "%s-%s"%(p.author.name, date)
+ if authdatekey not in PeriodCommitHash:
+ empl = p.author.emailemployer (p.email, p.date)
+ stat = CSVStat (p.author.name, empl, date)
+ PeriodCommitHash[authdatekey] = stat
+ else:
+ stat = PeriodCommitHash[authdatekey]
+ stat.accumulate (p)
+
+def OutputCSV (file):
+ if file is None:
+ return
+ file.write ("Name\tAffliation\tDate\tAdded\tRemoved\n")
+ for date, stat in PeriodCommitHash.items():
+ # sanitise names " is common and \" sometimes too
+ empl_name = stat.employer.name.replace ("\"", ".").replace ("\\", ".")
+ author_name = stat.name.replace ("\"", ".").replace ("\\", ".")
+ file.write ("\"%s\"\t\"%s\"\t%s\t%d\t%d\n"%(author_name, empl_name, stat.date, \
+ stat.added, stat.removed))
diff --git a/gitdm b/gitdm
index 20ec257..1c3adc7 100755
--- a/gitdm
+++ b/gitdm
@@ -11,7 +11,7 @@
# Public License, version 2.
-import database, ConfigFile
+import database, csv, ConfigFile
import getopt, datetime
import os, re, sys, rfc822, string
from patterns import *
@@ -30,6 +30,7 @@ DevReports = 1
DateStats = 0
AuthorSOBs = 1
FileFilter = None
+CSVFile = None
AkpmOverLt = 0
DumpDB = 0
CFName = 'gitdm.config'
@@ -46,14 +47,15 @@ CFName = 'gitdm.config'
# -r pattern Restrict to files matching pattern
# -s Ignore author SOB lines
# -u Map unknown employers to '(Unknown)'
+# -x file.csv Export raw statistics as CSV
# -z Dump out the hacker database at completion
def ParseOpts ():
global Outfile, ListCount, MapUnknown, HTMLfile, DevReports
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
- global CFName
+ global CFName, CSVFile
- opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:suz')
+ opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:sux:z')
for opt in opts:
if opt[0] == '-a':
AkpmOverLt = 1
@@ -76,6 +78,9 @@ def ParseOpts ():
AuthorSOBs = 0
elif opt[0] == '-u':
MapUnknown = 1
+ elif opt[0] == '-x':
+ CSVFile = open (opt[1], 'w')
+ print "open output file " + opt[1] + "\n"
elif opt[0] == '-z':
DumpDB = 1
@@ -260,7 +265,6 @@ THead = '''<p>
<tr><th colspan=3>%s</th></tr>
'''
-
def BeginReport (title):
global HTMLclass
@@ -459,6 +463,7 @@ while (1):
for sobemail, sob in p.sobs:
sob.addsob (p)
CSCount += 1
+ csv.AccumulatePatch (p)
print >> sys.stderr, 'Grabbing changesets...done'
if DumpDB:
@@ -479,6 +484,10 @@ if DateStats:
PrintDateStats ()
sys.exit(0)
+csv.OutputCSV (CSVFile)
+if CSVFile is not None:
+ CSVFile.close ()
+
if DevReports:
ReportByPCount (hlist)
ReportByLChanged (hlist)