summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Corbet <corbet@lwn.net>2008-06-27 08:58:35 -0600
committerJonathan Corbet <corbet@lwn.net>2008-06-27 08:58:35 -0600
commite1a6d06d6553c3b2026304f5379c3737f1743e46 (patch)
treeac30cd7941aa0222e1736b790a4c67ec8090695d
Initial commit
First commit of gitdm to the new repo. Call it version 0.10 or something silly like that.
-rw-r--r--.gitignore2
-rw-r--r--COPYING2
-rw-r--r--ConfigFile.py110
-rw-r--r--README107
-rw-r--r--database.py202
-rwxr-xr-xgitdm499
-rw-r--r--gitdm.config22
-rw-r--r--sample-config/aliases5
-rw-r--r--sample-config/domain-map242
9 files changed, 1191 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f3d74a9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+*~
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..fe3eb43
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,2 @@
+The code in this directory can be distributed under the terms of the GNU
+General Public License, version 2.
diff --git a/ConfigFile.py b/ConfigFile.py
new file mode 100644
index 0000000..39310fb
--- /dev/null
+++ b/ConfigFile.py
@@ -0,0 +1,110 @@
+#
+# Stuff for dealing with configuration files.
+#
+import sys, re, datetime
+import database
+
+#
+# Read a line and strip out junk.
+#
+def ReadConfigLine (file):
+ line = file.readline ()
+ if not line:
+ return None
+ line = line.split('#')[0] # Get rid of any comments
+ line = line.strip () # and extra white space
+ if len (line) == 0: # we got rid of everything
+ return ReadConfigLine (file)
+ return line
+
+#
+# Give up and die.
+#
+def croak (message):
+ sys.stderr.write (message + '\n')
+ sys.exit (1)
+
+#
+# Read a list of email aliases.
+#
+def ReadEmailAliases (name):
+ try:
+ file = open (name, 'r')
+ except IOError:
+ croak ('Unable to open email alias file %s' % (name))
+ line = ReadConfigLine (file)
+ while line:
+ sline = line.split ()
+ if len (sline) != 2:
+ croak ('Funky email alias line "%s"' % (line))
+ if sline[0].index ('@') <= 0 or sline[1].index ('@') <= 0:
+ croak ('Non-addresses in email alias "%s"' % (line))
+ database.AddEmailAlias (sline[0], sline[1])
+ line = ReadConfigLine (file)
+ file.close ()
+
+#
+# The Email/Employer map
+#
+# Groups: 1 = the address or domain (no white space), 2 = the employer
+# name (anything up to a '<', possibly with trailing white space),
+# 4 = the optional digits-digits-digits date that follows a '<'.
+EMMpat = re.compile (r'^([^\s]+)\s+([^<]+)\s*(<\s*(\d+-\d+-\d+)\s*)?$')
+
+def ReadEmailEmployers (name):
+ try:
+ file = open (name, 'r')
+ except IOError:
+ croak ('Unable to open email/employer file %s' % (name))
+ line = ReadConfigLine (file)
+ while line:
+ m = EMMpat.match (line)
+ if not m:
+ croak ('Funky email/employer line "%s"' % (line))
+ email = m.group (1)
+ company = m.group (2).strip ()
+ enddate = ParseDate (m.group (4))
+ database.AddEmailEmployerMapping (email, company, enddate)
+ line = ReadConfigLine (file)
+ file.close ()
+
+def ParseDate (cdate):
+ if not cdate:
+ return None
+ sdate = cdate.split ('-')
+ return datetime.date (int (sdate[0]), int (sdate[1]), int (sdate[2]))
+
+
+def ReadGroupMap (fname, employer):
+ try:
+ file = open (fname, 'r')
+ except IOError:
+ croak ('Unable to open group map file %s' % (fname))
+ line = ReadConfigLine (file)
+ while line:
+ database.AddEmailEmployerMapping (line, employer)
+ line = ReadConfigLine (file)
+ file.close ()
+
+#
+# Read an overall config file.
+#
+def ConfigFile (name):
+ try:
+ file = open (name, 'r')
+ except IOError:
+ croak ('Unable to open config file %s' % (name))
+ line = ReadConfigLine (file)
+ while line:
+ sline = line.split ()
+ if len (sline) < 2:
+ croak ('Funky config line: "%s"' % (line))
+ if sline[0] == 'EmailAliases':
+ ReadEmailAliases (sline[1])
+ elif sline[0] == 'EmailMap':
+ ReadEmailEmployers (sline[1])
+ elif sline[0] == 'GroupMap':
+ if len (sline) != 3:
+ croak ('Funky group map line "%s"' % (line))
+ ReadGroupMap (sline[1], sline[2])
+ else:
+ croak ('Unrecognized config line: "%s"' % (line))
+ line = ReadConfigLine (file)
+
diff --git a/README b/README
new file mode 100644
index 0000000..62c8d31
--- /dev/null
+++ b/README
@@ -0,0 +1,107 @@
+The code in this directory makes up the "git data miner," a simple hack
+which attempts to figure things out from the revision history in a git
+repository.
+
+RUNNING GITDM
+
+Run it like this:
+
+ git log -p -M [details] | gitdm [options]
+
+The [details] tell git which changesets are of interest; the [options] can
+be:
+
+ -a If a patch contains signoff lines from both Andrew Morton
+ and Linus Torvalds, omit Linus's.
+
+ -c file Specify the name of the gitdm configuration file.
+ By default, "./gitdm.config" is used.
+
+ -d Omit the developer reports, giving employer information
+ only.
+
+ -D Rather than create the usual statistics, create a
+ file providing lines changed per day, suitable for
+ feeding to a tool like gnuplot.
+
+ -h file Generate HTML output to the given file
+
+ -l num Only list the top <num> entries in each report.
+
+ -o file Write text output to the given file (default is stdout).
+
+ -r pat Only generate statistics for changes to files whose
+ name matches the given regular expression.
+
+ -s Ignore Signed-off-by lines which match the author of
+ each patch.
+
+ -u Group all unknown developers under the "(Unknown)"
+ employer.
+
+ -z Dump out the hacker database to "database.dump".
+
+A typical command line used to generate the "who wrote 2.6.x" LWN articles
+looks like:
+
+ git log -p -M v2.6.19..v2.6.20 | \
+ gitdm -u -s -a -o results -h results.html
+
+
+CONFIGURATION FILE
+
+The main purpose of the configuration file is to direct the mapping of
+email addresses onto employers. Please note that the config file parser is
+exceptionally stupid and unrobust at this point, but it gets the job done.
+
+Blank lines and lines beginning with "#" are ignored. Everything else
+specifies a file with some sort of mapping:
+
+EmailAliases file
+
+ Developers often post code under a number of different email
+ addresses, but it can be desirable to group them all together in
+ the statistics. An EmailAliases file just contains a bunch of
+ lines of the form:
+
+ alias@address canonical@address
+
+ Any patches originating from alias@address will be treated as if
+ they had come from canonical@address.
+
+
+EmailMap file
+
+ Map email addresses onto employers. These files contain lines
+ like:
+
+ [user@]domain employer [< yyyy-mm-dd]
+
+ If the "user@" portion is missing, all email from the given domain
+ will be treated as being associated with the given employer. If a
+ date is provided, the entry is only valid up to that date;
+ otherwise it is considered valid into the indefinite future. This
+ feature can be useful for properly tracking developers' work when
+ they change employers but do not change email addresses.
+
+
+GroupMap file employer
+
+ This is a variant of EmailMap provided for convenience; it contains
+ email addresses only, all of which are associated with the given
+ employer.
+
+
+NOTES AND CREDITS
+
+Gitdm was written by Jonathan Corbet; many useful contributions have come
+from Greg Kroah-Hartman.
+
+Please note that this tool is provided in the hope that it will be useful,
+but it is not put forward as an example of excellence in design or
+implementation. Hacking on gitdm tends to stop the moment it performs
+whatever task is required of it at the moment. Patches to make it less
+hacky, less ugly, and more robust are welcome.
+
+Jonathan Corbet
+corbet@lwn.net
diff --git a/database.py b/database.py
new file mode 100644
index 0000000..edb54cf
--- /dev/null
+++ b/database.py
@@ -0,0 +1,202 @@
+#
+# The "database".
+#
+
+#
+# This code is part of the LWN git data miner.
+#
+# Copyright 2007 LWN.net
+# Copyright 2007 Jonathan Corbet <corbet@lwn.net>
+#
+# This file may be distributed under the terms of the GNU General
+# Public License, version 2.
+import sys, datetime
+
+
+class Hacker:
+ def __init__ (self, name, id, elist, email):
+ self.name = name
+ self.id = id
+ self.employer = [ elist ]
+ self.email = [ email ]
+ self.added = self.removed = 0
+ self.patches = [ ]
+ self.signoffs = [ ]
+
+ def addemail (self, email, elist):
+ self.email.append (email)
+ self.employer.append (elist)
+ HackersByEmail[email] = self
+
+ def emailemployer (self, email, date):
+ for i in range (0, len (self.email)):
+ if self.email[i] == email:
+ for edate, empl in self.employer[i]:
+ if edate > date:
+ return empl
+ print 'OOPS. ', self.name, self.employer, self.email, email, date
+ return None # Should not happen
+
+ def addpatch (self, patch):
+ self.added += patch.added
+ self.removed += patch.removed
+ self.patches.append (patch)
+
+ def addsob (self, patch):
+ self.signoffs.append (patch)
+
+HackersByName = { }
+HackersByEmail = { }
+HackersByID = { }
+MaxID = 0
+
+def StoreHacker (name, elist, email):
+ global MaxID
+
+ id = MaxID
+ MaxID += 1
+ h = Hacker (name, id, elist, email)
+ HackersByName[name] = h
+ HackersByEmail[email] = h
+ HackersByID[id] = h
+ return h
+
+def LookupEmail (addr):
+ try:
+ return HackersByEmail[addr]
+ except KeyError:
+ return None
+
+def LookupName (name):
+ try:
+ return HackersByName[name]
+ except KeyError:
+ return None
+
+def LookupID (id):
+ try:
+ return HackersByID[id]
+ except KeyError:
+ return None
+
+def AllHackers ():
+ # Return every known Hacker object (the values of HackersByID).
+ return HackersByID.values ()
+# return [h for h in HackersByID.values ()] # if (h.added + h.removed) > 0]
+
+def DumpDB ():
+ out = open ('database.dump', 'w')
+ names = HackersByName.keys ()
+ names.sort ()
+ for name in names:
+ h = HackersByName[name]
+ out.write ('%4d %s %d p (+%d -%d) sob: %d\n' % (h.id, h.name,
+ len (h.patches),
+ h.added, h.removed,
+ len (h.signoffs)))
+ for i in range (0, len (h.email)):
+ out.write ('\t%s -> \n' % (h.email[i]))
+ for date, empl in h.employer[i]:
+ out.write ('\t\t %d-%d-%d %s\n' % (date.year, date.month, date.day,
+ empl.name))
+
+#
+# Employer info.
+#
+class Employer:
+ def __init__ (self, name):
+ self.name = name
+ self.added = self.removed = self.count = self.changed = 0
+ self.sobs = 0
+ self.hackers = [ ]
+
+ def AddCSet (self, patch):
+ self.added += patch.added
+ self.removed += patch.removed
+ self.changed += max(patch.added, patch.removed)
+ self.count += 1
+ if patch.author not in self.hackers:
+ self.hackers.append (patch.author)
+
+ def AddSOB (self):
+ self.sobs += 1
+
+Employers = { }
+
+def GetEmployer (name):
+ try:
+ return Employers[name]
+ except KeyError:
+ e = Employer (name)
+ Employers[name] = e
+ return e
+
+def AllEmployers ():
+ # Return every Employer object created so far (the values of Employers).
+ return Employers.values ()
+
+#
+# The email map.
+#
+EmailAliases = { }
+
+def AddEmailAlias (variant, canonical):
+ if EmailAliases.has_key (variant):
+ sys.stderr.write ('Duplicate email alias for %s\n' % (variant))
+ EmailAliases[variant] = canonical
+
+def RemapEmail (email):
+ email = email.lower ()
+ try:
+ return EmailAliases[email]
+ except KeyError:
+ return email
+
+#
+# Email-to-employer mapping.
+#
+EmailToEmployer = { }
+nextyear = datetime.date.today () + datetime.timedelta (days = 365)
+
+def AddEmailEmployerMapping (email, employer, end = nextyear):
+ if end is None:
+ end = nextyear
+ email = email.lower ()
+ empl = GetEmployer (employer)
+ try:
+ l = EmailToEmployer[email]
+ print email, l
+ for i in range (0, len(l)):
+ date, xempl = l[i]
+ if date == end: # probably both nextyear
+ print 'WARNING: duplicate email/empl for %s' % (email)
+ if date > end:
+ l.insert (i, (end, empl))
+ return
+ l.append ((end, empl))
+ except KeyError:
+ EmailToEmployer[email] = [(end, empl)]
+
+def MapToEmployer (email, unknown = 0):
+ email = email.lower ()
+ try:
+ return EmailToEmployer[email]
+ except KeyError:
+ pass
+ namedom = email.split ('@')
+ if len (namedom) < 2:
+ print 'Oops...funky email %s' % email
+ return [(nextyear, GetEmployer ('Funky'))]
+ s = namedom[1].split ('.')
+ for dots in range (len (s) - 2, -1, -1):
+ addr = '.'.join (s[dots:])
+ try:
+ return EmailToEmployer[addr]
+ except KeyError:
+ pass
+ if unknown:
+ return [(nextyear, GetEmployer ('(Unknown)'))]
+ return [(nextyear, GetEmployer (email))]
+
+
+def LookupEmployer (email, mapunknown = 0):
+ elist = MapToEmployer (email, mapunknown)
+ return elist # GetEmployer (ename)
diff --git a/gitdm b/gitdm
new file mode 100755
index 0000000..32f27c8
--- /dev/null
+++ b/gitdm
@@ -0,0 +1,499 @@
+#!/usr/bin/python
+#
+
+#
+# This code is part of the LWN git data miner.
+#
+# Copyright 2007 LWN.net
+# Copyright 2007 Jonathan Corbet <corbet@lwn.net>
+#
+# This file may be distributed under the terms of the GNU General
+# Public License, version 2.
+
+
+import database, ConfigFile
+import getopt, datetime
+import os, re, sys, rfc822, string
+
+#
+# Some people, when confronted with a problem, think "I know, I'll use regular
+# expressions." Now they have two problems.
+# -- Jamie Zawinski
+#
+# Header lines of one log entry: the commit id, the author's name and
+# address, a signoff (searched for anywhere in a line), a merge marker.
+Pcommit = re.compile (r'^commit ([0-9a-f]+)$')
+Pauthor = re.compile (r'^Author: ([^<]+)\s<([^>]+)>$')
+Psob = re.compile (r'Signed-off-by:\s+([^<]+)\s+<([^>]+)>')
+Pmerge = re.compile (r'^Merge:.*$')
+# Diff body: a line starting with a single '+' or a single '-'.
+Padd = re.compile (r'^\+[^\+].*$')
+Prem = re.compile (r'^-[^-].*$')
+# "Date:" or "CommitDate:"; group 2 is the date text.
+Pdate = re.compile (r'^(Commit)?Date:\s+(.*)$')
+# The "--- " and "+++ " file-name lines of a diff; group 1 is the name.
+Pfilea = re.compile (r'^---\s+(.*)$')
+Pfileb = re.compile (r'^\+\+\+\s+(.*)$')
+
+class patch:
+ # Bare attribute holder; grabpatch () assigns commit, merge, added,
+ # removed, author, email, sobs and date on each instance.
+ pass
+
+
+#
+# Control options.
+#
+Outfile = sys.stdout
+ListCount = 999999
+MapUnknown = 0
+DevReports = 1
+DateStats = 0
+AuthorSOBs = 1
+FileFilter = None
+AkpmOverLt = 0
+DumpDB = 0
+CFName = 'gitdm.config'
+#
+# Options:
+#
+# -a Andrew Morton's signoffs shadow Linus's
+# -c cfile Specify a configuration file
+# -d Omit individual developer stats
+# -D Output date statistics
+# -h hfile HTML output to hfile
+# -l count Maximum length for output lists
+# -o file File for text output
+# -r pattern Restrict to files matching pattern
+# -s Ignore author SOB lines
+# -u Map unknown employers to '(Unknown)'
+# -z Dump out the hacker database at completion
+
+def ParseOpts ():
+ global Outfile, ListCount, MapUnknown, HTMLfile, DevReports
+ global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
+ global CFName
+
+ opts, rest = getopt.getopt (sys.argv[1:], 'adDh:l:o:r:suz')
+ for opt in opts:
+ if opt[0] == '-a':
+ AkpmOverLt = 1
+ elif opt[0] == '-c':
+ CFName = opt[1]
+ elif opt[0] == '-d':
+ DevReports = 0
+ elif opt[0] == '-D':
+ DateStats = 1
+ elif opt[0] == '-h':
+ HTMLfile = open (opt[1], 'w')
+ elif opt[0] == '-l':
+ ListCount = int (opt[1])
+ elif opt[0] == '-o':
+ Outfile = open (opt[1], 'w')
+ elif opt[0] == '-r':
+ print 'Filter on "%s"' % (opt[1])
+ FileFilter = re.compile (opt[1])
+ elif opt[0] == '-s':
+ AuthorSOBs = 0
+ elif opt[0] == '-u':
+ MapUnknown = 1
+ elif opt[0] == '-z':
+ DumpDB = 1
+
+
+
+def LookupStoreHacker (name, email):
+ email = database.RemapEmail (email)
+ h = database.LookupEmail (email)
+ if h: # already there
+ return h
+ elist = database.LookupEmployer (email, MapUnknown)
+ h = database.LookupName (name)
+ if h: # new email
+ h.addemail (email, elist)
+ return h
+ return database.StoreHacker(name, elist, email)
+
+#
+# Date tracking.
+#
+
+DateMap = { }
+
+def AddDateLines(date, lines):
+ if lines > 1000000:
+ print 'Skip big patch (%d)' % lines
+ return
+ dt = (date.year, date.month, date.day)
+ try:
+ DateMap[date] += lines
+ except KeyError:
+ DateMap[date] = lines
+
+def PrintDateStats():
+ dates = DateMap.keys ()
+ dates.sort ()
+ total = 0
+ datef = open ('datelc', 'w')
+ for date in dates:
+ total += DateMap[date]
+ datef.write ('%d/%02d/%02d %6d %7d\n' % (date[0], date[1], date[2],
+ DateMap[date], total))
+
+#
+# The core hack for grabbing the information about a changeset.
+#
+def grabpatch():
+ #
+ # Read one changeset from stdin and return it as a patch object;
+ # returns None once the input is exhausted. The global NextLine
+ # holds one line of lookahead between calls.
+ #
+ global NextLine, TotalAdded, TotalRemoved, TotalChanged
+
+ #
+ # Skip forward to the next "commit" line.
+ #
+ while (1):
+ m = Pcommit.match (NextLine)
+ if m:
+ break;
+ NextLine = sys.stdin.readline ()
+ if not NextLine:
+ return
+
+ p = patch()
+ p.commit = m.group (1)
+ p.merge = p.added = p.removed = 0
+ #
+ # Placeholder author, used unless an Author: line replaces it below.
+ #
+ p.author = LookupStoreHacker('Unknown hacker', 'unknown@hacker.net')
+ p.email = 'unknown@hacker.net'
+ p.sobs = [ ]
+ NextLine = sys.stdin.readline ()
+ #
+ # With a file filter, nothing is counted until a file line matches.
+ #
+ ignore = (FileFilter is not None)
+ while NextLine:
+ Line = NextLine
+ #
+ # If this line starts a new commit, drop out.
+ #
+ m = Pcommit.match (Line)
+ if m:
+ break
+ NextLine = sys.stdin.readline ()
+ #
+ # Maybe it's an author line?
+ #
+ m = Pauthor.match (Line)
+ if m:
+ p.email = database.RemapEmail (m.group (2))
+ p.author = LookupStoreHacker(m.group (1), p.email)
+ continue
+ #
+ # Could be a signed-off-by:
+ #
+ # NOTE(review): the hacker is looked up a second time below, from
+ # the raw address; presumably that yields the same object as
+ # sobber - confirm.
+ #
+ m = Psob.search (Line)
+ if m:
+ email = database.RemapEmail (m.group (2))
+ sobber = LookupStoreHacker(m.group (1), email)
+ if sobber != p.author or AuthorSOBs:
+ p.sobs.append ((email, LookupStoreHacker(m.group (1), m.group (2))))
+ continue
+ #
+ # If this one is a merge, make note of the fact.
+ #
+ m = Pmerge.match (Line)
+ if m:
+ p.merge = 1
+ continue
+ #
+ # See if it's the date.
+ #
+ # NOTE(review): this is the only place p.date is set, and the code
+ # after the loop reads it; presumably every changeset in the input
+ # carries a date line - confirm.
+ #
+ m = Pdate.match (Line)
+ if m:
+ dt = rfc822.parsedate(m.group (2))
+ p.date = datetime.date (dt[0], dt[1], dt[2])
+ continue
+ #
+ # If we have a file filter, check for file lines.
+ #
+ if FileFilter:
+ ignore = ApplyFileFilter (Line, ignore)
+ #
+ # OK, maybe it's part of the diff itself.
+ #
+ if not ignore:
+ if Padd.match (Line):
+ p.added += 1
+ continue
+ if Prem.match (Line):
+ p.removed += 1
+ #
+ # Record some global information - but only if this patch had
+ # stuff which wasn't ignored. This work should be done
+ # elsewhere,
+ #
+ if ((p.added + p.removed) > 0 or not FileFilter) and not p.merge:
+ TotalAdded += p.added
+ TotalRemoved += p.removed
+ TotalChanged += max (p.added, p.removed)
+ AddDateLines (p.date, max (p.added, p.removed))
+ empl = p.author.emailemployer (p.email, p.date)
+ empl.AddCSet (p)
+ if AkpmOverLt:
+ TrimLTSOBs (p)
+ for sobemail, sobber in p.sobs:
+ empl = sobber.emailemployer (sobemail, p.date)
+ empl.AddSOB()
+ return p
+
+
+def ApplyFileFilter (line, ignore):
+ #
+ # If this is the first file line (--- a/), set ignore one way
+ # or the other.
+ #
+ m = Pfilea.match (line)
+ if m:
+ file = m.group (1)
+ if FileFilter.search (file):
+ return 0
+ return 1
+ #
+ # For the second line, we can turn ignore off, but not on
+ #
+ m = Pfileb.match (line)
+ if m:
+ file = m.group (1)
+ if FileFilter.search (file):
+ return 0
+ return ignore
+
+#
+# If this patch is signed off by both Andrew Morton and Linus Torvalds,
+# remove the (redundant) Linus signoff.
+#
+def TrimLTSOBs (p):
+ if Linus in p.sobs and Akpm in p.sobs:
+ p.sobs.remove (Linus)
+
+#
+# HTML output support stuff.
+#
+HTMLfile = None
+HTMLclass = 0
+HClasses = ['Even', 'Odd']
+
+THead = '''<p>
+<table cellspacing=3>
+<tr><th colspan=3>%s</th></tr>
+'''
+
+
+def BeginReport (title):
+ # Start a report: write the title to the text output and, when HTML
+ # output is enabled, open a table headed by the same title. The
+ # even/odd row class is reset for the new table.
+ global HTMLclass
+
+ Outfile.write ('\n%s\n' % title)
+ if HTMLfile:
+ HTMLfile.write (THead % title)
+ HTMLclass = 0
+
+TRow = ''' <tr class="%s">
+<td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr>
+'''
+
+def ReportLine (text, count, pct):
+ # Emit one report row (label, count, percentage) to the text output
+ # and, when enabled, to the HTML table. Rows with a zero count are
+ # dropped. HTMLclass flips between 0 and 1 to pick the row class.
+ global HTMLclass
+ if count == 0:
+ return
+ Outfile.write ('%-25s %4d (%.1f%%)\n' % (text, count, pct))
+ if HTMLfile:
+ HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct))
+ HTMLclass ^= 1
+
+def EndReport ():
+ # Close the HTML table, if HTML output is enabled; the text output
+ # needs no terminator.
+ if HTMLfile:
+ HTMLfile.write ('</table>\n\n')
+
+#
+# Comparison and report generation functions.
+#
+def ComparePCount (h1, h2):
+ # cmp-style comparator: most patches first.
+ return len (h2.patches) - len (h1.patches)
+
+def ReportByPCount (hlist):
+ hlist.sort (ComparePCount)
+ count = 0
+ BeginReport ('Developers with the most changesets')
+ for h in hlist:
+ pcount = len (h.patches)
+ changed = max(h.added, h.removed)
+ delta = h.added - h.removed
+ if pcount > 0:
+ ReportLine (h.name, pcount, (pcount*100.0)/CSCount)
+ count += 1
+ if count >= ListCount:
+ break
+ EndReport ()
+
+def CompareLChanged (h1, h2):
+ # cmp-style comparator: largest max (added, removed) first.
+ return max(h2.added, h2.removed) - max(h1.added, h1.removed)
+
+def ReportByLChanged (hlist):
+ hlist.sort (CompareLChanged)
+ count = 0
+ BeginReport ('Developers with the most changed lines')
+ for h in hlist:
+ pcount = len (h.patches)
+ changed = max(h.added, h.removed)
+ delta = h.added - h.removed
+ if (h.added + h.removed) > 0:
+ ReportLine (h.name, changed, (changed*100.0)/TotalChanged)
+ count += 1
+ if count >= ListCount:
+ break
+ EndReport ()
+
+def CompareLRemoved (h1, h2):
+ # cmp-style comparator: largest net removal (removed - added) first.
+ return (h2.removed - h2.added) - (h1.removed - h1.added)
+
+def ReportByLRemoved (hlist):
+ hlist.sort (CompareLRemoved)
+ count = 0
+ BeginReport ('Developers with the most lines removed')
+ for h in hlist:
+ pcount = len (h.patches)
+ changed = max(h.added, h.removed)
+ delta = h.added - h.removed
+ if delta < 0:
+ ReportLine (h.name, -delta, (-delta*100.0)/TotalRemoved)
+ count += 1
+ if count >= ListCount:
+ break
+ EndReport ()
+
+def CompareEPCount (e1, e2):
+ # cmp-style comparator: employer with the most changesets first.
+ return e2.count - e1.count
+
+def ReportByPCEmpl (elist):
+ elist.sort (CompareEPCount)
+ count = 0
+ BeginReport ('Top changeset contributors by employer')
+ for e in elist:
+ if e.count != 0:
+ ReportLine (e.name, e.count, (e.count*100.0)/CSCount)
+ count += 1
+ if count >= ListCount:
+ break
+ EndReport ()
+
+
+
+def CompareELChanged (e1, e2):
+ # cmp-style comparator: employer with the most changed lines first.
+ return e2.changed - e1.changed
+
+def ReportByELChanged (elist):
+ elist.sort (CompareELChanged)
+ count = 0
+ BeginReport ('Top lines changed by employer')
+ for e in elist:
+ if e.changed != 0:
+ ReportLine (e.name, e.changed, (e.changed*100.0)/TotalChanged)
+ count += 1
+ if count >= ListCount:
+ break
+ EndReport ()
+
+
+
+def CompareSOBs (h1, h2):
+ # cmp-style comparator: developer with the most signoffs first.
+ return len (h2.signoffs) - len (h1.signoffs)
+
+def ReportBySOBs (hlist):
+ hlist.sort (CompareSOBs)
+ totalsobs = 0
+ for h in hlist:
+ totalsobs += len (h.signoffs)
+ count = 0
+ BeginReport ('Developers with the most signoffs (total %d)' % totalsobs)
+ for h in hlist:
+ scount = len (h.signoffs)
+ if scount > 0:
+ ReportLine (h.name, scount, (scount*100.0)/totalsobs)
+ count += 1
+ if count >= ListCount:
+ break
+ EndReport ()
+
+def CompareESOBs (e1, e2):
+ # cmp-style comparator: employer with the most signoffs first.
+ return e2.sobs - e1.sobs
+
+def ReportByESOBs (elist):
+ elist.sort (CompareESOBs)
+ totalsobs = 0
+ for e in elist:
+ totalsobs += e.sobs
+ count = 0
+ BeginReport ('Employers with the most signoffs (total %d)' % totalsobs)
+ for e in elist:
+ if e.sobs > 0:
+ ReportLine (e.name, e.sobs, (e.sobs*100.0)/totalsobs)
+ count += 1
+ if count >= ListCount:
+ break
+ EndReport ()
+
+#
+# Here starts the real program. Read the config files.
+#
+ConfigFile.ConfigFile (CFName)
+
+#
+# Let's pre-seed the database with a couple of hackers
+# we want to remember.
+#
+Linus = ('torvalds@linux-foundation.org',
+ LookupStoreHacker ('Linus Torvalds', 'torvalds@linux-foundation.org'))
+Akpm = ('akpm@linux-foundation.org',
+ LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org'))
+
+NextLine = sys.stdin.readline ()
+TotalChanged = TotalAdded = TotalRemoved = 0
+ParseOpts ()
+
+#
+# Snarf changesets.
+#
+print 'Grabbing changesets...\r',
+sys.stdout.flush ()
+
+# printcount counts calls to grabpatch () and drives the progress
+# display; CSCount is the changeset total used by the reports.
+printcount = CSCount = 0
+while (1):
+ # Refresh the progress line every 50 iterations.
+ if (printcount % 50) == 0:
+ print 'Grabbing changesets...%d\r' % printcount,
+ sys.stdout.flush ()
+ printcount += 1
+ p = grabpatch()
+ # grabpatch () returns None at end of input.
+ if not p:
+ break
+ if p.added > 100000 or p.removed > 100000:
+ print 'Skipping massive add'
+ continue
+ # With a file filter, changesets with no counted lines are dropped.
+ if FileFilter and p.added == 0 and p.removed == 0:
+ continue
+ if not p.merge:
+ p.author.addpatch (p)
+ for sobemail, sob in p.sobs:
+ sob.addsob (p)
+ CSCount += 1
+print 'Grabbing changesets...done'
+
+if DumpDB:
+ database.DumpDB ()
+#
+# Say something
+#
+hlist = database.AllHackers ()
+elist = database.AllEmployers ()
+Outfile.write ('Processed %d csets from %d developers\n' % (CSCount,
+ len (hlist)))
+Outfile.write ('%d employers found\n' % len (elist))
+Outfile.write ('A total of %d lines added, %d removed (delta %d)\n' %
+ (TotalAdded, TotalRemoved, TotalAdded - TotalRemoved))
+if TotalChanged == 0:
+ TotalChanged = 1 # HACK to avoid div by zero
+#
+# With -D, the per-day statistics replace the usual reports.
+#
+if DateStats:
+ PrintDateStats ()
+ sys.exit(0)
+
+#
+# Developer reports can be turned off (-d); employer reports always run.
+#
+if DevReports:
+ ReportByPCount (hlist)
+ ReportByLChanged (hlist)
+ ReportByLRemoved (hlist)
+ ReportBySOBs (hlist)
+ReportByPCEmpl (elist)
+ReportByELChanged (elist)
+ReportByESOBs (elist)
diff --git a/gitdm.config b/gitdm.config
new file mode 100644
index 0000000..588d6ef
--- /dev/null
+++ b/gitdm.config
@@ -0,0 +1,22 @@
+#
+# This is a sample gitdm configuration file.
+#
+
+#
+# EmailAliases lets us cope with developers who use more
+# than one address.
+#
+EmailAliases sample-config/aliases
+
+#
+# EmailMap does the main work of mapping addresses onto
+# employers.
+#
+EmailMap sample-config/domain-map
+
+#
+# Use GroupMap to map a file full of addresses to the
+# same employer
+#
+# GroupMap sample-config/illuminati The Illuminati
+#
diff --git a/sample-config/aliases b/sample-config/aliases
new file mode 100644
index 0000000..8cd50db
--- /dev/null
+++ b/sample-config/aliases
@@ -0,0 +1,5 @@
+#
+# This is the email aliases file, mapping secondary addresses
+# onto a single, canonical address.
+#
+corbet@eklektix.com corbet@lwn.net
diff --git a/sample-config/domain-map b/sample-config/domain-map
new file mode 100644
index 0000000..bbd81f7
--- /dev/null
+++ b/sample-config/domain-map
@@ -0,0 +1,242 @@
+#
+# Here is a set of mappings of domain names onto employer names.
+#
+8d.com 8D Technologies
+aconex.com Aconex
+adaptec.com Adaptec
+aist.go.jp National Institute of Advanced Industrial Science and Technology
+akamai.com Akamai Technologies
+am.sony.com Sony
+amd.com AMD
+analog.com Analog Devices
+arastra.com Arastra Inc
+arm.com ARM
+artecdesign.ee Artec Design
+arvoo.nl ARVOO Engineering
+atmel.com Atmel
+atomide.com Atomide
+avtrex.com Avtrex
+axis.com Axis Communications
+azingo.com Azingo
+balabit.com BalaBit
+balabit.hu BalaBit
+baslerweb.com Basler Vision Technologies
+bluehost.com Bluehost
+bluewatersys.com Bluewater Systems
+broadcom.com Broadcom
+brontes3d.com Brontes Technologies
+bull.net Bull SAS
+cam.ac.uk University of Cambridge
+ccur.com Concurrent Computer Corporation
+celunite.com Azingo
+chelsio.com Chelsio
+cisco.com Cisco
+citi.umich.edu Univ. of Michigan CITI
+clusterfs.com Sun
+cn.fujitsu.com Fujitsu
+compulab.co.il CompuLab
+computergmbh.de CC Computer Consultants
+comx.dk ComX Networks
+conectiva.com.br Mandriva
+coraid.com Coraid
+cosmosbay.com Cosmosbay~Vectis
+cozybit.com cozybit
+cray.com Cray
+csr.com CSR
+cyberguard.com Secure Computing
+cybernetics.com Cybernetics
+data.slu.se Uppsala University
+dave.eu Dave S.r.l.
+de.bosch.com Bosch
+dell.com Dell
+denx.de DENX Software Engineering
+devicescape.com Devicescape
+digi.com Digi International
+dti2.net DTI2 - Desarrollo de la tecnologia de las comunicaciones
+edesix.com Edesix Ltd
+elandigitalsystems.com Elan Digital Systems
+embeddedalley.com Embedded Alley Solutions
+empirix.com Empirix
+emulex.com Emulex
+endrelia.com Endrelia
+ericsson.com Ericsson
+fixstars.com Fixstars Technologies
+free-electrons.com Free Electrons
+freescale.com Freescale
+fujitsu.com Fujitsu
+gaisler.com Gaisler Research
+gefanuc.com GE Fanuc
+geomatys.fr Geomatys
+google.com Google
+gvs.co.yu GVS
+hansenpartnership.com Hansen Partnership
+harris.com Harris Corporation
+hauppauge.com Hauppauge
+hermes-softlab.com HERMES SoftLab
+hevs.ch HES-SO Valais Wallis
+highpoint-tech.com HighPoint Technologies
+hitachi.co.jp Hitachi
+hitachi.com Hitachi
+hitachisoft.jp Hitachi
+hp.com HP
+hvsistemas.es HV Sistemas
+ibm.com IBM
+ibp.de ipb (uk) Ltd.
+icplus.com.tw IC Plus
+igel.co.jp igel
+inl.fr INL
+inria.fr INRIA
+intel.com Intel
+iram.es IRAM
+jmicron.com jmicron.com
+jp.fujitsu.com Fujitsu
+katalix.com Katalix Systems
+keyspan.com InnoSys
+laptop.org OLPC
+laurelnetworks.com ECI Telecom
+linutronix.de linutronix
+linux-foundation.org Linux Foundation
+lippert-at.de LiPPERT Embedded Computers GmbH
+lippertembedded.de LiPPERT Embedded Computers GmbH
+llnl.gov Lawrence Livermore National Laboratory
+lnxi.com Linux Networx
+logitech.com Logitech
+lsi.com LSI Logic
+lsil.com LSI Logic
+lwn.net LWN.net
+macqel.be Macq Electronique
+macqel.com Macq Electronique
+mandriva.com Mandriva
+mandriva.com.br Mandriva
+marvell.com Marvell
+mellanox.co.il Mellanox
+melware.de Cytronics & Melware
+microgate.com MicroGate Systems
+mips.com MIPS
+miraclelinux.com Miracle Linux
+mn-solutions.de M&N Solutions
+moreton.com.au Secure Computing
+motorola.com Motorola
+movial.fi Movial
+mvista.com MontaVista
+myri.com Myricom
+namesys.com NameSys
+nec.co.jp NEC
+nec.com NEC
+netapp.com NetApp
+neterion.com Neterion
+netxen.com NetXen
+niif.hu NIIF Institute
+nokia.com Nokia
+nomadgs.com Nomad Global Solutions
+nortel.com Nortel
+novell.com Novell
+ntt.co.jp NTT
+ntts.co.jp NTT
+nuovasystems.com Nuova Systems
+nvidia.com NVidia
+obsidianresearch.com Obsidian Research
+octant-fr.com Octant Informatique
+onelan.co.uk ONELAN
+onstor.com Onstor
+openedhand.com OpenedHand
+opengridcomputing.com Open Grid Computing
+openmoko.org OpenMoko
+openvz.org Parallels
+oracle.com Oracle
+ornl.gov Oak Ridge National Laboratory
+osdl.org Linux Foundation
+ozlabs.org IBM
+panasas.com Panasas
+panasonic.com Panasonic
+papercut.bz PaperCut Software
+papercut.com PaperCut Software
+parallels.com Parallels
+pasemi.com PA Semi Corporation
+pengutronix.de Pengutronix
+pheonix.com Phoeonix
+philosys.de Philosys Software
+pikron.com PiKRON s.r.o
+pmc-sierra.com PMC-Sierra
+promise.com Promise Technology
+qlogic.com QLogic
+qumranet.com Qumranet
+realtek.com.tw Realtek
+redhat.com Red Hat
+renesas.com Renesas Technology
+rockwell.com Rockwell
+rowland.harvard.edu Rowland Institute, Harvard
+rtr.ca Real-Time Remedies
+samsung.com Samsung
+sanpeople.com SANPeople
+savantav.com Savant Systems
+secretlab.ca Secretlab
+securecomputing.com Secure Computing
+semihalf.com Semihalf Embedded Systems
+sf-tec.de Science Fiction Technologies
+sgi.com SGI
+sicortex.com Sicortex
+siemens.com Siemens
+sierrawireless.com Sierra Wireless
+sigma-chemnitz.de SIGMA Chemnitz
+snapgear.com Snapgear
+solidboot.com Solid Boot Ltd.
+sony.co.jp Sony
+sony.com Sony
+sonycom.com Sony
+spidernet.net SpiderNet Services
+st.com ST Microelectronics
+stlinux.com ST Microelectronics
+starentnetworks.com Starent Networks
+steeleye.com SteelEye
+sun.com Sun
+suse.com Novell
+suse.cz Novell
+suse.de Novell
+sw.ru Parallels
+swsoft.com Parallels
+tapsys.com Tapestry Systems
+telargo.com Telargo
+tensilica.com Tensilica
+terascala.com Terascala
+thinktube.com Thinktube
+ti.com Texas Instruments
+til-technologies.fr TIL Technologies
+tls.msk.ru Telecom-Service
+toptica.com TOPTICA Photonics
+toshiba.co.jp Toshiba
+total-knowledge.com Total Knowledge
+towertech.it Tower Technologies
+tpi.com TriplePoint
+transitive.com Transitive
+transmode.se Transmode Systems
+tresys.com Tresys
+tripeaks.co.jp Tripeaks
+trustedcs.com Trusted Computer Solutions
+tungstengraphics.com Tungsten Graphics
+tycho.nsa.gov US National Security Agency
+ubuntu.com Canonical
+uhulinux.hu UHU-Linux
+unisys.com Unisys
+valinux.co.jp VA Linux Systems Japan
+verismonetworks.com Verismo
+veritas.com Veritas
+via.com.tw Via
+vivecode.com Vivecode
+vmware.com VMWare
+volkswagen.de Volkswagen
+voltaire.com Voltaire
+vyatta.com Vyatta
+wabtec.com Wabtec Railway Electronics
+wacom.com Wacom
+winbond.com Winbond Electronics
+winbond.com.tw Winbond Electronics
+wincor-nixdorf.com Wincor Nixdorf
+windriver.com Wind River
+wipro.com Wipro
+wolfsonmicro.com Wolfson Microelectronics
+xensource.com XenSource
+xiv.co.il XIV Information Systems
+xivstorage.com XIV Information Systems
+trinnov.com Trinnov Audio
+citrix.com Citrix