diff options
author | Jonathan Corbet <corbet@lwn.net> | 2008-06-27 08:58:35 -0600 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2008-06-27 08:58:35 -0600 |
commit | e1a6d06d6553c3b2026304f5379c3737f1743e46 (patch) | |
tree | ac30cd7941aa0222e1736b790a4c67ec8090695d |
Initial commit
First commit of gitdm to the new repo. Call it version 0.10 or something
silly like that.
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | COPYING | 2 | ||||
-rw-r--r-- | ConfigFile.py | 110 | ||||
-rw-r--r-- | README | 107 | ||||
-rw-r--r-- | database.py | 202 | ||||
-rwxr-xr-x | gitdm | 499 | ||||
-rw-r--r-- | gitdm.config | 22 | ||||
-rw-r--r-- | sample-config/aliases | 5 | ||||
-rw-r--r-- | sample-config/domain-map | 242 |
9 files changed, 1191 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f3d74a9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +*~ @@ -0,0 +1,2 @@ +The code in this directory can be distributed under the terms of the GNU +General Public License, version 2. diff --git a/ConfigFile.py b/ConfigFile.py new file mode 100644 index 0000000..39310fb --- /dev/null +++ b/ConfigFile.py @@ -0,0 +1,110 @@ +# +# Stuff for dealing with configuration files. +# +import sys, re, datetime +import database + +# +# Read a line and strip out junk. +# +def ReadConfigLine (file): + line = file.readline () + if not line: + return None + line = line.split('#')[0] # Get rid of any comments + line = line.strip () # and extra white space + if len (line) == 0: # we got rid of everything + return ReadConfigLine (file) + return line + +# +# Give up and die. +# +def croak (message): + sys.stderr.write (message + '\n') + sys.exit (1) + +# +# Read a list of email aliases. +# +def ReadEmailAliases (name): + try: + file = open (name, 'r') + except IOError: + croak ('Unable to open email alias file %s' % (name)) + line = ReadConfigLine (file) + while line: + sline = line.split () + if len (sline) != 2: + croak ('Funky email alias line "%s"' % (line)) + if sline[0].index ('@') <= 0 or sline[1].index ('@') <= 0: + croak ('Non-addresses in email alias "%s"' % (line)) + database.AddEmailAlias (sline[0], sline[1]) + line = ReadConfigLine (file) + file.close () + +# +# The Email/Employer map +# +EMMpat = re.compile (r'^([^\s]+)\s+([^<]+)\s*(<\s*(\d+-\d+-\d+)\s*)?$') + +def ReadEmailEmployers (name): + try: + file = open (name, 'r') + except IOError: + croak ('Unable to open email/employer file %s' % (name)) + line = ReadConfigLine (file) + while line: + m = EMMpat.match (line) + if not m: + croak ('Funky email/employer line "%s"' % (line)) + email = m.group (1) + company = m.group (2).strip () + enddate = ParseDate (m.group (4)) + database.AddEmailEmployerMapping (email, company, enddate) + line = 
ReadConfigLine (file) + file.close () + +def ParseDate (cdate): + if not cdate: + return None + sdate = cdate.split ('-') + return datetime.date (int (sdate[0]), int (sdate[1]), int (sdate[2])) + + +def ReadGroupMap (fname, employer): + try: + file = open (fname, 'r') + except IOError: + croak ('Unable to open group map file %s' % (fname)) + line = ReadConfigLine (file) + while line: + database.AddEmailEmployerMapping (line, employer) + line = ReadConfigLine (file) + file.close () + +# +# Read an overall config file. +# +def ConfigFile (name): + try: + file = open (name, 'r') + except IOError: + croak ('Unable to open config file %s' % (name)) + line = ReadConfigLine (file) + while line: + sline = line.split () + if len (sline) < 2: + croak ('Funky config line: "%s"' % (line)) + if sline[0] == 'EmailAliases': + ReadEmailAliases (sline[1]) + elif sline[0] == 'EmailMap': + ReadEmailEmployers (sline[1]) + elif sline[0] == 'GroupMap': + if len (sline) != 3: + croak ('Funky group map line "%s"' % (line)) + ReadGroupMap (sline[1], sline[2]) + else: + croak ('Unrecognized config line: "%s"' % (line)) + line = ReadConfigLine (file) + @@ -0,0 +1,107 @@ +The code in this directory makes up the "git data miner," a simple hack +which attempts to figure things out from the revision history in a git +repository. + +RUNNING GITDM + +Run it like this: + + git log -p -M [details] | gitdm [options] + +The [details] tell git which changesets are of interest; the [options] can +be: + + -a If a patch contains signoff lines from both Andrew Morton + and Linus Torvalds, omit Linus's. + + -c file Specify the name of the gitdm configuration file. + By default, "./gitdm.config" is used. + + -d Omit the developer reports, giving employer information + only. + + -D Rather than create the usual statistics, create a + file providing lines changed per day, suitable for + feeding to a tool like gnuplot. 
+ + -h file Generate HTML output to the given file + + -l num Only list the top <num> entries in each report. + + -o file Write text output to the given file (default is stdout). + + -r pat Only generate statistics for changes to files whose + name matches the given regular expression. + + -s Ignore Signed-off-by lines which match the author of + each patch. + + -u Group all unknown developers under the "(Unknown)" + employer. + + -z Dump out the hacker database to "database.dump". + +A typical command line used to generate the "who wrote 2.6.x" LWN articles +looks like: + + git log -p -M v2.6.19..v2.6.20 | \ + gitdm -u -s -a -o results -h results.html + + +CONFIGURATION FILE + +The main purpose of the configuration file is to direct the mapping of +email addresses onto employers. Please note that the config file parser is +exceptionally stupid and unrobust at this point, but it gets the job done. + +Blank lines and lines beginning with "#" are ignored. Everything else +specifies a file with some sort of mapping: + +EmailAliases file + + Developers often post code under a number of different email + addresses, but it can be desirable to group them all together in + the statistics. An EmailAliases file just contains a bunch of + lines of the form: + + alias@address canonical@address + + Any patches originating from alias@address will be treated as if + they had come from canonical@address. + + +EmailMap file + + Map email addresses onto employers. These files contain lines + like: + + [user@]domain employer [< yyyy-mm-dd] + + If the "user@" portion is missing, all email from the given domain + will be treated as being associated with the given employer. If a + date is provided, the entry is only valid up to that date; + otherwise it is considered valid into the indefinite future. This + feature can be useful for properly tracking developers' work when + they change employers but do not change email addresses. 
+ + +GroupMap file employer + + This is a variant of EmailMap provided for convenience; it contains + email addresses only, all of which are associated with the given + employer. + + +NOTES AND CREDITS + +Gitdm was written by Jonathan Corbet; many useful contributions have come +from Greg Kroah-Hartman. + +Please note that this tool is provided in the hope that it will be useful, +but it is not put forward as an example of excellence in design or +implementation. Hacking on gitdm tends to stop the moment it performs +whatever task is required of it at the moment. Patches to make it less +hacky, less ugly, and more robust are welcome. + +Jonathan Corbet +corbet@lwn.net diff --git a/database.py b/database.py new file mode 100644 index 0000000..edb54cf --- /dev/null +++ b/database.py @@ -0,0 +1,202 @@ +# +# The "database". +# + +# +# This code is part of the LWN git data miner. +# +# Copyright 2007 LWN.net +# Copyright 2007 Jonathan Corbet <corbet@lwn.net> +# +# This file may be distributed under the terms of the GNU General +# Public License, version 2. +import sys, datetime + + +class Hacker: + def __init__ (self, name, id, elist, email): + self.name = name + self.id = id + self.employer = [ elist ] + self.email = [ email ] + self.added = self.removed = 0 + self.patches = [ ] + self.signoffs = [ ] + + def addemail (self, email, elist): + self.email.append (email) + self.employer.append (elist) + HackersByEmail[email] = self + + def emailemployer (self, email, date): + for i in range (0, len (self.email)): + if self.email[i] == email: + for edate, empl in self.employer[i]: + if edate > date: + return empl + print 'OOPS. 
', self.name, self.employer, self.email, email, date + return None # Should not happen + + def addpatch (self, patch): + self.added += patch.added + self.removed += patch.removed + self.patches.append (patch) + + def addsob (self, patch): + self.signoffs.append (patch) + +HackersByName = { } +HackersByEmail = { } +HackersByID = { } +MaxID = 0 + +def StoreHacker (name, elist, email): + global MaxID + + id = MaxID + MaxID += 1 + h = Hacker (name, id, elist, email) + HackersByName[name] = h + HackersByEmail[email] = h + HackersByID[id] = h + return h + +def LookupEmail (addr): + try: + return HackersByEmail[addr] + except KeyError: + return None + +def LookupName (name): + try: + return HackersByName[name] + except KeyError: + return None + +def LookupID (id): + try: + return HackersByID[id] + except KeyError: + return None + +def AllHackers (): + return HackersByID.values () +# return [h for h in HackersByID.values ()] # if (h.added + h.removed) > 0] + +def DumpDB (): + out = open ('database.dump', 'w') + names = HackersByName.keys () + names.sort () + for name in names: + h = HackersByName[name] + out.write ('%4d %s %d p (+%d -%d) sob: %d\n' % (h.id, h.name, + len (h.patches), + h.added, h.removed, + len (h.signoffs))) + for i in range (0, len (h.email)): + out.write ('\t%s -> \n' % (h.email[i])) + for date, empl in h.employer[i]: + out.write ('\t\t %d-%d-%d %s\n' % (date.year, date.month, date.day, + empl.name)) + +# +# Employer info. 
+# +class Employer: + def __init__ (self, name): + self.name = name + self.added = self.removed = self.count = self.changed = 0 + self.sobs = 0 + self.hackers = [ ] + + def AddCSet (self, patch): + self.added += patch.added + self.removed += patch.removed + self.changed += max(patch.added, patch.removed) + self.count += 1 + if patch.author not in self.hackers: + self.hackers.append (patch.author) + + def AddSOB (self): + self.sobs += 1 + +Employers = { } + +def GetEmployer (name): + try: + return Employers[name] + except KeyError: + e = Employer (name) + Employers[name] = e + return e + +def AllEmployers (): + return Employers.values () + +# +# The email map. +# +EmailAliases = { } + +def AddEmailAlias (variant, canonical): + if EmailAliases.has_key (variant): + sys.stderr.write ('Duplicate email alias for %s\n' % (variant)) + EmailAliases[variant] = canonical + +def RemapEmail (email): + email = email.lower () + try: + return EmailAliases[email] + except KeyError: + return email + +# +# Email-to-employer mapping. 
+# +EmailToEmployer = { } +nextyear = datetime.date.today () + datetime.timedelta (days = 365) + +def AddEmailEmployerMapping (email, employer, end = nextyear): + if end is None: + end = nextyear + email = email.lower () + empl = GetEmployer (employer) + try: + l = EmailToEmployer[email] + print email, l + for i in range (0, len(l)): + date, xempl = l[i] + if date == end: # probably both nextyear + print 'WARNING: duplicate email/empl for %s' % (email) + if date > end: + l.insert (i, (end, empl)) + return + l.append ((end, empl)) + except KeyError: + EmailToEmployer[email] = [(end, empl)] + +def MapToEmployer (email, unknown = 0): + email = email.lower () + try: + return EmailToEmployer[email] + except KeyError: + pass + namedom = email.split ('@') + if len (namedom) < 2: + print 'Oops...funky email %s' % email + return [(nextyear, GetEmployer ('Funky'))] + s = namedom[1].split ('.') + for dots in range (len (s) - 2, -1, -1): + addr = '.'.join (s[dots:]) + try: + return EmailToEmployer[addr] + except KeyError: + pass + if unknown: + return [(nextyear, GetEmployer ('(Unknown)'))] + return [(nextyear, GetEmployer (email))] + + +def LookupEmployer (email, mapunknown = 0): + elist = MapToEmployer (email, mapunknown) + return elist # GetEmployer (ename) @@ -0,0 +1,499 @@ +#!/usr/bin/python +# + +# +# This code is part of the LWN git data miner. +# +# Copyright 2007 LWN.net +# Copyright 2007 Jonathan Corbet <corbet@lwn.net> +# +# This file may be distributed under the terms of the GNU General +# Public License, version 2. + + +import database, ConfigFile +import getopt, datetime +import os, re, sys, rfc822, string + +# +# Some people, when confronted with a problem, think "I know, I'll use regular +# expressions." Now they have two problems. 
+# -- Jamie Zawinski +# +Pcommit = re.compile (r'^commit ([0-9a-f]+)$') +Pauthor = re.compile (r'^Author: ([^<]+)\s<([^>]+)>$') +Psob = re.compile (r'Signed-off-by:\s+([^<]+)\s+<([^>]+)>') +Pmerge = re.compile (r'^Merge:.*$') +Padd = re.compile (r'^\+[^\+].*$') +Prem = re.compile (r'^-[^-].*$') +Pdate = re.compile (r'^(Commit)?Date:\s+(.*)$') +Pfilea = re.compile (r'^---\s+(.*)$') +Pfileb = re.compile (r'^\+\+\+\s+(.*)$') + +class patch: + pass + + +# +# Control options. +# +Outfile = sys.stdout +ListCount = 999999 +MapUnknown = 0 +DevReports = 1 +DateStats = 0 +AuthorSOBs = 1 +FileFilter = None +AkpmOverLt = 0 +DumpDB = 0 +CFName = 'gitdm.config' +# +# Options: +# +# -a Andrew Morton's signoffs shadow Linus's +# -c cfile Specify a configuration file +# -d Output individual developer stats +# -D Output date statistics +# -h hfile HTML output to hfile +# -l count Maximum length for output lists +# -o file File for text output +# -r pattern Restrict to files matching pattern +# -s Ignore author SOB lines +# -u Map unknown employers to '(Unknown)' +# -z Dump out the hacker database at completion + +def ParseOpts (): + global Outfile, ListCount, MapUnknown, HTMLfile, DevReports + global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB + global CFName + + opts, rest = getopt.getopt (sys.argv[1:], 'adDh:l:o:r:suz') + for opt in opts: + if opt[0] == '-a': + AkpmOverLt = 1 + elif opt[0] == '-c': + CFName = opt[1] + elif opt[0] == '-d': + DevReports = 0 + elif opt[0] == '-D': + DateStats = 1 + elif opt[0] == '-h': + HTMLfile = open (opt[1], 'w') + elif opt[0] == '-l': + ListCount = int (opt[1]) + elif opt[0] == '-o': + Outfile = open (opt[1], 'w') + elif opt[0] == '-r': + print 'Filter on "%s"' % (opt[1]) + FileFilter = re.compile (opt[1]) + elif opt[0] == '-s': + AuthorSOBs = 0 + elif opt[0] == '-u': + MapUnknown = 1 + elif opt[0] == '-z': + DumpDB = 1 + + + +def LookupStoreHacker (name, email): + email = database.RemapEmail (email) + h = database.LookupEmail (email) 
+ if h: # already there + return h + elist = database.LookupEmployer (email, MapUnknown) + h = database.LookupName (name) + if h: # new email + h.addemail (email, elist) + return h + return database.StoreHacker(name, elist, email) + +# +# Date tracking. +# + +DateMap = { } + +def AddDateLines(date, lines): + if lines > 1000000: + print 'Skip big patch (%d)' % lines + return + dt = (date.year, date.month, date.day) + try: + DateMap[date] += lines + except KeyError: + DateMap[date] = lines + +def PrintDateStats(): + dates = DateMap.keys () + dates.sort () + total = 0 + datef = open ('datelc', 'w') + for date in dates: + total += DateMap[date] + datef.write ('%d/%02d/%02d %6d %7d\n' % (date[0], date[1], date[2], + DateMap[date], total)) + +# +# The core hack for grabbing the information about a changeset. +# +def grabpatch(): + global NextLine, TotalAdded, TotalRemoved, TotalChanged + + while (1): + m = Pcommit.match (NextLine) + if m: + break; + NextLine = sys.stdin.readline () + if not NextLine: + return + + p = patch() + p.commit = m.group (1) + p.merge = p.added = p.removed = 0 + p.author = LookupStoreHacker('Unknown hacker', 'unknown@hacker.net') + p.email = 'unknown@hacker.net' + p.sobs = [ ] + NextLine = sys.stdin.readline () + ignore = (FileFilter is not None) + while NextLine: + Line = NextLine + # + # If this line starts a new commit, drop out. + # + m = Pcommit.match (Line) + if m: + break + NextLine = sys.stdin.readline () + # + # Maybe it's an author line? + # + m = Pauthor.match (Line) + if m: + p.email = database.RemapEmail (m.group (2)) + p.author = LookupStoreHacker(m.group (1), p.email) + continue + # + # Could be a signed-off-by: + # + m = Psob.search (Line) + if m: + email = database.RemapEmail (m.group (2)) + sobber = LookupStoreHacker(m.group (1), email) + if sobber != p.author or AuthorSOBs: + p.sobs.append ((email, LookupStoreHacker(m.group (1), m.group (2)))) + continue + # + # If this one is a merge, make note of the fact. 
+ # + m = Pmerge.match (Line) + if m: + p.merge = 1 + continue + # + # See if it's the date. + # + m = Pdate.match (Line) + if m: + dt = rfc822.parsedate(m.group (2)) + p.date = datetime.date (dt[0], dt[1], dt[2]) + continue + # + # If we have a file filter, check for file lines. + # + if FileFilter: + ignore = ApplyFileFilter (Line, ignore) + # + # OK, maybe it's part of the diff itself. + # + if not ignore: + if Padd.match (Line): + p.added += 1 + continue + if Prem.match (Line): + p.removed += 1 + # + # Record some global information - but only if this patch had + # stuff which wasn't ignored. This work should be done + # elsewhere, + # + if ((p.added + p.removed) > 0 or not FileFilter) and not p.merge: + TotalAdded += p.added + TotalRemoved += p.removed + TotalChanged += max (p.added, p.removed) + AddDateLines (p.date, max (p.added, p.removed)) + empl = p.author.emailemployer (p.email, p.date) + empl.AddCSet (p) + if AkpmOverLt: + TrimLTSOBs (p) + for sobemail, sobber in p.sobs: + empl = sobber.emailemployer (sobemail, p.date) + empl.AddSOB() + return p + + +def ApplyFileFilter (line, ignore): + # + # If this is the first file line (--- a/), set ignore one way + # or the other. + # + m = Pfilea.match (line) + if m: + file = m.group (1) + if FileFilter.search (file): + return 0 + return 1 + # + # For the second line, we can turn ignore off, but not on + # + m = Pfileb.match (line) + if m: + file = m.group (1) + if FileFilter.search (file): + return 0 + return ignore + +# +# If this patch is signed off by both Andrew Morton and Linus Torvalds, +# remove the (redundant) Linus signoff. +# +def TrimLTSOBs (p): + if Linus in p.sobs and Akpm in p.sobs: + p.sobs.remove (Linus) + +# +# HTML output support stuff. 
+# +HTMLfile = None +HTMLclass = 0 +HClasses = ['Even', 'Odd'] + +THead = '''<p> +<table cellspacing=3> +<tr><th colspan=3>%s</th></tr> +''' + + +def BeginReport (title): + global HTMLclass + + Outfile.write ('\n%s\n' % title) + if HTMLfile: + HTMLfile.write (THead % title) + HTMLclass = 0 + +TRow = ''' <tr class="%s"> +<td>%s</td><td align="right">%d</td><td align="right">%.1f%%</td></tr> +''' + +def ReportLine (text, count, pct): + global HTMLclass + if count == 0: + return + Outfile.write ('%-25s %4d (%.1f%%)\n' % (text, count, pct)) + if HTMLfile: + HTMLfile.write (TRow % (HClasses[HTMLclass], text, count, pct)) + HTMLclass ^= 1 + +def EndReport (): + if HTMLfile: + HTMLfile.write ('</table>\n\n') + +# +# Comparison and report generation functions. +# +def ComparePCount (h1, h2): + return len (h2.patches) - len (h1.patches) + +def ReportByPCount (hlist): + hlist.sort (ComparePCount) + count = 0 + BeginReport ('Developers with the most changesets') + for h in hlist: + pcount = len (h.patches) + changed = max(h.added, h.removed) + delta = h.added - h.removed + if pcount > 0: + ReportLine (h.name, pcount, (pcount*100.0)/CSCount) + count += 1 + if count >= ListCount: + break + EndReport () + +def CompareLChanged (h1, h2): + return max(h2.added, h2.removed) - max(h1.added, h1.removed) + +def ReportByLChanged (hlist): + hlist.sort (CompareLChanged) + count = 0 + BeginReport ('Developers with the most changed lines') + for h in hlist: + pcount = len (h.patches) + changed = max(h.added, h.removed) + delta = h.added - h.removed + if (h.added + h.removed) > 0: + ReportLine (h.name, changed, (changed*100.0)/TotalChanged) + count += 1 + if count >= ListCount: + break + EndReport () + +def CompareLRemoved (h1, h2): + return (h2.removed - h2.added) - (h1.removed - h1.added) + +def ReportByLRemoved (hlist): + hlist.sort (CompareLRemoved) + count = 0 + BeginReport ('Developers with the most lines removed') + for h in hlist: + pcount = len (h.patches) + changed = max(h.added, 
h.removed) + delta = h.added - h.removed + if delta < 0: + ReportLine (h.name, -delta, (-delta*100.0)/TotalRemoved) + count += 1 + if count >= ListCount: + break + EndReport () + +def CompareEPCount (e1, e2): + return e2.count - e1.count + +def ReportByPCEmpl (elist): + elist.sort (CompareEPCount) + count = 0 + BeginReport ('Top changeset contributors by employer') + for e in elist: + if e.count != 0: + ReportLine (e.name, e.count, (e.count*100.0)/CSCount) + count += 1 + if count >= ListCount: + break + EndReport () + + + +def CompareELChanged (e1, e2): + return e2.changed - e1.changed + +def ReportByELChanged (elist): + elist.sort (CompareELChanged) + count = 0 + BeginReport ('Top lines changed by employer') + for e in elist: + if e.changed != 0: + ReportLine (e.name, e.changed, (e.changed*100.0)/TotalChanged) + count += 1 + if count >= ListCount: + break + EndReport () + + + +def CompareSOBs (h1, h2): + return len (h2.signoffs) - len (h1.signoffs) + +def ReportBySOBs (hlist): + hlist.sort (CompareSOBs) + totalsobs = 0 + for h in hlist: + totalsobs += len (h.signoffs) + count = 0 + BeginReport ('Developers with the most signoffs (total %d)' % totalsobs) + for h in hlist: + scount = len (h.signoffs) + if scount > 0: + ReportLine (h.name, scount, (scount*100.0)/totalsobs) + count += 1 + if count >= ListCount: + break + EndReport () + +def CompareESOBs (e1, e2): + return e2.sobs - e1.sobs + +def ReportByESOBs (elist): + elist.sort (CompareESOBs) + totalsobs = 0 + for e in elist: + totalsobs += e.sobs + count = 0 + BeginReport ('Employers with the most signoffs (total %d)' % totalsobs) + for e in elist: + if e.sobs > 0: + ReportLine (e.name, e.sobs, (e.sobs*100.0)/totalsobs) + count += 1 + if count >= ListCount: + break + EndReport () + +# +# Here starts the real program. Read the config files. +# +ConfigFile.ConfigFile (CFName) + +# +# Let's pre-seed the database with a couple of hackers +# we want to remember. 
+# +Linus = ('torvalds@linux-foundation.org', + LookupStoreHacker ('Linus Torvalds', 'torvalds@linux-foundation.org')) +Akpm = ('akpm@linux-foundation.org', + LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org')) + +NextLine = sys.stdin.readline () +TotalChanged = TotalAdded = TotalRemoved = 0 +ParseOpts () + +# +# Snarf changesets. +# +print 'Grabbing changesets...\r', +sys.stdout.flush () + +printcount = CSCount = 0 +while (1): + if (printcount % 50) == 0: + print 'Grabbing changesets...%d\r' % printcount, + sys.stdout.flush () + printcount += 1 + p = grabpatch() + if not p: + break + if p.added > 100000 or p.removed > 100000: + print 'Skipping massive add' + continue + if FileFilter and p.added == 0 and p.removed == 0: + continue + if not p.merge: + p.author.addpatch (p) + for sobemail, sob in p.sobs: + sob.addsob (p) + CSCount += 1 +print 'Grabbing changesets...done' + +if DumpDB: + database.DumpDB () +# +# Say something +# +hlist = database.AllHackers () +elist = database.AllEmployers () +Outfile.write ('Processed %d csets from %d developers\n' % (CSCount, + len (hlist))) +Outfile.write ('%d employers found\n' % len (elist)) +Outfile.write ('A total of %d lines added, %d removed (delta %d)\n' % + (TotalAdded, TotalRemoved, TotalAdded - TotalRemoved)) +if TotalChanged == 0: + TotalChanged = 1 # HACK to avoid div by zero +if DateStats: + PrintDateStats () + sys.exit(0) + +if DevReports: + ReportByPCount (hlist) + ReportByLChanged (hlist) + ReportByLRemoved (hlist) + ReportBySOBs (hlist) +ReportByPCEmpl (elist) +ReportByELChanged (elist) +ReportByESOBs (elist) diff --git a/gitdm.config b/gitdm.config new file mode 100644 index 0000000..588d6ef --- /dev/null +++ b/gitdm.config @@ -0,0 +1,22 @@ +# +# This is a sample gitdm configuration file. +# + +# +# EmailAliases lets us cope with developers who use more +# than one address. +# +EmailAliases sample-config/aliases + +# +# EmailMap does the main work of mapping addresses onto +# employers. 
+# +EmailMap sample-config/domain-map + +# +# Use GroupMap to map a file full of addresses to the +# same employer +# +# GroupMap sample-config/illuminati The Illuminati +# diff --git a/sample-config/aliases b/sample-config/aliases new file mode 100644 index 0000000..8cd50db --- /dev/null +++ b/sample-config/aliases @@ -0,0 +1,5 @@ +# +# This is the email aliases file, mapping secondary addresses +# onto a single, canonical address. +# +corbet@eklektix.com corbet@lwn.net diff --git a/sample-config/domain-map b/sample-config/domain-map new file mode 100644 index 0000000..bbd81f7 --- /dev/null +++ b/sample-config/domain-map @@ -0,0 +1,242 @@ +# +# Here is a set of mappings of domain names onto employer names. +# +8d.com 8D Technologies +aconex.com Aconex +adaptec.com Adaptec +aist.go.jp National Institute of Advanced Industrial Science and Technology +akamai.com Akamai Technologies +am.sony.com Sony +amd.com AMD +analog.com Analog Devices +arastra.com Arastra Inc +arm.com ARM +artecdesign.ee Artec Design +arvoo.nl ARVOO Engineering +atmel.com Atmel +atomide.com Atomide +avtrex.com Avtrex +axis.com Axis Communications +azingo.com Azingo +balabit.com BalaBit +balabit.hu BalaBit +baslerweb.com Basler Vision Technologies +bluehost.com Bluehost +bluewatersys.com Bluewater Systems +broadcom.com Broadcom +brontes3d.com Brontes Technologies +bull.net Bull SAS +cam.ac.uk University of Cambridge +ccur.com Concurrent Computer Corporation +celunite.com Azingo +chelsio.com Chelsio +cisco.com Cisco +citi.umich.edu Univ. of Michigan CITI +clusterfs.com Sun +cn.fujitsu.com Fujitsu +compulab.co.il CompuLab +computergmbh.de CC Computer Consultants +comx.dk ComX Networks +conectiva.com.br Mandriva +coraid.com Coraid +cosmosbay.com Cosmosbay~Vectis +cozybit.com cozybit +cray.com Cray +csr.com CSR +cyberguard.com Secure Computing +cybernetics.com Cybernetics +data.slu.se Uppsala University +dave.eu Dave S.r.l. 
+de.bosch.com Bosch +dell.com Dell +denx.de DENX Software Engineering +devicescape.com Devicescape +digi.com Digi International +dti2.net DTI2 - Desarrollo de la tecnologia de las comunicaciones +edesix.com Edesix Ltd +elandigitalsystems.com Elan Digital Systems +embeddedalley.com Embedded Alley Solutions +empirix.com Empirix +emulex.com Emulex +endrelia.com Endrelia +ericsson.com Ericsson +fixstars.com Fixstars Technologies +free-electrons.com Free Electrons +freescale.com Freescale +fujitsu.com Fujitsu +gaisler.com Gaisler Research +gefanuc.com GE Fanuc +geomatys.fr Geomatys +google.com Google +gvs.co.yu GVS +hansenpartnership.com Hansen Partnership +harris.com Harris Corporation +hauppauge.com Hauppauge +hermes-softlab.com HERMES SoftLab +hevs.ch HES-SO Valais Wallis +highpoint-tech.com HighPoint Technologies +hitachi.co.jp Hitachi +hitachi.com Hitachi +hitachisoft.jp Hitachi +hp.com HP +hvsistemas.es HV Sistemas +ibm.com IBM +ibp.de ipb (uk) Ltd. +icplus.com.tw IC Plus +igel.co.jp igel +inl.fr INL +inria.fr INRIA +intel.com Intel +iram.es IRAM +jmicron.com jmicron.com +jp.fujitsu.com Fujitsu +katalix.com Katalix Systems +keyspan.com InnoSys +laptop.org OLPC +laurelnetworks.com ECI Telecom +linutronix.de linutronix +linux-foundation.org Linux Foundation +lippert-at.de LiPPERT Embedded Computers GmbH +lippertembedded.de LiPPERT Embedded Computers GmbH +llnl.gov Lawrence Livermore National Laboratory +lnxi.com Linux Networx +logitech.com Logitech +lsi.com LSI Logic +lsil.com LSI Logic +lwn.net LWN.net +macqel.be Macq Electronique +macqel.com Macq Electronique +mandriva.com Mandriva +mandriva.com.br Mandriva +marvell.com Marvell +mellanox.co.il Mellanox +melware.de Cytronics & Melware +microgate.com MicroGate Systems +mips.com MIPS +miraclelinux.com Miracle Linux +mn-solutions.de M&N Solutions +moreton.com.au Secure Computing +motorola.com Motorola +movial.fi Movial +mvista.com MontaVista +myri.com Myricom +namesys.com NameSys +nec.co.jp NEC +nec.com NEC 
+netapp.com NetApp +neterion.com Neterion +netxen.com NetXen +niif.hu NIIF Institute +nokia.com Nokia +nomadgs.com Nomad Global Solutions +nortel.com Nortel +novell.com Novell +ntt.co.jp NTT +ntts.co.jp NTT +nuovasystems.com Nuova Systems +nvidia.com NVidia +obsidianresearch.com Obsidian Research +octant-fr.com Octant Informatique +onelan.co.uk ONELAN +onstor.com Onstor +openedhand.com OpenedHand +opengridcomputing.com Open Grid Computing +openmoko.org OpenMoko +openvz.org Parallels +oracle.com Oracle +ornl.gov Oak Ridge National Laboratory +osdl.org Linux Foundation +ozlabs.org IBM +panasas.com Panasas +panasonic.com Panasonic +papercut.bz PaperCut Software +papercut.com PaperCut Software +parallels.com Parallels +pasemi.com PA Semi Corporation +pengutronix.de Pengutronix +pheonix.com Phoeonix +philosys.de Philosys Software +pikron.com PiKRON s.r.o +pmc-sierra.com PMC-Sierra +promise.com Promise Technology +qlogic.com QLogic +qumranet.com Qumranet +realtek.com.tw Realtek +redhat.com Red Hat +renesas.com Renesas Technology +rockwell.com Rockwell +rowland.harvard.edu Rowland Institute, Harvard +rtr.ca Real-Time Remedies +samsung.com Samsung +sanpeople.com SANPeople +savantav.com Savant Systems +secretlab.ca Secretlab +securecomputing.com Secure Computing +semihalf.com Semihalf Embedded Systems +sf-tec.de Science Fiction Technologies +sgi.com SGI +sicortex.com Sicortex +siemens.com Siemens +sierrawireless.com Sierra Wireless +sigma-chemnitz.de SIGMA Chemnitz +snapgear.com Snapgear +solidboot.com Solid Boot Ltd. 
+sony.co.jp Sony +sony.com Sony +sonycom.com Sony +spidernet.net SpiderNet Services +st.com ST Microelectronics +stlinux.com ST Microelectronics +starentnetworks.com Starent Networks +steeleye.com SteelEye +sun.com Sun +suse.com Novell +suse.cz Novell +suse.de Novell +sw.ru Parallels +swsoft.com Parallels +tapsys.com Tapestry Systems +telargo.com Telargo +tensilica.com Tensilica +terascala.com Terascala +thinktube.com Thinktube +ti.com Texas Instruments +til-technologies.fr TIL Technologies +tls.msk.ru Telecom-Service +toptica.com TOPTICA Photonics +toshiba.co.jp Toshiba +total-knowledge.com Total Knowledge +towertech.it Tower Technologies +tpi.com TriplePoint +transitive.com Transitive +transmode.se Transmode Systems +tresys.com Tresys +tripeaks.co.jp Tripeaks +trustedcs.com Trusted Computer Solutions +tungstengraphics.com Tungsten Graphics +tycho.nsa.gov US National Security Agency +ubuntu.com Canonical +uhulinux.hu UHU-Linux +unisys.com Unisys +valinux.co.jp VA Linux Systems Japan +verismonetworks.com Verismo +veritas.com Veritas +via.com.tw Via +vivecode.com Vivecode +vmware.com VMWare +volkswagen.de Volkswagen +voltaire.com Voltaire +vyatta.com Vyatta +wabtec.com Wabtec Railway Electronics +wacom.com Wacom +winbond.com Winbond Electronics +winbond.com.tw Winbond Electronics +wincor-nixdorf.com Wincor Nixdorf +windriver.com Wind River +wipro.com Wipro +wolfsonmicro.com Wolfson Microelectronics +xensource.com XenSource +xiv.co.il XIV Information Systems +xivstorage.com XIV Information Systems +trinnov.com Trinnov Audio +citrix.com Citrix |