summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Thorsten Behrens <Thorsten.Behrens@CIB.de> 2016-03-13 02:43:03 +0100
committer Thorsten Behrens <Thorsten.Behrens@CIB.de> 2016-03-13 02:46:31 +0100
commit 4d81e1341648627a7fecf4a0e364e8709a9f782b (patch)
tree faeff9cef9f9f0af9efce42cbe779a4c8a713db3
parent b1573f94b9f865c5a5647bd6f24fdb1c90298b6b (diff)
Cleanup gitdm patches, add two more features
This adds bugfix and review/tested-by reporting
-rw-r--r-- gitdm-patches/0001-Add-exclude-functionality.patch (renamed from gitdm-patches/0002-Add-exclude-functionality.patch) | 6
-rw-r--r-- gitdm-patches/0001-Export-hackers-individual-raw-data-as-CSV.patch | 140
-rw-r--r-- gitdm-patches/0002-WIP-report-bugfixes.patch | 150
-rw-r--r-- gitdm-patches/0003-also-report-bugfix-credits-by-employer.patch | 80
-rw-r--r-- gitdm-patches/0004-Add-two-more-by-employer-reports.patch | 145
5 files changed, 378 insertions, 143 deletions
diff --git a/gitdm-patches/0002-Add-exclude-functionality.patch b/gitdm-patches/0001-Add-exclude-functionality.patch
index 47d6f566..5fe4b2b9 100644
--- a/gitdm-patches/0002-Add-exclude-functionality.patch
+++ b/gitdm-patches/0001-Add-exclude-functionality.patch
@@ -1,7 +1,7 @@
-From da4fc5c5acb6839dc91cb6174ece03a98464e824 Mon Sep 17 00:00:00 2001
+From 1e503647f7c8a06fa6859bc8c0c2628854ed26be Mon Sep 17 00:00:00 2001
From: Michael Meeks <michael.meeks@novell.com>
Date: Wed, 14 Sep 2011 01:12:00 +0200
-Subject: [PATCH 2/2] Add exclude functionality
+Subject: [PATCH 1/4] Add exclude functionality
---
README | 2 ++
@@ -137,5 +137,5 @@ index 61318ad..6b7e378 100755
TotalRemoved += p.removed
TotalChanged += max(p.added, p.removed)
--
-2.1.4
+2.6.4
diff --git a/gitdm-patches/0001-Export-hackers-individual-raw-data-as-CSV.patch b/gitdm-patches/0001-Export-hackers-individual-raw-data-as-CSV.patch
deleted file mode 100644
index 12c286e0..00000000
--- a/gitdm-patches/0001-Export-hackers-individual-raw-data-as-CSV.patch
+++ /dev/null
@@ -1,140 +0,0 @@
-From 848f32d7224a2ca877e95f6815b14f8a9fdd9e21 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?C=C3=A9dric=20Bosdonnat?= <cedricbosdo@openoffice.org>
-Date: Sun, 7 Feb 2016 00:19:08 +0100
-Subject: [PATCH 1/2] Export hackers individual raw data as CSV
-
-The data exported for each hacker are the date of the first commit,
-the date of the last commit, the commits count. These help distinguish
-regular core hackers from one-shot contributors.
----
- README | 3 +++
- csvdump.py | 16 +++++++++++++++-
- database.py | 8 +++++++-
- gitdm | 11 ++++++++++-
- 4 files changed, 35 insertions(+), 3 deletions(-)
-
-diff --git a/README b/README
-index dab372e..e60285d 100644
---- a/README
-+++ b/README
-@@ -46,6 +46,9 @@ be:
-
- -h file Generate HTML output to the given file
-
-+ -H file Export individual developer raw data as CSV. These data could be
-+ used to evaluate the fidelity of developers.
-+
- -l num Only list the top <num> entries in each report.
-
- -n Use --numstat instead of generated patches to get the statistics.
-diff --git a/csvdump.py b/csvdump.py
-index c3f6b5a..f0f5e6b 100644
---- a/csvdump.py
-+++ b/csvdump.py
-@@ -89,4 +89,18 @@ def OutputCSV (file):
- writer.writerow ([author_name, stat.email, empl_name, stat.date,
- stat.added, stat.removed, stat.changesets])
-
--__all__ = [ 'AccumulatePatch', 'OutputCSV', 'store_patch' ]
-+def OutputHackersCSV (file, hlist):
-+ if file is None:
-+ return
-+ file.write ("Name,Last affiliation,Activity Start,Activity End,Commits\n")
-+ for hacker in hlist:
-+ if len(hacker.patches) > 0:
-+ file.write ("\"%s\",%s,%s,%s,%d\n"%(hacker.name, \
-+ hacker.emailemployer (None, hacker.activity_end).name, \
-+ hacker.activity_start, hacker.activity_end, \
-+ len(hacker.patches)))
-+
-+__all__ = [ 'AccumulatePatch', 'OutputCSV', 'OutputHackersCSV', 'store_patch' ]
-+
-+
-+
-diff --git a/database.py b/database.py
-index cb242c1..bf13227 100644
---- a/database.py
-+++ b/database.py
-@@ -25,6 +25,8 @@ class Hacker:
- self.tested = [ ]
- self.reports = [ ]
- self.testcred = self.repcred = 0
-+ self.activity_start = datetime.date.max
-+ self.activity_end = datetime.date.min
- self.versions = [ ]
-
- def addemail (self, email, elist):
-@@ -34,7 +36,7 @@ class Hacker:
-
- def emailemployer (self, email, date):
- for i in range (0, len (self.email)):
-- if self.email[i] == email:
-+ if (email is None) or (self.email[i] == email):
- for edate, empl in self.employer[i]:
- if edate > date:
- return empl
-@@ -46,6 +48,10 @@ class Hacker:
- self.removed += patch.removed
- self.changed += max(patch.added, patch.removed)
- self.patches.append (patch)
-+ if patch.date < self.activity_start:
-+ self.activity_start = patch.date
-+ if patch.date > self.activity_end:
-+ self.activity_end= patch.date
-
- #
- # Note that the author is represented in this release.
-diff --git a/gitdm b/gitdm
-index c2b20cd..61318ad 100755
---- a/gitdm
-+++ b/gitdm
-@@ -36,6 +36,7 @@ AuthorSOBs = 1
- FileFilter = None
- CSVFile = None
- CSVPrefix = None
-+HackersCSV = None
- AkpmOverLt = 0
- DumpDB = 0
- CFName = 'gitdm.config'
-@@ -57,6 +58,7 @@ FileReport = None
- # -D Output date statistics
- # -f file Write touched-files report to <file>
- # -h hfile HTML output to hfile
-+# -H file Export individual developer raw data as CSV
- # -l count Maximum length for output lists
- # -n Use numstats instead of generated patch from git log
- # -o file File for text output
-@@ -75,8 +77,9 @@ def ParseOpts():
- global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
- global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
- global ReportByFileType, ReportUnknowns, CompanyFilter, FileReport
-+ global HackersCSV
-
-- opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Df:h:l:no:p:r:stUuwx:yz')
-+ opts, rest = getopt.getopt(sys.argv[1:], 'ab:dC:c:Df:H:h:l:no:p:r:stUuwx:yz')
- for opt in opts:
- if opt[0] == '-a':
- AkpmOverLt = 1
-@@ -94,6 +97,8 @@ def ParseOpts():
- FileReport = opt[1]
- elif opt[0] == '-h':
- reports.SetHTMLOutput(open(opt[1], 'w'))
-+ elif opt[0] == '-H':
-+ HackersCSV = open (opt[1], 'w')
- elif opt[0] == '-l':
- reports.SetMaxList(int(opt[1]))
- elif opt[0] == '-n':
-@@ -551,6 +556,10 @@ if TotalChanged == 0:
- if DateStats:
- PrintDateStats()
-
-+if HackersCSV:
-+ csvdump.OutputHackersCSV (HackersCSV, hlist);
-+ HackersCSV.close ()
-+
- if CSVPrefix:
- csvdump.save_csv(CSVPrefix)
-
---
-2.1.4
-
diff --git a/gitdm-patches/0002-WIP-report-bugfixes.patch b/gitdm-patches/0002-WIP-report-bugfixes.patch
new file mode 100644
index 00000000..b4339415
--- /dev/null
+++ b/gitdm-patches/0002-WIP-report-bugfixes.patch
@@ -0,0 +1,150 @@
+From 58240a83241b7bcf693a363b0bd19847afaf4a67 Mon Sep 17 00:00:00 2001
+From: Thorsten Behrens <Thorsten.Behrens@CIB.de>
+Date: Tue, 8 Mar 2016 12:31:10 +0100
+Subject: [PATCH 2/4] WIP: report bugfixes
+
+---
+ database.py | 3 +++
+ gitdm | 10 ++++++++++
+ patterns.py | 1 +
+ reports.py | 36 +++++++++++++++++++++++++++++++++++-
+ 4 files changed, 49 insertions(+), 1 deletion(-)
+
+diff --git a/database.py b/database.py
+index bf13227..a2b6727 100644
+--- a/database.py
++++ b/database.py
+@@ -22,6 +22,7 @@ class Hacker:
+ self.patches = [ ]
+ self.signoffs = [ ]
+ self.reviews = [ ]
++ self.bugfixes = [ ]
+ self.tested = [ ]
+ self.reports = [ ]
+ self.testcred = self.repcred = 0
+@@ -66,6 +67,8 @@ class Hacker:
+ self.signoffs.append (patch)
+ def addreview (self, patch):
+ self.reviews.append (patch)
++ def addbugfix (self, bug):
++ self.bugfixes.append (bug)
+ def addtested (self, patch):
+ self.tested.append (patch)
+ def addreport (self, patch):
+diff --git a/gitdm b/gitdm
+index 6b7e378..a7eee64 100755
+--- a/gitdm
++++ b/gitdm
+@@ -210,12 +210,16 @@ class patch:
+ self.reviews = [ ]
+ self.testers = [ ]
+ self.reports = [ ]
++ self.bugfixes = [ ]
+ self.filetypes = {}
+ self.files = [ ]
+
+ def addreviewer(self, reviewer):
+ self.reviews.append(reviewer)
+
++ def addbugfix(self, bug):
++ self.bugfixes.append(bug)
++
+ def addtester(self, tester):
+ self.testers.append(tester)
+
+@@ -294,6 +298,10 @@ def grabpatch(logpatch):
+ #
+ # Various other tags of interest.
+ #
++ m = patterns['bugfix'].match(Line)
++ if m:
++ p.addbugfix((m.group(2), m.group(3)))
++ continue
+ m = patterns['reviewed-by'].match(Line)
+ if m:
+ email = database.RemapEmail(m.group(2))
+@@ -527,6 +535,8 @@ for logpatch in patches:
+ sob.addsob(p)
+ for hacker in p.reviews:
+ hacker.addreview(p)
++ for bug in p.bugfixes:
++ p.author.addbugfix(bug)
+ for hacker in p.testers:
+ hacker.addtested(p)
+ for hacker in p.reports:
+diff --git a/patterns.py b/patterns.py
+index db36873..b346013 100644
+--- a/patterns.py
++++ b/patterns.py
+@@ -37,6 +37,7 @@ patterns = {
+ 'tested-by': re.compile (r'^\s+tested-by:' + _pemail + '.*$', re.I),
+ 'reported-by': re.compile (r'^\s+Reported-by:' + _pemail + '.*$', re.I),
+ 'reported-and-tested-by': re.compile (r'^\s+reported-and-tested-by:' + _pemail + '.*$', re.I),
++ 'bugfix': re.compile (r'(^|.* )(\w+)#(\d+) .*$', re.I),
+ #
+ # Merges are described with a variety of lines.
+ #
+diff --git a/reports.py b/reports.py
+index 1c707d0..48223fc 100644
+--- a/reports.py
++++ b/reports.py
+@@ -10,7 +10,7 @@
+ # Public License, version 2.
+ #
+
+-import sys
++import sys, database
+
+ Outfile = sys.stdout
+ HTMLfile = None
+@@ -296,6 +296,39 @@ def ReportByRepCreds(hlist):
+ EndReport()
+
+ #
++# Bugfixer reporting.
++#
++def CompareBugfixes(h1, h2):
++ return len(h2.bugfixes) - len(h1.bugfixes)
++
++def ReportByBugfixes(hlist):
++ # extract all bugfix types, flatten list then use unique set
++ bug_domains = set( [ j[0] for i in hlist for j in i.bugfixes ] )
++
++ # output stats separate for type
++ for domain in bug_domains:
++ filtered_hackers = [ ]
++ for hacker in hlist:
++ filtered_hacker = database.Hacker(hacker.name, None, None, None)
++ filtered_hacker.bugfixes = [ i for i in hacker.bugfixes if i[0] == domain ]
++ filtered_hackers.append( filtered_hacker )
++ filtered_hackers.sort(CompareBugfixes)
++ totalfixes = 0
++ for h in filtered_hackers:
++ totalfixes += len(h.bugfixes)
++ count = 0
++ BeginReport('Developers with the most bugfix credits for %s#xxx (total %d)' % (
++ domain, totalfixes))
++ for h in filtered_hackers:
++ scount = len(h.bugfixes)
++ if scount > 0:
++ ReportLine(h.name, scount, (scount*100.0)/totalfixes)
++ count += 1
++ if count >= ListCount:
++ break
++ EndReport()
++
++#
+ # Versions.
+ #
+ def CompareVersionCounts(h1, h2):
+@@ -373,6 +406,7 @@ def DevReports(hlist, totalchanged, cscount, totalremoved):
+ ReportByTestCreds(hlist)
+ ReportByReports(hlist)
+ ReportByRepCreds(hlist)
++ ReportByBugfixes(hlist)
+
+ def EmplReports(elist, totalchanged, cscount):
+ ReportByPCEmpl(elist, cscount)
+--
+2.6.4
+
diff --git a/gitdm-patches/0003-also-report-bugfix-credits-by-employer.patch b/gitdm-patches/0003-also-report-bugfix-credits-by-employer.patch
new file mode 100644
index 00000000..5c877b0d
--- /dev/null
+++ b/gitdm-patches/0003-also-report-bugfix-credits-by-employer.patch
@@ -0,0 +1,80 @@
+From 93ffbdc9d5fbe547325cf23de08093990e2ce9d7 Mon Sep 17 00:00:00 2001
+From: Thorsten Behrens <Thorsten.Behrens@CIB.de>
+Date: Tue, 8 Mar 2016 18:12:50 +0100
+Subject: [PATCH 3/4] also report bugfix credits by employer
+
+---
+ database.py | 2 ++
+ reports.py | 31 +++++++++++++++++++++++++++++++
+ 2 files changed, 33 insertions(+)
+
+diff --git a/database.py b/database.py
+index a2b6727..226ae05 100644
+--- a/database.py
++++ b/database.py
+@@ -165,11 +165,13 @@ class Employer:
+ self.added = self.removed = self.count = self.changed = 0
+ self.sobs = 0
+ self.hackers = [ ]
++ self.bugfixes = [ ]
+
+ def AddCSet (self, patch):
+ self.added += patch.added
+ self.removed += patch.removed
+ self.changed += max(patch.added, patch.removed)
++ self.bugfixes += patch.bugfixes
+ self.count += 1
+ if patch.author not in self.hackers:
+ self.hackers.append (patch.author)
+diff --git a/reports.py b/reports.py
+index 48223fc..535f502 100644
+--- a/reports.py
++++ b/reports.py
+@@ -395,6 +395,36 @@ def ReportByEHackers(elist):
+ break
+ EndReport()
+
++#
++# Bugfixer reporting.
++#
++def ReportByEBugfixes(elist):
++ # extract all bugfix types, flatten list then use unique set
++ bug_domains = set( [ j[0] for i in elist for j in i.bugfixes ] )
++
++ # output stats separate for type
++ for domain in bug_domains:
++ filtered_employers = [ ]
++ for e in elist:
++ filtered_e = database.Employer(e.name)
++ filtered_e.bugfixes = [ i for i in e.bugfixes if i[0] == domain ]
++ filtered_employers.append( filtered_e )
++ filtered_employers.sort(CompareBugfixes)
++ totalfixes = 0
++ for e in filtered_employers:
++ totalfixes += len(e.bugfixes)
++ count = 0
++ BeginReport('Employers with the most bugfixes for %s#xxx (total %d)' % (
++ domain, totalfixes))
++ for e in filtered_employers:
++ scount = len(e.bugfixes)
++ if scount > 0:
++ ReportLine(e.name, scount, (scount*100.0)/totalfixes)
++ count += 1
++ if count >= ListCount:
++ break
++ EndReport()
++
+
+ def DevReports(hlist, totalchanged, cscount, totalremoved):
+ ReportByPCount(hlist, cscount)
+@@ -413,6 +443,7 @@ def EmplReports(elist, totalchanged, cscount):
+ ReportByELChanged(elist, totalchanged)
+ ReportByESOBs(elist)
+ ReportByEHackers(elist)
++ ReportByEBugfixes(elist)
+
+ #
+ # Who are the unknown hackers?
+--
+2.6.4
+
diff --git a/gitdm-patches/0004-Add-two-more-by-employer-reports.patch b/gitdm-patches/0004-Add-two-more-by-employer-reports.patch
new file mode 100644
index 00000000..010a1ca4
--- /dev/null
+++ b/gitdm-patches/0004-Add-two-more-by-employer-reports.patch
@@ -0,0 +1,145 @@
+From bdc0abe0e86a9c5b681fe3c6767433f0f17c580e Mon Sep 17 00:00:00 2001
+From: Thorsten Behrens <Thorsten.Behrens@CIB.de>
+Date: Sun, 13 Mar 2016 02:38:35 +0100
+Subject: [PATCH 4/4] Add two more by-employer reports
+
+---
+ database.py | 9 +++++++++
+ gitdm | 16 ++++++++++++----
+ reports.py | 40 +++++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 60 insertions(+), 5 deletions(-)
+
+diff --git a/database.py b/database.py
+index 226ae05..97243e2 100644
+--- a/database.py
++++ b/database.py
+@@ -164,6 +164,8 @@ class Employer:
+ self.name = name
+ self.added = self.removed = self.count = self.changed = 0
+ self.sobs = 0
++ self.tests = 0
++ self.reviews = 0
+ self.hackers = [ ]
+ self.bugfixes = [ ]
+
+@@ -179,6 +181,13 @@ class Employer:
+ def AddSOB (self):
+ self.sobs += 1
+
++ def AddReview (self):
++ self.reviews += 1
++
++ def AddTest (self):
++ self.tests += 1
++
++
+ Employers = { }
+
+ def GetEmployer (name):
+diff --git a/gitdm b/gitdm
+index a7eee64..fa41505 100755
+--- a/gitdm
++++ b/gitdm
+@@ -305,12 +305,14 @@ def grabpatch(logpatch):
+ m = patterns['reviewed-by'].match(Line)
+ if m:
+ email = database.RemapEmail(m.group(2))
+- p.addreviewer(LookupStoreHacker(m.group(1), email))
++ reviewer = LookupStoreHacker(m.group(1), email)
++ p.addreviewer((email, LookupStoreHacker(m.group(1), m.group(2))))
+ continue
+ m = patterns['tested-by'].match(Line)
+ if m:
+ email = database.RemapEmail(m.group(2))
+- p.addtester(LookupStoreHacker(m.group(1), email))
++ tester = LookupStoreHacker(m.group(1), email)
++ p.addtester((email, LookupStoreHacker(m.group(1), m.group(2))))
+ p.author.testcredit(patch)
+ continue
+ # Reported-by:
+@@ -528,16 +530,22 @@ for logpatch in patches:
+ for sobemail, sobber in p.sobs:
+ empl = sobber.emailemployer(sobemail, p.date)
+ empl.AddSOB()
++ for revemail, reviewer in p.reviews:
++ empl = reviewer.emailemployer(revemail, p.date)
++ empl.AddReview()
++ for testemail, tester in p.testers:
++ empl = tester.emailemployer(testemail, p.date)
++ empl.AddTest()
+
+ if not p.merge:
+ p.author.addpatch(p)
+ for sobemail, sob in p.sobs:
+ sob.addsob(p)
+- for hacker in p.reviews:
++ for hackemail, hacker in p.reviews:
+ hacker.addreview(p)
+ for bug in p.bugfixes:
+ p.author.addbugfix(bug)
+- for hacker in p.testers:
++ for hackemail, hacker in p.testers:
+ hacker.addtested(p)
+ for hacker in p.reports:
+ hacker.addreport(p)
+diff --git a/reports.py b/reports.py
+index 535f502..992fc63 100644
+--- a/reports.py
++++ b/reports.py
+@@ -375,7 +375,43 @@ def ReportByESOBs(elist):
+ if count >= ListCount:
+ break
+ EndReport()
+-
++
++def CompareERevs(e1, e2):
++ return e2.reviews - e1.reviews
++
++def ReportByERevs(elist):
++ elist.sort(CompareERevs)
++ totalrevs = 0
++ for e in elist:
++ totalrevs += e.reviews
++ count = 0
++ BeginReport('Employers with the most reviews (total %d)' % totalrevs)
++ for e in elist:
++ if e.reviews > 0:
++ ReportLine(e.name, e.reviews, (e.reviews*100.0)/totalrevs)
++ count += 1
++ if count >= ListCount:
++ break
++ EndReport()
++
++def CompareETests(e1, e2):
++ return e2.tests - e1.tests
++
++def ReportByETests(elist):
++ elist.sort(CompareETests)
++ totaltests = 0
++ for e in elist:
++ totaltests += e.tests
++ count = 0
++ BeginReport('Employers with the most tested-by (total %d)' % totaltests)
++ for e in elist:
++ if e.tests > 0:
++ ReportLine(e.name, e.tests, (e.tests*100.0)/totaltests)
++ count += 1
++ if count >= ListCount:
++ break
++ EndReport()
++
+ def CompareHackers(e1, e2):
+ return len(e2.hackers) - len(e1.hackers)
+
+@@ -442,6 +478,8 @@ def EmplReports(elist, totalchanged, cscount):
+ ReportByPCEmpl(elist, cscount)
+ ReportByELChanged(elist, totalchanged)
+ ReportByESOBs(elist)
++ ReportByERevs(elist)
++ ReportByETests(elist)
+ ReportByEHackers(elist)
+ ReportByEBugfixes(elist)
+
+--
+2.6.4
+