summaryrefslogtreecommitdiff
path: root/gitdm-patches/0001-Export-hackers-individual-raw-data-as-CSV.patch
blob: 4fa81e68fd5f0a950856f376e39234ecad23d3e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
From 82cb8cc64e7e2d744bbf65673e61979160a5b767 Mon Sep 17 00:00:00 2001
From: Cedric Bosdonnat <cedric.bosdonnat.ooo@free.fr>
Date: Wed, 14 Sep 2011 00:55:30 +0200
Subject: [PATCH 1/2] Export hackers individual raw data as CSV

The data exported for each hacker are the date of the first commit,
the date of the last commit, the commits count. These help distinguish
regular core hackers from one-shot contributors.
---
 README      |    3 +++
 csvdump.py  |   16 +++++++++++++++-
 database.py |    8 +++++++-
 gitdm       |   15 +++++++++++----
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/README b/README
index dab372e..e60285d 100644
--- a/README
+++ b/README
@@ -46,6 +46,9 @@ be:
 
 	-h file	Generate HTML output to the given file
 
+    -H file Export individual developer raw data as CSV. These data could be
+        used to evaluate the fidelity of developers.
+
 	-l num	Only list the top <num> entries in each report.
 
     -n  Use --numstat instead of generated patches to get the statistics.
diff --git a/csvdump.py b/csvdump.py
index b76a5f6..66b8a12 100644
--- a/csvdump.py
+++ b/csvdump.py
@@ -85,4 +85,18 @@ def OutputCSV (file):
         writer.writerow ([author_name, stat.email, empl_name, stat.date,
                           stat.added, stat.removed])
 
-__all__ = [  'AccumulatePatch', 'OutputCSV', 'store_patch' ]
+def OutputHackersCSV (file, hlist):
+    if file is None:
+        return
+    file.write ("Name,Last affiliation,Activity Start,Activity End,Commits\n")
+    for hacker in hlist:
+        if len(hacker.patches) > 0:
+            file.write ("\"%s\",%s,%s,%s,%d\n"%(hacker.name, \
+                        hacker.emailemployer (None, hacker.activity_end).name, \
+                        hacker.activity_start, hacker.activity_end, \
+                        len(hacker.patches)))
+
+__all__ = [  'AccumulatePatch', 'OutputCSV', 'OutputHackersCSV', 'store_patch' ]
+
+
+
diff --git a/database.py b/database.py
index d8c8095..a8677e0 100644
--- a/database.py
+++ b/database.py
@@ -25,6 +25,8 @@ class Hacker:
         self.tested = [ ]
         self.reports = [ ]
         self.testcred = self.repcred = 0
+        self.activity_start = datetime.date.max
+        self.activity_end = datetime.date.min
 
     def addemail (self, email, elist):
         self.email.append (email)
@@ -33,7 +35,7 @@ class Hacker:
 
     def emailemployer (self, email, date):
         for i in range (0, len (self.email)):
-            if self.email[i] == email:
+            if (email is None) or (self.email[i] == email):
                 for edate, empl in self.employer[i]:
                     if edate > date:
                         return empl
@@ -44,6 +46,10 @@ class Hacker:
         self.added += patch.added
         self.removed += patch.removed
         self.patches.append (patch)
+        if patch.date < self.activity_start:
+            self.activity_start = patch.date
+        if patch.date > self.activity_end:
+            self.activity_end= patch.date
 
     #
     # There's got to be a better way.
diff --git a/gitdm b/gitdm
index 41634e6..774b059 100755
--- a/gitdm
+++ b/gitdm
@@ -36,6 +36,7 @@ AuthorSOBs = 1
 FileFilter = None
 CSVFile = None
 CSVPrefix = None
+HackersCSV = None
 AkpmOverLt = 0
 DumpDB = 0
 CFName = 'gitdm.config'
@@ -53,6 +54,7 @@ ReportByFileType = 0
 # -d		Output individual developer stats
 # -D		Output date statistics
 # -h hfile	HTML output to hfile
+# -H file   Export individual developer raw data as CSV
 # -l count	Maximum length for output lists
 # -n        Use numstats instead of generated patch from git log
 # -o file	File for text output
@@ -67,10 +69,10 @@ ReportByFileType = 0
 def ParseOpts ():
     global MapUnknown, DevReports
     global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
-    global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
-    global ReportByFileType
+    global CFName, CSVFile, CSVPrefix, HackersCSV, DirName,
+    global Aggregate, Numstat, ReportByFileType
 
-    opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:stuwx:z')
+    opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:H:l:no:p:r:stuwx:z')
     for opt in opts:
         if opt[0] == '-a':
             AkpmOverLt = 1
@@ -84,6 +86,8 @@ def ParseOpts ():
             DateStats = 1
         elif opt[0] == '-h':
             reports.SetHTMLOutput (open (opt[1], 'w'))
+        elif opt[0] == '-H':
+            HackersCSV = open (opt[1], 'w')
         elif opt[0] == '-l':
             reports.SetMaxList (int (opt[1]))
         elif opt[0] == '-n':
@@ -108,7 +112,6 @@ def ParseOpts ():
             Aggregate = 'week'
         elif opt[0] == '-z':
             DumpDB = 1
-        
 
 
 def LookupStoreHacker (name, email):
@@ -480,6 +483,10 @@ if TotalChanged == 0:
 if DateStats:
     PrintDateStats ()
 
+if HackersCSV:
+    csvdump.OutputHackersCSV (HackersCSV, hlist);
+    HackersCSV.close ()
+
 if CSVPrefix:
     csvdump.save_csv (CSVPrefix)
 
-- 
1.7.3.4