summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGermán Póo-Caamaño <gpoo@gnome.org>2011-06-22 19:08:12 -0700
committerGermán Póo-Caamaño <gpoo@gnome.org>2011-06-22 19:27:47 -0700
commit5964089840beaf313fff4189934ecc2dc714bdbd (patch)
tree9f09aa78205ad6717beb063a540f3f0f0fdf0c2a
parentcf1e69b859b10929c09743987d4b20294a2b1839 (diff)
Added CSV dumps: per filetype and per changeset
Two new dumps were added: per filetype and per changeset. It is necessary to set a prefix for the CSV output, because one CSV file will be generated per file type. Now it is possible to get statistics for code, documentation, build scripts, translations, multimedia and developer documentation. This feature is useful for repositories that contain different types of files, not only code. The detailed information does not use the Aggregate parameter. Signed-off-by: Germán Póo-Caamaño <gpoo@gnome.org>
-rw-r--r--README1
-rw-r--r--csvdump.py44
-rwxr-xr-xgitdm16
3 files changed, 56 insertions, 5 deletions
diff --git a/README b/README
index bc2d6b3..8d3922e 100644
--- a/README
+++ b/README
@@ -52,6 +52,7 @@ be:
-o file Write text output to the given file (default is stdout).
+ -p prefix Dump out the database categorized by changeset and by filetype.
-r pat Only generate statistics for changes to files whose
name matches the given regular expression.
diff --git a/csvdump.py b/csvdump.py
index 4e81954..b76a5f6 100644
--- a/csvdump.py
+++ b/csvdump.py
@@ -30,6 +30,48 @@ def AccumulatePatch (p, Aggregate):
stat = PeriodCommitHash[authdatekey]
stat.accumulate (p)
+ChangeSets = []
+FileTypes = []
+
+def store_patch(patch):
+ if not patch.merge:
+ employer = patch.author.emailemployer(patch.email, patch.date)
+ employer = employer.name.replace('"', '.').replace ('\\', '.')
+ author = patch.author.name.replace ('"', '.').replace ('\\', '.')
+ author = patch.author.name.replace ("'", '.')
+ try:
+ domain = patch.email.split('@')[1]
+ except:
+ domain = patch.email
+ ChangeSets.append([patch.commit, str(patch.date),
+ patch.email, domain, author, employer,
+ patch.added, patch.removed])
+ for (filetype, (added, removed)) in patch.filetypes.iteritems():
+ FileTypes.append([patch.commit, filetype, added, removed])
+
+
+def save_csv (prefix='data'):
+ # Dump the ChangeSets
+ if len(ChangeSets) > 0:
+ fd = open('%s-changesets.csv' % prefix, 'w')
+ writer = csv.writer (fd, quoting=csv.QUOTE_NONNUMERIC)
+ writer.writerow (['Commit', 'Date', 'Domain',
+ 'Email', 'Name', 'Affliation',
+ 'Added', 'Removed'])
+ for commit in ChangeSets:
+ writer.writerow(commit)
+
+ # Dump the file types
+ if len(FileTypes) > 0:
+ fd = open('%s-filetypes.csv' % prefix, 'w')
+ writer = csv.writer (fd, quoting=csv.QUOTE_NONNUMERIC)
+
+ writer.writerow (['Commit', 'Type', 'Added', 'Removed'])
+ for commit in FileTypes:
+ writer.writerow(commit)
+
+
+
def OutputCSV (file):
if file is None:
return
@@ -43,4 +85,4 @@ def OutputCSV (file):
writer.writerow ([author_name, stat.email, empl_name, stat.date,
stat.added, stat.removed])
-__all__ = [ 'OutputCSV' ]
+__all__ = [ 'AccumulatePatch', 'OutputCSV', 'store_patch' ]
diff --git a/gitdm b/gitdm
index 41b250c..fe5473c 100755
--- a/gitdm
+++ b/gitdm
@@ -35,6 +35,7 @@ DateStats = 0
AuthorSOBs = 1
FileFilter = None
CSVFile = None
+CSVPrefix = None
AkpmOverLt = 0
DumpDB = 0
CFName = 'gitdm.config'
@@ -54,6 +55,7 @@ Numstat = 0
# -l count Maximum length for output lists
# -n Use numstats instead of generated patch from git log
# -o file File for text output
+# -p prefix Prefix for CSV output
# -r pattern Restrict to files matching pattern
# -s Ignore author SOB lines
# -u Map unknown employers to '(Unknown)'
@@ -64,9 +66,9 @@ Numstat = 0
def ParseOpts ():
global MapUnknown, DevReports
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
- global CFName, CSVFile, DirName, Aggregate, Numstat
+ global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat
- opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:r:suwx:z')
+ opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:suwx:z')
for opt in opts:
if opt[0] == '-a':
AkpmOverLt = 1
@@ -86,6 +88,8 @@ def ParseOpts ():
Numstat = 1
elif opt[0] == '-o':
reports.SetOutput (open (opt[1], 'w'))
+ elif opt[0] == '-p':
+ CSVPrefix = opt[1]
elif opt[0] == '-r':
print 'Filter on "%s"' % (opt[1])
FileFilter = re.compile (opt[1])
@@ -418,6 +422,7 @@ for logpatch in patches:
hacker.addreport (p)
CSCount += 1
csvdump.AccumulatePatch (p, Aggregate)
+ csvdump.store_patch (p)
print >> sys.stderr, 'Grabbing changesets...done '
if DumpDB:
@@ -446,8 +451,11 @@ if TotalChanged == 0:
if DateStats:
PrintDateStats ()
-csvdump.OutputCSV (CSVFile)
-if CSVFile is not None:
+if CSVPrefix:
+ csvdump.save_csv (CSVPrefix)
+
+if CSVFile:
+ csvdump.OutputCSV (CSVFile)
CSVFile.close ()
if DevReports: