From 5964089840beaf313fff4189934ecc2dc714bdbd Mon Sep 17 00:00:00 2001 From: Germán Póo-Caamaño Date: Wed, 22 Jun 2011 19:08:12 -0700 Subject: Added CSV dumps: per filetype and per changeset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new dumps were added: per filetype and for every changeset. It is necessary to set a prefix where to dump the data in csv, because one csv file will be generated per file type. Now it is possible to get statistics per code, documentation, build scripts, translations, multimedia and developer documentation. This feature is useful for repositories where there are different types of files, not only code. The detailed information does not use the Aggregate parameter. Signed-off-by: Germán Póo-Caamaño --- README | 1 + csvdump.py | 44 +++++++++++++++++++++++++++++++++++++++++++- gitdm | 16 ++++++++++++---- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/README b/README index bc2d6b3..8d3922e 100644 --- a/README +++ b/README @@ -52,6 +52,7 @@ be: -o file Write text output to the given file (default is stdout). + -p prefix Dump out the database categorized by changeset and by filetype. -r pat Only generate statistics for changes to files whose name matches the given regular expression. 
diff --git a/csvdump.py b/csvdump.py index 4e81954..b76a5f6 100644 --- a/csvdump.py +++ b/csvdump.py @@ -30,6 +30,48 @@ def AccumulatePatch (p, Aggregate): stat = PeriodCommitHash[authdatekey] stat.accumulate (p) +ChangeSets = [] +FileTypes = [] + +def store_patch(patch): + if not patch.merge: + employer = patch.author.emailemployer(patch.email, patch.date) + employer = employer.name.replace('"', '.').replace ('\\', '.') + author = patch.author.name.replace ('"', '.').replace ('\\', '.') + author = patch.author.name.replace ("'", '.') + try: + domain = patch.email.split('@')[1] + except: + domain = patch.email + ChangeSets.append([patch.commit, str(patch.date), + patch.email, domain, author, employer, + patch.added, patch.removed]) + for (filetype, (added, removed)) in patch.filetypes.iteritems(): + FileTypes.append([patch.commit, filetype, added, removed]) + + +def save_csv (prefix='data'): + # Dump the ChangeSets + if len(ChangeSets) > 0: + fd = open('%s-changesets.csv' % prefix, 'w') + writer = csv.writer (fd, quoting=csv.QUOTE_NONNUMERIC) + writer.writerow (['Commit', 'Date', 'Domain', + 'Email', 'Name', 'Affliation', + 'Added', 'Removed']) + for commit in ChangeSets: + writer.writerow(commit) + + # Dump the file types + if len(FileTypes) > 0: + fd = open('%s-filetypes.csv' % prefix, 'w') + writer = csv.writer (fd, quoting=csv.QUOTE_NONNUMERIC) + + writer.writerow (['Commit', 'Type', 'Added', 'Removed']) + for commit in FileTypes: + writer.writerow(commit) + + + def OutputCSV (file): if file is None: return @@ -43,4 +85,4 @@ def OutputCSV (file): writer.writerow ([author_name, stat.email, empl_name, stat.date, stat.added, stat.removed]) -__all__ = [ 'OutputCSV' ] +__all__ = [ 'AccumulatePatch', 'OutputCSV', 'store_patch' ] diff --git a/gitdm b/gitdm index 41b250c..fe5473c 100755 --- a/gitdm +++ b/gitdm @@ -35,6 +35,7 @@ DateStats = 0 AuthorSOBs = 1 FileFilter = None CSVFile = None +CSVPrefix = None AkpmOverLt = 0 DumpDB = 0 CFName = 'gitdm.config' @@ -54,6 
+55,7 @@ Numstat = 0 # -l count Maximum length for output lists # -n Use numstats instead of generated patch from git log # -o file File for text output +# -p prefix Prefix for CSV output # -r pattern Restrict to files matching pattern # -s Ignore author SOB lines # -u Map unknown employers to '(Unknown)' @@ -64,9 +66,9 @@ Numstat = 0 def ParseOpts (): global MapUnknown, DevReports global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB - global CFName, CSVFile, DirName, Aggregate, Numstat + global CFName, CSVFile, CSVPrefix,DirName, Aggregate, Numstat - opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:r:suwx:z') + opts, rest = getopt.getopt (sys.argv[1:], 'ab:dc:Dh:l:no:p:r:suwx:z') for opt in opts: if opt[0] == '-a': AkpmOverLt = 1 @@ -86,6 +88,8 @@ def ParseOpts (): Numstat = 1 elif opt[0] == '-o': reports.SetOutput (open (opt[1], 'w')) + elif opt[0] == '-p': + CSVPrefix = opt[1] elif opt[0] == '-r': print 'Filter on "%s"' % (opt[1]) FileFilter = re.compile (opt[1]) @@ -418,6 +422,7 @@ for logpatch in patches: hacker.addreport (p) CSCount += 1 csvdump.AccumulatePatch (p, Aggregate) + csvdump.store_patch (p) print >> sys.stderr, 'Grabbing changesets...done ' if DumpDB: @@ -446,8 +451,11 @@ if TotalChanged == 0: if DateStats: PrintDateStats () -csvdump.OutputCSV (CSVFile) -if CSVFile is not None: +if CSVPrefix: + csvdump.save_csv (CSVPrefix) + +if CSVFile: + csvdump.OutputCSV (CSVFile) CSVFile.close () if DevReports: -- cgit v1.2.3