#!/usr/bin/env python3
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
# qawrangler-stats.py
#
# Returns statistics of most active wranglers and reporters for a given
# month.
#
# For usage information, run:
#   qawrangler-stats.py -h
#

import sys
import re
import datetime
import gzip
import argparse
import csv
from urllib.error import URLError
from urllib.request import urlopen
from io import BytesIO
from collections import Counter, OrderedDict

# Template of the monthly archive URL; the placeholder is filled with the
# value returned by get_fname() (e.g. `2014-January').
URL = 'https://lists.freedesktop.org/archives/libreoffice-bugs/{}.txt.gz'

# English month names, indexed by `month - 1'. The archive file names are
# built from these, so they must not depend on the process locale (which is
# what strftime('%B') would do).
MONTH_NAMES = (
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
)

# Entity name -> pattern whose first group captures the contributor.
# Built from a list of pairs so the output order is the order written here.
ENTITIES = OrderedDict([
    ('changers', re.compile(r'^(.+)\schanged:$', re.MULTILINE)),
    ('reporters', re.compile(r'^\s*Reporter:\s(.+)$', re.MULTILINE)),
    ('commentators', re.compile(r'^--- Comment #\d+ from (.+) ---$',
                                re.MULTILINE)),
])


def get_parser():
    """Returns an argparse instance, setting the arguments for the script"""
    today = datetime.date.today()
    parser = argparse.ArgumentParser(
        description='LibreOffice contributor statistics')
    # `choices' rejects out-of-range months with a regular usage error
    # instead of a ValueError traceback from datetime.date() in main().
    # The explicit metavar keeps the help output as `-m MONTH'.
    parser.add_argument('-m', '--month', dest='month', type=int,
        default=today.month, choices=range(1, 13), metavar='MONTH',
        help='month to generate statistics from (default is current month)')
    parser.add_argument('-y', '--year', dest='year', type=int,
        default=today.year,
        help='year to generate statistics from (default is current year)')
    parser.add_argument('-n', '--num', dest='num', type=int, default=None,
        help='number of top contributors of each category (default is all)')
    parser.add_argument('--csv', dest='csv', action='store_true',
        help='output information in CSV format')

    return parser


def get_fname(date):
    """Returns the `Libreoffice-bugs Archives' file name for a given @date
    datetime object, in the form `<year>-<English month name>'.
    Note that only year and month are relevant, day is ignored"""
    return '{}-{}'.format(date.year, MONTH_NAMES[date.month - 1])


def get_data(url):
    """Fetches and uncompresses the `Libreoffice-bugs Archives' file given
    its @url. The return of the function is the content of the file as a
    string.

    If the URL cannot be opened, an error is written to stderr and the
    script exits with status 1."""
    try:
        resp = urlopen(url)
    except URLError:
        sys.stderr.write('Error fetching {}\n'.format(url))
        sys.exit(1)

    # Close the HTTP response as soon as the payload has been read.
    with resp:
        compressed = resp.read()

    with gzip.GzipFile(fileobj=BytesIO(compressed)) as f:
        # Undecodable bytes are replaced rather than aborting the whole run.
        return f.read().decode('utf-8', errors='replace')


def get_entity_values(data, pattern, num):
    """Returns the @num most frequent matches of a @pattern in the @data
    string, as a list of (match, count) tuples sorted by decreasing count.
    If @num is None, all matches are returned"""
    values = re.findall(pattern, data)
    return Counter(values).most_common(num)


def nice_print(values_dict, num_output, date):
    """Prints to stdout the output of the script in a human readable way.
    @values_dict is a dict containing a key for each entity (e.g. wranglers,
    reporters, etc), and as values, a list of tuples containing the name and
    the number of occurrences. An example:
        {
            'wranglers': [
                ('Wrangler 1', 30),  # 30 is the number of occurrences
                ('Wrangler 2', 15),
            ]
        }
    @num_output is the number of top values requested to be displayed in
    each category (e.g. number of top wranglers), and @date is a datetime
    object containing the requested year and month"""
    print('=== {} ==='.format(date.strftime('%B %Y')))
    print()

    for name, values in values_dict.items():
        print('--- Top {} {} ---'.format(num_output or '', name))
        # Name left-aligned in 75 columns, count right-aligned in 5.
        print('\n'.join('{0:75}{1:5d}'.format(*v) for v in values))
        print()


def csv_print(values_dict):
    """Print to stdout the output of the script in CSV format.
    @values_dict has the same format as for the `nice_print' function.
    Each row holds the entity name, the contributor and the count.
    The CSV file has the default format for Python's csv module (comma
    delimited, strings quoted when necessary)"""
    writer = csv.writer(sys.stdout)

    for entity_name, values in values_dict.items():
        for val_name, val_count in values:
            writer.writerow([entity_name, val_name, val_count])


def main(args):
    """Main function of the program.
    * Fetches the file for the requested month and year
    * For each defined entity, gathers each match of its pattern, and
      counts the number of occurrences
    * Prints the retrieved information to stdout in the requested format
    """
    date = datetime.date(args.year, args.month, 1)
    fname = get_fname(date)
    url = URL.format(fname)
    data = get_data(url)

    values = OrderedDict()
    for name, regex in ENTITIES.items():
        values[name] = get_entity_values(data, regex, args.num)

    if args.csv:
        csv_print(values)
    else:
        nice_print(values, args.num, date)


if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()

    main(args)