diff options
| Field | Value | Date |
|---|---|---|
| author | Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org> | 2024-04-11 22:50:48 +0300 |
| committer | Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org> | 2024-04-11 21:59:57 +0200 |
| commit | a0176ee710a845567fff9c61608bd2393160c895 (patch) | |
| tree | 3dbe56a01e1eb2d2c827f77e410a1c80efe78a3c | |
| parent | 70ce608fbfd5193f59b16b72b6a0e22fa60b3613 (diff) | |
regression-hotspots: optimise by running git log only once instead of
eight thousand times. This takes the running time from 10 hours to 30 seconds
for me. The results are also more accurate, as previously the bug tracker ID
in the commit message was accepted even when it appeared only after the first line.
Change-Id: I75f77eb0e3f5c884f35d639608752225f5085c4c
Reviewed-on: https://gerrit.libreoffice.org/c/dev-tools/+/166014
Tested-by: Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org>
Reviewed-by: Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org>
-rwxr-xr-x | scripts/regression-hotspots.py | 23 |
1 file changed, 19 insertions, 4 deletions
```diff
diff --git a/scripts/regression-hotspots.py b/scripts/regression-hotspots.py
index 6ea80ba6..8a4fb76d 100755
--- a/scripts/regression-hotspots.py
+++ b/scripts/regression-hotspots.py
@@ -8,6 +8,7 @@
 #
 # Uses https://github.com/gitpython-developers/GitPython
 # Results published in https://wiki.documentfoundation.org/Development/RegressionHotspots
+# Run in LibreOffice core directory. Shouldn't take more than a minute.
 
 import sys
 import re
@@ -18,7 +19,7 @@ from urllib.request import urlopen, URLError
 from io import BytesIO
 
 def get_fixed_regression_bugs():
-    url = 'https://bugs.documentfoundation.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0'
+    url = 'https://bugs.documentfoundation.org/buglist.cgi?columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&product=LibreOffice&resolution=FIXED&ctype=csv&human=0'
 
     ctx = ssl.create_default_context()
     ctx.check_hostname = False
@@ -57,9 +58,23 @@ if __name__ == '__main__':
     fixed_regression_ids = get_fixed_regression_bugs()
     sys.stderr.write('found %d fixed regressions: %s\n' % (len(fixed_regression_ids), fixed_regression_ids))
 
-    for bug_id in fixed_regression_ids:
-        sys.stderr.write('working on bug %d\n' % bug_id)
-        lognames = git.Git('.').execute(['git', 'log', '--grep=[fdo|tdf]#'+str(bug_id), '--pretty=tformat:', '--name-only'])
+    # build a dictionary of hashes and bug IDs from all commits targeting a report in FDO/TDF Bugzilla
+    gitbugs = {}
+    buglog = git.Git('.').execute(['git', 'log', '--grep=(fdo|tdf)#', '-E', '--oneline', '--since=1.10.2010'])
+    if buglog:
+        for line in buglog.split('\n'):
+            githash = line.partition(' ')[0]
+            # the regex search will ignore any commits hit by the grep where fdo|tdf# occurred below
+            # the first line - this is desirable as the referred bug ID should appear in the subject line
+            bugid = re.search(r"(?:fdo|tdf)#([0-9]+)", line)
+            if bugid:
+                gitbugs[githash] = int(bugid.group(1))
+
+    # filter by the bug IDs we got from the Bugzilla query
+    regression_hashes = [key for key, value in gitbugs.items() if value in fixed_regression_ids]
+
+    for githash in regression_hashes:
+        lognames = git.Git('.').execute(['git', 'show', githash, '--pretty=tformat:', '--name-only'])
         if lognames:
             for filename in lognames.split('\n'):
                 if not excluderegex.search(filename):
```