summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org> 2024-04-11 22:50:48 +0300
committer Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org> 2024-04-11 21:59:57 +0200
commit a0176ee710a845567fff9c61608bd2393160c895 (patch)
tree 3dbe56a01e1eb2d2c827f77e410a1c80efe78a3c
parent 70ce608fbfd5193f59b16b72b6a0e22fa60b3613 (diff)
regression-hotspots: optimise by running git log only once instead of eight thousand times

Takes running time from 10 hours to 30 seconds for me. The results are also more accurate, as previously the bug tracker ID in the commit message was accepted even when it appeared only after the first line.

Change-Id: I75f77eb0e3f5c884f35d639608752225f5085c4c
Reviewed-on: https://gerrit.libreoffice.org/c/dev-tools/+/166014
Tested-by: Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org>
Reviewed-by: Ilmari Lauhakangas <ilmari.lauhakangas@libreoffice.org>
-rwxr-xr-x scripts/regression-hotspots.py 23
1 files changed, 19 insertions, 4 deletions
diff --git a/scripts/regression-hotspots.py b/scripts/regression-hotspots.py
index 6ea80ba6..8a4fb76d 100755
--- a/scripts/regression-hotspots.py
+++ b/scripts/regression-hotspots.py
@@ -8,6 +8,7 @@
#
# Uses https://github.com/gitpython-developers/GitPython
# Results published in https://wiki.documentfoundation.org/Development/RegressionHotspots
+# Run in LibreOffice core directory. Shouldn't take more than a minute.
import sys
import re
@@ -18,7 +19,7 @@ from urllib.request import urlopen, URLError
from io import BytesIO
def get_fixed_regression_bugs():
- url = 'https://bugs.documentfoundation.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0'
+ url = 'https://bugs.documentfoundation.org/buglist.cgi?columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&product=LibreOffice&resolution=FIXED&ctype=csv&human=0'
ctx = ssl.create_default_context()
ctx.check_hostname = False
@@ -57,9 +58,23 @@ if __name__ == '__main__':
fixed_regression_ids = get_fixed_regression_bugs()
sys.stderr.write('found %d fixed regressions: %s\n' % (len(fixed_regression_ids), fixed_regression_ids))
- for bug_id in fixed_regression_ids:
- sys.stderr.write('working on bug %d\n' % bug_id)
- lognames = git.Git('.').execute(['git', 'log', '--grep=[fdo|tdf]#'+str(bug_id), '--pretty=tformat:', '--name-only'])
+ # build a dictionary of hashes and bug IDs from all commits targeting a report in FDO/TDF Bugzilla
+ gitbugs = {}
+ buglog = git.Git('.').execute(['git', 'log', '--grep=(fdo|tdf)#', '-E', '--oneline', '--since=1.10.2010'])
+ if buglog:
+ for line in buglog.split('\n'):
+ githash = line.partition(' ')[0]
+ # the regex search will ignore any commits hit by the grep where fdo|tdf# occurred below
+ # the first line - this is desirable as the referred bug ID should appear in the subject line
+ bugid = re.search(r"(?:fdo|tdf)#([0-9]+)", line)
+ if bugid:
+ gitbugs[githash] = int(bugid.group(1))
+
+ # filter by the bug IDs we got from the Bugzilla query
+ regression_hashes = [key for key, value in gitbugs.items() if value in fixed_regression_ids]
+
+ for githash in regression_hashes:
+ lognames = git.Git('.').execute(['git', 'show', githash, '--pretty=tformat:', '--name-only'])
if lognames:
for filename in lognames.split('\n'):
if not excluderegex.search(filename):