#!/usr/bin/env python3 import re import argparse import math import statistics from scipy import stats import numpy as np def higher_is_better(m): return m in ["threads", "waves", "maxwaves"] def get_results(filename, args): file = open(filename, "r") lines = file.read().split('\n') results = {} results["time"] = 0 time_match = re.compile(r"Thread \S+ took (\S+) seconds") re_match = re.compile(r"(\S+) - (.*) shader: (.*)") nv_match = re.compile(r"(\S+) - type: ([^,]*), (.*)") for line in lines: match = re.search(time_match, line) if match is not None: results["time"] = results["time"] + float(match.group(1)) continue nv_format = False match = re.search(re_match, line) if match is None: match = re.search(nv_match, line) if match is None: continue nv_format = True groups = match.groups() app = groups[0] stage = groups[1] stats = groups[2] if nv_format and stage.isdecimal(): stage = ["VS", "FS", "GS", "TCS", "TES", "CS"][int(stage)] if args.stage and args.stage != stage: continue result_group = {} for stat in stats.split(', '): name = "" val = 0 if nv_format: stat_split = stat.split(": ") name = stat_split[0] val = stat_split[1] else: stat_split_spaces = stat.split(' ') if stat_split_spaces[0] == "scheduled": name = stat_split_spaces[0] val = stat_split_spaces[3] # Skipping "Promoted 0 constants" and "compacted..." on i965. # We should probably change "compacted" to just a shader size # in bytes. elif len(stat_split_spaces) != 2: continue else: name = stat_split_spaces[1] val = stat_split_spaces[0] if name == "inst": name = "instructions" if name == "spills:fills": (spills, fills) = val.split(':') result_group['spills'] = int(spills) result_group['fills'] = int(fills) elif val.isnumeric(): result_group[name] = int(val) else: result_group[name] = val results[(app, stage)] = result_group return results def format_percent(frac): """Converts a factional value (typically 0.0 to 1.0) to a string as a percentage""" if abs(frac) > 0.0 and abs(frac) < 0.0001: return "<.01%" else: return "{:.2f}%".format(frac * 100) def format_num(n): assert n >= 0 if n - math.floor(n) < 0.01: return str(math.floor(n)) else: return "{:.2f}".format(n) def get_delta(b, a): if b != 0 and a != 0: frac = float(a) / float(b) - 1.0 return ' ({})'.format(format_percent(frac)) else: return '' def change(b, a): return format_num(b) + " -> " + format_num(a) + get_delta(b, a) def get_result_string(p, b, a): p = p + ": " while len(p) < 50: p = p + ' ' return p + change(b, a) def get_spill_fill_if_change(m, b, a): if not b.get("spills") or not b.get("fills"): return '' if m in ("spills", "fills"): return '' if b["spills"] == a["spills"] and b["fills"] == a["fills"]: return '' return " (spills: " + change(b["spills"], a["spills"]) + "; fills: " + change(b["fills"], a["fills"]) + ")" def get_sched_mode(b, a): if not b.get("scheduled"): return '' p = " (scheduled: " + b["scheduled"] if b["scheduled"] == a["scheduled"]: return p + ")" return p + " -> " + a["scheduled"] + ")" def split_list(string): return string.split(",") def gather_statistics(changes, before, after, m): stats = (0.0, 0, 0.0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0) if changes: absolute = [abs(before[p][m] - after[p][m]) for p in changes] relative = [0 if before[p][m] == 0 else abs(before[p][m] - after[p][m]) / before[p][m] for p in changes] stats = (statistics.mean(absolute), statistics.median(absolute), min(absolute), max(absolute), statistics.mean(relative), statistics.median(relative), min(relative), max(relative)) return stats def mean_confidence_interval(data, confidence=0.95): a = 1.0 * np.array(data) n = len(a) m, se = np.mean(a), stats.sem(a) h = se * stats.t.ppf((1 + confidence) / 2., n - 1) return m, m - h, m + h def main(): parser = argparse.ArgumentParser() parser.add_argument("--measurements", "-m", type=split_list, help="comma-separated list of measurements to report") parser.add_argument("--summary-only", "-s", action="store_true", default=False, help="do not show the per-shader helped / hurt data") parser.add_argument("--changes-only", "-c", action="store_true", default=False, help="only show measurements that have changes") parser.add_argument("--stage", "-S", help="limit results to specified shader stage") parser.add_argument("--ignore-loops", action="store_true", help="ignore loops", default=False) parser.add_argument("before", help="the output of the original code") parser.add_argument("after", help="the output of the new code") args = parser.parse_args() before = get_results(args.before, args) after = get_results(args.after, args) # Grab these and remove them from the dictionary time_before = before.pop("time") time_after = after.pop("time") total_before = {} total_after = {} affected_before = {} affected_after = {} num_hurt = {} num_helped = {} helped_statistics = {} hurt_statistics = {} confidence_interval = {} # If no set of measurements is specified, pick an arbitrary shader and use # the ones it has. The assumption is that all shaders will have had the # same set of measurements printed if not args.measurements: args.measurements = [] for p in before: for m in before[p]: args.measurements.append(m) break for m in args.measurements: if m == "scheduled": continue if m == "inst": m = "instructions" total_before[m] = 0 total_after[m] = 0 affected_before[m] = 0 affected_after[m] = 0 helped = [] hurt = [] for p in before: before_count = before[p][m] if after.get(p) is None: continue # If the number of loops changed, then we may have unrolled some # loops, in which case other measurements will be misleading. if m != "loops" and "loops" in before[p] and before[p]["loops"] != after[p]["loops"] and not args.ignore_loops: continue after_count = after[p][m] total_before[m] += before_count total_after[m] += after_count if before_count != after_count: affected_before[m] += before_count affected_after[m] += after_count if (after_count > before_count) ^ higher_is_better(m): hurt.append(p) else: helped.append(p) if not args.summary_only: helped.sort( key=lambda k: after[k][m] if before[k][m] == 0 else float(before[k][m] - after[k][m]) / before[k][m]) for p in helped: namestr = p[0] + " " + p[1] print(m + " helped: " + get_result_string(namestr, before[p][m], after[p][m]) + get_spill_fill_if_change(m, before[p], after[p]) + get_sched_mode(before[p], after[p])) if helped: print("") hurt.sort( key=lambda k: after[k][m] if before[k][m] == 0 else float(after[k][m] - before[k][m]) / before[k][m]) for p in hurt: namestr = p[0] + " " + p[1] print(m + " HURT: " + get_result_string(namestr, before[p][m], after[p][m]) + get_spill_fill_if_change(m, before[p], after[p]) + get_sched_mode(before[p], after[p])) if hurt: print("") helped_statistics[m] = gather_statistics(helped, before, after, m) hurt_statistics[m] = gather_statistics(hurt, before, after, m) num_helped[m] = len(helped) num_hurt[m] = len(hurt) # Statistics for spills and fills is usually meaningless. if m in ["spills", "fills"]: continue if num_hurt[m] + num_helped[m] > 3: A = [after[p][m] - before[p][m] for p in helped + hurt] B = [0 if before[p][m] == 0 else (after[p][m] - before[p][m]) / before[p][m] for p in helped + hurt] confidence_interval[m] = (mean_confidence_interval(A), mean_confidence_interval(B)) lost = [] gained = [] for p in before: if after.get(p) is None: lost.append(p[0] + " " + p[1]) for p in after: if before.get(p) is None: gained.append(p[0] + " " + p[1]) if not args.summary_only: lost.sort() for p in lost: print("LOST: " + p) if lost: print("") gained.sort() for p in gained: print("GAINED: " + p) if gained: print("") any_helped_or_hurt = False for m in args.measurements: if m == "scheduled": continue if m == "inst": m = "instructions" if num_helped[m] > 0 or num_hurt[m] > 0: any_helped_or_hurt = True if num_helped[m] > 0 or num_hurt[m] > 0 or not args.changes_only: print("total {0} in shared programs: {1}\n" "{0} in affected programs: {2}\n" "helped: {3}\n" "HURT: {4}".format( m, change(total_before[m], total_after[m]), change(affected_before[m], affected_after[m]), num_helped[m], num_hurt[m])) # Statistics for spills and fills is usually meaningless. if m in ["spills", "fills"]: print() continue if num_helped[m] > 2 or (num_helped[m] > 0 and num_hurt[m] > 0): (avg_abs, med_abs, lo_abs, hi_abs, avg_rel, med_rel, lo_rel, hi_rel) = helped_statistics[m] print("helped stats (abs) min: {} max: {} x\u0304: {:.2f} x\u0303: {}".format( lo_abs, hi_abs, avg_abs, int(med_abs))) print("helped stats (rel) min: {} max: {} x\u0304: {} x\u0303: {}".format( format_percent(lo_rel), format_percent(hi_rel), format_percent(avg_rel), format_percent(med_rel))) if num_hurt[m] > 2 or (num_hurt[m] > 0 and num_helped[m] > 0): (avg_abs, med_abs, lo_abs, hi_abs, avg_rel, med_rel, lo_rel, hi_rel) = hurt_statistics[m] print("HURT stats (abs) min: {} max: {} x\u0304: {:.2f} x\u0303: {}".format( lo_abs, hi_abs, avg_abs, int(med_abs))) print("HURT stats (rel) min: {} max: {} x\u0304: {} x\u0303: {}".format( format_percent(lo_rel), format_percent(hi_rel), format_percent(avg_rel), format_percent(med_rel))) if m in confidence_interval: print("95% mean confidence interval for {} value: {:.2f} {:.2f}".format(m, confidence_interval[m][0][1], confidence_interval[m][0][2])) print("95% mean confidence interval for {} %-change: {} {}".format(m, format_percent(confidence_interval[m][1][1]), format_percent(confidence_interval[m][1][2]))) # Be very, very conservative about applying results # based on the confidence intervals. Neither interval # can include zero, and both intervals must be on the # same side of zero. if confidence_interval[m][0][1] < 0 and confidence_interval[m][0][2] > 0: print("Inconclusive result (value mean confidence interval includes 0).") elif confidence_interval[m][1][1] < 0 and confidence_interval[m][1][2] > 0: print("Inconclusive result (%-change mean confidence interval includes 0).") elif (confidence_interval[m][0][1] < 0) != (confidence_interval[m][1][1] < 0): print("Inconclusive result (value mean confidence interval and %-change mean confidence interval disagree).") elif (confidence_interval[m][0][1] < 0) ^ higher_is_better(m): print("{} are helped.".format(m.capitalize())) else: print("{} are HURT.".format(m.capitalize())) print() if lost or gained or not args.changes_only: print("LOST: " + str(len(lost))) print("GAINED: " + str(len(gained))) else: if not any_helped_or_hurt: print("No changes.") print("") print("Total CPU time (seconds): " + change(time_before, time_after)) if __name__ == "__main__": main()