From f05cae5e7397e92a4e36273925be58132e17047f Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 5 Jul 2024 14:43:06 -0700 Subject: tests/intel/xe_oa: Add "oa-formats" subtest Add "oa-formats" subtest. v2: Remove undefined_a_counters (Umesh) Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa --- tests/intel/xe_oa.c | 465 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 465 insertions(+) diff --git a/tests/intel/xe_oa.c b/tests/intel/xe_oa.c index 7f7a45194..05073db86 100644 --- a/tests/intel/xe_oa.c +++ b/tests/intel/xe_oa.c @@ -478,6 +478,57 @@ write_u64_file(const char *path, uint64_t val) fclose(f); } +static uint64_t +read_report_ticks(const uint32_t *report, enum intel_xe_oa_format_name format) +{ + + struct oa_format fmt = get_oa_format(format); + + return fmt.report_hdr_64bit ? *(uint64_t *)&report[6] : report[3]; +} + +/* + * t0 is a value sampled before t1. width is number of bits used to represent + * t0/t1. Normally t1 is greater than t0. In cases where t1 < t0 use this + * helper. Since the size of t1/t0 is already 64 bits, no special handling is + * needed for width = 64. 
+ */ +static uint64_t +elapsed_delta(uint64_t t1, uint64_t t0, uint32_t width) +{ + uint32_t max_bits = sizeof(t1) * 8; + + igt_assert(width <= max_bits); + + if (t1 < t0 && width != max_bits) + return ((1ULL << width) - t0) + t1; + + return t1 - t0; +} + +static uint64_t +oa_tick_delta(const uint32_t *report1, + const uint32_t *report0, + enum intel_xe_oa_format_name format) +{ + return elapsed_delta(read_report_ticks(report1, format), + read_report_ticks(report0, format), 32); +} + +static void +read_report_clock_ratios(const uint32_t *report, + uint32_t *slice_freq_mhz, + uint32_t *unslice_freq_mhz) +{ + uint32_t unslice_freq = report[0] & 0x1ff; + uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; + uint32_t slice_freq_high = (report[0] >> 9) & 0x3; + uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); + + *slice_freq_mhz = (slice_freq * 16666) / 1000; + *unslice_freq_mhz = (unslice_freq * 16666) / 1000; +} + static uint32_t report_reason(const uint32_t *report) { @@ -485,6 +536,27 @@ report_reason(const uint32_t *report) OAREPORT_REASON_MASK); } +static const char * +read_report_reason(const uint32_t *report) +{ + uint32_t reason = report_reason(report); + + if (reason & (1<<0)) + return "timer"; + else if (reason & (1<<1)) + return "internal trigger 1"; + else if (reason & (1<<2)) + return "internal trigger 2"; + else if (reason & (1<<3)) + return "context switch"; + else if (reason & (1<<4)) + return "GO 1->0 transition (enter RC6)"; + else if (reason & (1<<5)) + return "[un]slice clock ratio change"; + else + return "unknown"; +} + static uint64_t oa_timestamp(const uint32_t *report, enum intel_xe_oa_format_name format) { @@ -493,6 +565,17 @@ oa_timestamp(const uint32_t *report, enum intel_xe_oa_format_name format) return fmt.report_hdr_64bit ? 
*(uint64_t *)&report[2] : report[1]; } +static uint64_t +oa_timestamp_delta(const uint32_t *report1, + const uint32_t *report0, + enum intel_xe_oa_format_name format) +{ + uint32_t width = intel_graphics_ver(devid) >= IP_VER(12, 55) ? 56 : 32; + + return elapsed_delta(oa_timestamp(report1, format), + oa_timestamp(report0, format), width); +} + static uint64_t timebase_scale(uint64_t delta) { @@ -529,6 +612,182 @@ oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report) return false; } +static uint64_t +read_40bit_a_counter(const uint32_t *report, + enum intel_xe_oa_format_name fmt, int a_id) +{ + struct oa_format format = get_oa_format(fmt); + uint8_t *a40_high = (((uint8_t *)report) + format.a40_high_off); + uint32_t *a40_low = (uint32_t *)(((uint8_t *)report) + + format.a40_low_off); + uint64_t high = (uint64_t)(a40_high[a_id]) << 32; + + return a40_low[a_id] | high; +} + +static uint64_t +xehpsdv_read_64bit_a_counter(const uint32_t *report, enum intel_xe_oa_format_name fmt, int a_id) +{ + struct oa_format format = get_oa_format(fmt); + uint64_t *a64 = (uint64_t *)(((uint8_t *)report) + format.a64_off); + + return a64[a_id]; +} + +static uint64_t +get_40bit_a_delta(uint64_t value0, uint64_t value1) +{ + if (value0 > value1) + return (1ULL << 40) + value1 - value0; + else + return value1 - value0; +} + +static void +accumulate_uint64(int a_index, + const uint32_t *report0, + const uint32_t *report1, + enum intel_xe_oa_format_name format, + uint64_t *delta) +{ + uint64_t value0 = xehpsdv_read_64bit_a_counter(report0, format, a_index), + value1 = xehpsdv_read_64bit_a_counter(report1, format, a_index); + + *delta += (value1 - value0); +} + +/* The TestOa metric set is designed so */ +static void +sanity_check_reports(const uint32_t *oa_report0, const uint32_t *oa_report1, + enum intel_xe_oa_format_name fmt) +{ + struct oa_format format = get_oa_format(fmt); + uint64_t time_delta = timebase_scale(oa_timestamp_delta(oa_report1, + oa_report0, + fmt)); + 
uint64_t clock_delta = oa_tick_delta(oa_report1, oa_report0, fmt); + uint64_t max_delta; + uint64_t freq; + uint32_t *rpt0_b = (uint32_t *)(((uint8_t *)oa_report0) + + format.b_off); + uint32_t *rpt1_b = (uint32_t *)(((uint8_t *)oa_report1) + + format.b_off); + uint32_t b; + uint32_t ref; + + igt_debug("report type: %s->%s\n", + read_report_reason(oa_report0), + read_report_reason(oa_report1)); + + freq = time_delta ? (clock_delta * 1000) / time_delta : 0; + igt_debug("freq = %"PRIu64"\n", freq); + + igt_debug("clock delta = %"PRIu64"\n", clock_delta); + + max_delta = clock_delta * intel_xe_perf->devinfo.n_eus; + + /* Gen8+ has some 40bit A counters... */ + for (int j = format.first_a40; j < format.n_a40 + format.first_a40; j++) { + uint64_t value0 = read_40bit_a_counter(oa_report0, fmt, j); + uint64_t value1 = read_40bit_a_counter(oa_report1, fmt, j); + uint64_t delta = get_40bit_a_delta(value0, value1); + + igt_debug("A40_%d: delta = %"PRIu64"\n", j, delta); + igt_assert_f(delta <= max_delta, + "A40_%d: delta = %"PRIu64", max_delta = %"PRIu64"\n", + j, delta, max_delta); + } + + for (int j = 0; j < format.n_a64; j++) { + uint64_t delta = 0; + + accumulate_uint64(j, oa_report0, oa_report1, fmt, &delta); + + igt_debug("A64_%d: delta = %"PRIu64"\n", format.first_a + j, delta); + igt_assert_f(delta <= max_delta, + "A64_%d: delta = %"PRIu64", max_delta = %"PRIu64"\n", + format.first_a + j, delta, max_delta); + } + + for (int j = 0; j < format.n_a; j++) { + uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) + + format.a_off); + uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) + + format.a_off); + int a_id = format.first_a + j; + uint32_t delta = a1[j] - a0[j]; + + igt_debug("A%d: delta = %"PRIu32"\n", a_id, delta); + igt_assert_f(delta <= max_delta, + "A%d: delta = %"PRIu32", max_delta = %"PRIu64"\n", + a_id, delta, max_delta); + } + + /* The TestOa metric set defines all B counters to be a + * multiple of the gpu clock + */ + if (format.n_b && (format.oa_type == 
DRM_XE_OA_FMT_TYPE_OAG || format.oa_type == DRM_XE_OA_FMT_TYPE_OAR)) { + if (clock_delta > 0) { + b = rpt1_b[0] - rpt0_b[0]; + igt_debug("B0: delta = %"PRIu32"\n", b); + igt_assert_eq(b, 0); + + b = rpt1_b[1] - rpt0_b[1]; + igt_debug("B1: delta = %"PRIu32"\n", b); + igt_assert_eq(b, clock_delta); + + b = rpt1_b[2] - rpt0_b[2]; + igt_debug("B2: delta = %"PRIu32"\n", b); + igt_assert_eq(b, clock_delta); + + b = rpt1_b[3] - rpt0_b[3]; + ref = clock_delta / 2; + igt_debug("B3: delta = %"PRIu32"\n", b); + igt_assert(b >= ref - 1 && b <= ref + 1); + + b = rpt1_b[4] - rpt0_b[4]; + ref = clock_delta / 3; + igt_debug("B4: delta = %"PRIu32"\n", b); + igt_assert(b >= ref - 1 && b <= ref + 1); + + b = rpt1_b[5] - rpt0_b[5]; + ref = clock_delta / 3; + igt_debug("B5: delta = %"PRIu32"\n", b); + igt_assert(b >= ref - 1 && b <= ref + 1); + + b = rpt1_b[6] - rpt0_b[6]; + ref = clock_delta / 6; + igt_debug("B6: delta = %"PRIu32"\n", b); + igt_assert(b >= ref - 1 && b <= ref + 1); + + b = rpt1_b[7] - rpt0_b[7]; + ref = clock_delta * 2 / 3; + igt_debug("B7: delta = %"PRIu32"\n", b); + igt_assert(b >= ref - 1 && b <= ref + 1); + } else { + for (int j = 0; j < format.n_b; j++) { + b = rpt1_b[j] - rpt0_b[j]; + igt_debug("B%i: delta = %"PRIu32"\n", j, b); + igt_assert_eq(b, 0); + } + } + } + + for (int j = 0; j < format.n_c; j++) { + uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) + + format.c_off); + uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) + + format.c_off); + uint32_t delta = c1[j] - c0[j]; + + igt_debug("C%d: delta = %"PRIu32", max_delta=%"PRIu64"\n", + j, delta, max_delta); + igt_assert_f(delta <= max_delta, + "C%d: delta = %"PRIu32", max_delta = %"PRIu64"\n", + j, delta, max_delta); + } +} + static bool init_sys_info(void) { @@ -817,6 +1076,191 @@ read_2_oa_reports(int format_id, igt_assert(!"reached"); } +static void +open_and_read_2_oa_reports(int format_id, + int exponent, + uint32_t *oa_report0, + uint32_t *oa_report1, + bool timer_only, + const struct 
drm_xe_engine_class_instance *hwe) +{ + struct intel_xe_perf_metric_set *test_set = metric_set(hwe); + uint64_t properties[] = { + DRM_XE_OA_PROPERTY_OA_UNIT_ID, 0, + + /* Include OA reports in samples */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, true, + + /* OA unit configuration */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, test_set->perf_oa_metrics_set, + DRM_XE_OA_PROPERTY_OA_FORMAT, __ff(format_id), + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, exponent, + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, hwe->engine_instance, + + }; + struct intel_xe_oa_open_prop param = { + .num_properties = ARRAY_SIZE(properties) / 2, + .properties_ptr = to_user_pointer(properties), + }; + + stream_fd = __perf_open(drm_fd, &param, false); + set_fd_flags(stream_fd, O_CLOEXEC); + + read_2_oa_reports(format_id, exponent, + oa_report0, oa_report1, timer_only); + + __perf_close(stream_fd); +} + +static void +print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt) +{ + struct oa_format format = get_oa_format(fmt); + uint64_t ts0 = oa_timestamp(oa_report0, fmt); + uint64_t ts1 = oa_timestamp(oa_report1, fmt); + + igt_debug("TIMESTAMP: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n", + ts0, ts1, ts1 - ts0); + + { + uint64_t clock0 = read_report_ticks(oa_report0, fmt); + uint64_t clock1 = read_report_ticks(oa_report1, fmt); + + igt_debug("CLOCK: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n", + clock0, clock1, clock1 - clock0); + } + + { + uint32_t slice_freq0, slice_freq1, unslice_freq0, unslice_freq1; + const char *reason0 = read_report_reason(oa_report0); + const char *reason1 = read_report_reason(oa_report1); + + igt_debug("CTX ID: 1st = %"PRIu32", 2nd = %"PRIu32"\n", + oa_report0[2], oa_report1[2]); + + read_report_clock_ratios(oa_report0, + &slice_freq0, &unslice_freq0); + read_report_clock_ratios(oa_report1, + &slice_freq1, &unslice_freq1); + + igt_debug("SLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n", + slice_freq0, slice_freq1, + ((int)slice_freq1 - (int)slice_freq0)); + 
igt_debug("UNSLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n", + unslice_freq0, unslice_freq1, + ((int)unslice_freq1 - (int)unslice_freq0)); + + igt_debug("REASONS: 1st = \"%s\", 2nd = \"%s\"\n", reason0, reason1); + } + + /* Gen8+ has some 40bit A counters... */ + for (int j = 0; j < format.n_a40; j++) { + uint64_t value0 = read_40bit_a_counter(oa_report0, fmt, j); + uint64_t value1 = read_40bit_a_counter(oa_report1, fmt, j); + uint64_t delta = get_40bit_a_delta(value0, value1); + + igt_debug("A%d: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n", + j, value0, value1, delta); + } + + for (int j = 0; j < format.n_a64; j++) { + uint64_t value0 = xehpsdv_read_64bit_a_counter(oa_report0, fmt, j); + uint64_t value1 = xehpsdv_read_64bit_a_counter(oa_report1, fmt, j); + uint64_t delta = value1 - value0; + + igt_debug("A_64%d: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n", + format.first_a + j, value0, value1, delta); + } + + for (int j = 0; j < format.n_a; j++) { + uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) + + format.a_off); + uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) + + format.a_off); + int a_id = format.first_a + j; + uint32_t delta = a1[j] - a0[j]; + + igt_debug("A%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n", + a_id, a0[j], a1[j], delta); + } + + for (int j = 0; j < format.n_b; j++) { + uint32_t *b0 = (uint32_t *)(((uint8_t *)oa_report0) + + format.b_off); + uint32_t *b1 = (uint32_t *)(((uint8_t *)oa_report1) + + format.b_off); + uint32_t delta = b1[j] - b0[j]; + + igt_debug("B%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n", + j, b0[j], b1[j], delta); + } + + for (int j = 0; j < format.n_c; j++) { + uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) + + format.c_off); + uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) + + format.c_off); + uint32_t delta = c1[j] - c0[j]; + + igt_debug("C%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n", + j, c0[j], c1[j], delta); + } +} + +static bool 
+hwe_supports_oa_type(int oa_type, const struct drm_xe_engine_class_instance *hwe) +{ + switch (oa_type) { + case DRM_XE_OA_FMT_TYPE_OAM: + case DRM_XE_OA_FMT_TYPE_OAM_MPEC: + return hwe->engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE || + hwe->engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE; + case DRM_XE_OA_FMT_TYPE_OAG: + case DRM_XE_OA_FMT_TYPE_OAR: + return hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER; + case DRM_XE_OA_FMT_TYPE_OAC: + return hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE; + case DRM_XE_OA_FMT_TYPE_PEC: + return hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER || + hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE; + default: + return false; + } + +} + +/** + * SUBTEST: oa-formats + * Description: Test that supported OA formats work as expected + */ +static void test_oa_formats(const struct drm_xe_engine_class_instance *hwe) +{ + for (int i = 0; i < XE_OA_FORMAT_MAX; i++) { + struct oa_format format = get_oa_format(i); + uint32_t oa_report0[format.size / 4]; + uint32_t oa_report1[format.size / 4]; + + if (!format.name) /* sparse, indexed by ID */ + continue; + + if (!hwe_supports_oa_type(format.oa_type, hwe)) + continue; + + igt_debug("Checking OA format %s\n", format.name); + + open_and_read_2_oa_reports(i, + oa_exp_1_millisec, + oa_report0, + oa_report1, + false, /* timer reports only */ + hwe); + + print_reports(oa_report0, oa_report1, i); + sanity_check_reports(oa_report0, oa_report1, i); + } +} + + static unsigned read_xe_module_ref(void) { FILE *fp = fopen("/proc/modules", "r"); @@ -933,8 +1377,25 @@ test_sysctl_defaults(void) igt_assert_eq(paranoid, 1); } +#define __for_one_render_engine_0(hwe) \ + xe_for_each_engine(drm_fd, hwe) \ + if (hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER) \ + break; \ + for_each_if(hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER) \ + igt_dynamic_f("rcs-%d", hwe->engine_instance) + +#define __for_one_render_engine(hwe) \ + for (int m = 0, done = 0; !done; m++) \ + for_each_if(m < 
xe_number_engines(drm_fd) && \ + (hwe = &xe_engine(drm_fd, m)->instance) && \ + hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER && \ + (done = 1)) \ + igt_dynamic_f("rcs-%d", hwe->engine_instance) + igt_main { + struct drm_xe_engine_class_instance *hwe = NULL; + igt_fixture { struct stat sb; @@ -988,6 +1449,10 @@ igt_main igt_subtest("missing-sample-flags") test_missing_sample_flags(); + igt_subtest_with_dynamic("oa-formats") + __for_one_render_engine(hwe) + test_oa_formats(hwe); + igt_fixture { /* leave sysctl options in their default state... */ write_u64_file("/proc/sys/dev/xe/observation_paranoid", 1); -- cgit v1.2.3