summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJosé Roberto de Souza <jose.souza@intel.com>2024-06-28 15:23:56 -0700
committerAshutosh Dixit <ashutosh.dixit@intel.com>2024-06-29 12:39:46 -0700
commit0c1553fbd50c7b904cc23a3a2327f43171c37808 (patch)
tree2fe17f569a868a50812844914ba0cf82a95c49d9 /lib
parent2879175756161d202674df2f4586d62587e3da68 (diff)
xe/oa/mdapi-xml-convert: Add support for 576B_PEC64LL format
Xe2 doesn't use the 256-byte format; instead it uses a 576-byte format with 64 PEC fields that are each 64 bits long. This patch fixes the Xe2 definition and adds the parser for this format. Signed-off-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/xe/oa-configs/codegen.py5
-rwxr-xr-xlib/xe/oa-configs/mdapi-xml-convert.py76
-rw-r--r--lib/xe/xe_oa.h1
3 files changed, 67 insertions, 15 deletions
diff --git a/lib/xe/oa-configs/codegen.py b/lib/xe/oa-configs/codegen.py
index f078ef949..51498f803 100644
--- a/lib/xe/oa-configs/codegen.py
+++ b/lib/xe/oa-configs/codegen.py
@@ -128,6 +128,7 @@ class Set:
return self.xml.find(path)
+#TODO add SqidiTotalCount, L3BankTotalCount, L3NodeTotalCount
hw_vars_mapping = {
"$EuCoresTotalCount": { 'c': "perf->devinfo.n_eus", 'desc': "The total number of execution units" },
"$EuSlicesTotalCount": { 'c': "perf->devinfo.n_eu_slices" },
@@ -159,6 +160,10 @@ hw_vars_mapping = {
"$GpuMaxFrequency": { 'c': "perf->devinfo.gt_max_freq" },
"$SkuRevisionId": { 'c': "perf->devinfo.revision" },
"$QueryMode": { 'c': "perf->devinfo.query_mode" },
+
+ "$ComputeEngineTotalCount": { 'c': "perf->devinfo.n_eus", 'desc': "The total number of execution units" },
+
+ "$CopyEngineTotalCount": { 'c': "perf->devinfo.n_eus", 'desc': "The total number of execution units" },
}
def is_hw_var(name):
diff --git a/lib/xe/oa-configs/mdapi-xml-convert.py b/lib/xe/oa-configs/mdapi-xml-convert.py
index 575b53e95..602a3965a 100755
--- a/lib/xe/oa-configs/mdapi-xml-convert.py
+++ b/lib/xe/oa-configs/mdapi-xml-convert.py
@@ -149,12 +149,10 @@ mtl_chipset_oam_samedia_params = {
}
}
-# FIXME: everything except oa_report_size is incorrect here
-xe2_chipset_params = {
- 'a_offset': 16,
- 'b_offset': 192,
- 'c_offset': 224,
+xe2_chipset_params_pec = {
+ 'pec_offset': 32,
'oa_report_size': 576,
+ # TODO: Not sure about the configs below
'config_reg_blacklist': {
0x2364, # OACTXID
},
@@ -183,7 +181,11 @@ mtl_chipset_oa_formats = {
}
xe2_chipset_oa_formats = {
- '256B_GENERIC_NOA16': xe2_chipset_params,
+ '576B_PEC64LL': xe2_chipset_params_pec,
+ # We only care about 576B_PEC64LL metrics in Xe2; the entries below are just to
+ # suppress warnings
+ '256B_GENERIC_NOA16': xehpsdv_chipset_params,
+ '128B_MPEC8_NOA16': mtl_chipset_oam_samedia_params,
}
chipsets = {
@@ -209,8 +211,6 @@ chipsets = {
xehp_plus = ( 'ACM', 'MTL' )
-xe2_plus = ( 'LNL' )
-
register_types = { 'OA', 'NOA', 'FLEX', 'PM' }
default_set_blacklist = { "RenderDX1x", # TODO: rename to something non 'DX'
@@ -298,6 +298,46 @@ def read_token_to_rpn_read_oam(chipset, token, raw_offsets, oa_format):
assert 0
def read_token_to_rpn_read_pec(chipset, token, raw_offsets, oa_format):
    """Translate a ``qw@0x<offset>`` read token into an RPN READ operation.

    Args:
        chipset: Key into the module-level ``chipsets`` table. Only consulted
            when ``raw_offsets`` is truthy.
        token: Read token of the form ``<width>@<hex offset>``; only the
            ``qw`` width is accepted.
        raw_offsets: When truthy, the offset is a byte offset into the raw
            report: 8 maps to GPU_TIME, 24 maps to GPU_CLOCK, and anything at
            or beyond the format's ``pec_offset`` maps to a PEC counter.
            When falsy, the offset is a byte offset into the array of
            accumulated deltas, laid out as GPU_TIME, GPU_CLOCK, then the
            PEC counters, 8 bytes each.
        oa_format: Key into ``chipsets[chipset]`` used to look up
            ``pec_offset``. Only consulted when ``raw_offsets`` is truthy.

    Returns:
        One of ``"GPU_TIME 0 READ"``, ``"GPU_CLOCK 0 READ"`` or
        ``"PEC <n> READ"``.

    Raises:
        AssertionError: If the width is not ``qw``, or if a raw offset below
            ``pec_offset`` is neither 8 nor 24.
    """
    width, offset_str = token.split('@')
    offset = int(offset_str, 16)

    # Raise explicitly instead of `assert 0`: a bare assert is stripped under
    # `python -O`, which would let an unsupported token through silently.
    if width != 'qw':
        raise AssertionError(
            "unsupported read width '{0}' in token '{1}'".format(width, token))

    if raw_offsets:
        pec_offset = chipsets[chipset][oa_format]['pec_offset']

        if offset >= pec_offset:
            # Each PEC counter is one qword; floor division keeps this in
            # integer arithmetic.
            return "PEC " + str((offset - pec_offset) // 8) + " READ"

        if offset == 8:
            return "GPU_TIME 0 READ"
        if offset == 24:
            return "GPU_CLOCK 0 READ"

        raise AssertionError(
            "unexpected raw report offset {0:#x} in token '{1}'".format(
                offset, token))

    # Location in the accumulated deltas: slot 0 is GPU_TIME, slot 1 is
    # GPU_CLOCK, and the PEC counters follow from slot 2 onwards.
    idx = offset // 8

    if idx == 0:
        return "GPU_TIME 0 READ"
    if idx == 1:
        return "GPU_CLOCK 0 READ"

    return "PEC " + str(idx - 2) + " READ"
+
def read_token_to_rpn_read_oag(chipset, token, raw_offsets, oa_format):
width, offset_str = token.split('@')
@@ -374,9 +414,6 @@ def read_token_to_rpn_read_oag(chipset, token, raw_offsets, oa_format):
return "C " + str(idx - 48) + " READ"
else:
return "{0} READ".format(read_value(chipset, offset, oa_format))
- elif chipset in xe2_plus:
- # FIXME: skip all metrics to retain just the registers
- return "GPU_TIME 0 READ"
else:
# For Gen8+ the array of accumulated counters is
# assumed to start with a GPU_TIME then GPU_CLOCK,
@@ -405,6 +442,9 @@ def read_token_to_rpn_read(chipset, token, raw_offsets, oa_format):
if oa_format in ['192B_MPEC8LL_NOA16', '128B_MPEC8_NOA16']:
return read_token_to_rpn_read_oam(chipset, token, raw_offsets, oa_format)
+ if oa_format in ['576B_PEC64LL']:
+ return read_token_to_rpn_read_pec(chipset, token, raw_offsets, oa_format)
+
assert 0
def replace_read_tokens_with_rpn_read_ops(chipset, oa_format, equation, raw_offsets):
@@ -905,6 +945,8 @@ for arg in args.xml:
mdapi_counter.set('NormalizationEquation', '$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV')
#mdapi_counter.set('DeltaReportReadEquation', '$GpuCoreClocks $GpuTime UDIV')
+ if mdapi_counter.get('SymbolName') == "ResultUncertainty":
+ continue
symbol_name = oa_registry.Registry.sanitize_symbol_name(mdapi_counter.get('SymbolName'))
@@ -992,11 +1034,15 @@ for arg in args.xml:
# XXX: As a special case, we override the raw and delta report
# equations for the GpuTime counters, which seem inconsistent
if mdapi_counter.get('SymbolName') == "GpuTime":
- mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")
- if chipset == 'MTL' and oa_format != '256B_GENERIC_NOA16':
- mdapi_counter.set('SnapshotReportReadEquation', "qw@0x08 1000000000 UMUL $GpuTimestampFrequency UDIV")
+ if oa_format == '576B_PEC64LL':
+ mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")
+ mdapi_counter.set('SnapshotReportReadEquation', "qw@0x08 1000000000 UMUL $GpuTimestampFrequency UDIV")
else:
- mdapi_counter.set('SnapshotReportReadEquation', "dw@0x04 1000000000 UMUL $GpuTimestampFrequency UDIV")
+ mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")
+ if chipset == 'MTL' and oa_format != '256B_GENERIC_NOA16':
+ mdapi_counter.set('SnapshotReportReadEquation', "qw@0x08 1000000000 UMUL $GpuTimestampFrequency UDIV")
+ else:
+ mdapi_counter.set('SnapshotReportReadEquation', "dw@0x04 1000000000 UMUL $GpuTimestampFrequency UDIV")
availability = fixup_equation(mdapi_counter.get('AvailabilityEquation'))
if availability == "":
diff --git a/lib/xe/xe_oa.h b/lib/xe/xe_oa.h
index c16177ec8..f6f2768b0 100644
--- a/lib/xe/xe_oa.h
+++ b/lib/xe/xe_oa.h
@@ -275,6 +275,7 @@ struct intel_xe_perf_metric_set {
int b_offset;
int c_offset;
int perfcnt_offset;
+ int pec_offset;
const struct intel_xe_perf_register_prog *b_counter_regs;
uint32_t n_b_counter_regs;