summaryrefslogtreecommitdiff
path: root/drivers/accel/habanalabs/gaudi2
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/habanalabs/gaudi2')
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2.c74
1 files changed, 36 insertions, 38 deletions
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 819660c684cf..e0e5615ef9b0 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7858,39 +7858,44 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
return !!ecc_data->is_critical;
}
-static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
{
- u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
- u64 cq_ptr, arc_cq_ptr, cp_current_inst;
+ struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
+ u64 cq_ptr, cp_current_inst;
+ u32 lo, hi, cq_size, cp_sts;
+ bool is_arc_cq;
- lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
- hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
- cq_ptr = ((u64) hi) << 32 | lo;
- cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
+ cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
+ is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
- lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
- hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
- arc_cq_ptr = ((u64) hi) << 32 | lo;
- arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
+ if (is_arc_cq) {
+ lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
+ hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
+ cq_ptr = ((u64) hi) << 32 | lo;
+ cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
+ } else {
+ lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
+ hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
+ cq_ptr = ((u64) hi) << 32 | lo;
+ cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
+ }
lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
cp_current_inst = ((u64) hi) << 32 | lo;
dev_info(hdev->dev,
- "LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
- cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
+ "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
+ is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
- if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
- if (arc_cq_ptr) {
- hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
- hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
- } else {
- hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
- hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
- }
-
- hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
+ if (undef_opcode->write_enable) {
+ memset(undef_opcode, 0, sizeof(*undef_opcode));
+ undef_opcode->timestamp = ktime_get();
+ undef_opcode->cq_addr = cq_ptr;
+ undef_opcode->cq_size = cq_size;
+ undef_opcode->engine_id = engine_id;
+ undef_opcode->stream_id = QMAN_STREAMS;
+ undef_opcode->write_enable = 0;
}
}
@@ -7929,21 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
error_count++;
}
- if (i == QMAN_STREAMS && error_count) {
- /* check for undefined opcode */
- if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
- hdev->captured_err_info.undef_opcode.write_enable) {
- memset(&hdev->captured_err_info.undef_opcode, 0,
- sizeof(hdev->captured_err_info.undef_opcode));
-
- hdev->captured_err_info.undef_opcode.write_enable = false;
- hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
- hdev->captured_err_info.undef_opcode.engine_id =
- gaudi2_queue_id_to_engine_id[qid_base];
- *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
- }
-
- handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
+ /* Check for undefined opcode error in lower QM */
+ if ((i == QMAN_STREAMS) &&
+ (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
+ handle_lower_qman_data_on_err(hdev, qman_base,
+ gaudi2_queue_id_to_engine_id[qid_base]);
+ *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
}
}
@@ -10007,6 +10003,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+ if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
+ is_critical = true;
break;
case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: