diff options
Diffstat (limited to 'drivers/accel/habanalabs/gaudi2/gaudi2.c')
-rw-r--r-- | drivers/accel/habanalabs/gaudi2/gaudi2.c | 74 |
1 files changed, 36 insertions, 38 deletions
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 819660c684cf..e0e5615ef9b0 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -7858,39 +7858,44 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, return !!ecc_data->is_critical; } -static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask) +static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id) { - u32 lo, hi, cq_ptr_size, arc_cq_ptr_size; - u64 cq_ptr, arc_cq_ptr, cp_current_inst; + struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; + u64 cq_ptr, cp_current_inst; + u32 lo, hi, cq_size, cp_sts; + bool is_arc_cq; - lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET); - hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET); - cq_ptr = ((u64) hi) << 32 | lo; - cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET); + cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET); + is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */ - lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET); - hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET); - arc_cq_ptr = ((u64) hi) << 32 | lo; - arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET); + if (is_arc_cq) { + lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET); + hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET); + cq_ptr = ((u64) hi) << 32 | lo; + cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET); + } else { + lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET); + hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET); + cq_ptr = ((u64) hi) << 32 | lo; + cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET); + } lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET); hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET); cp_current_inst = ((u64) hi) << 32 | lo; dev_info(hdev->dev, - "LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n", - cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst); + "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n", + is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst); - if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { - if (arc_cq_ptr) { - hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr; - hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size; - } else { - hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; - hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size; - } - - hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS; + if (undef_opcode->write_enable) { + memset(undef_opcode, 0, sizeof(*undef_opcode)); + undef_opcode->timestamp = ktime_get(); + undef_opcode->cq_addr = cq_ptr; + undef_opcode->cq_size = cq_size; + undef_opcode->engine_id = engine_id; + undef_opcode->stream_id = QMAN_STREAMS; + undef_opcode->write_enable = 0; } } @@ -7929,21 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type error_count++; } - if (i == QMAN_STREAMS && error_count) { - /* check for undefined opcode */ - if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK && - hdev->captured_err_info.undef_opcode.write_enable) { - memset(&hdev->captured_err_info.undef_opcode, 0, - sizeof(hdev->captured_err_info.undef_opcode)); - - hdev->captured_err_info.undef_opcode.write_enable = false; - hdev->captured_err_info.undef_opcode.timestamp = ktime_get(); - hdev->captured_err_info.undef_opcode.engine_id = - gaudi2_queue_id_to_engine_id[qid_base]; - *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; - } - - handle_lower_qman_data_on_err(hdev, qman_base, *event_mask); + /* Check for undefined opcode error in lower QM */ + if ((i == QMAN_STREAMS) && + (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) { + handle_lower_qman_data_on_err(hdev, qman_base, + gaudi2_queue_id_to_engine_id[qid_base]); + *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; } } @@ -10007,6 +10003,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13)) + is_critical = true; break; case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: |