diff options
author | Tomer Tayar <ttayar@habana.ai> | 2024-04-16 17:01:12 +0300 |
---|---|---|
committer | Ofir Bitton <obitton@habana.ai> | 2024-06-23 09:53:32 +0300 |
commit | 795f93e650fc41c3f627d2733458c2f911bc9568 (patch) | |
tree | 999a43a13e38b6e60424139f3b355632195c4884 /drivers/accel | |
parent | 9ee446f9b5d0172a94681aae01fabde4891f7123 (diff) |
accel/habanalabs: revise print on EQ heartbeat failure
Don't print the "previous EQ index" value in case of an EQ heartbeat
failure, because it is incremented along with the EQ CI and is therefore
redundant.
In addition, as the CPU-CP PI is zeroed when it reaches a value that is
twice the queue size, also print the CI value with the same wrap-around
applied, to make it easier to compare the two values.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Diffstat (limited to 'drivers/accel')
-rw-r--r-- | drivers/accel/habanalabs/common/device.c | 19 |
1 files changed, 10 insertions, 9 deletions
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 2fa6bf4c97af..3efc26dd9497 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -1064,23 +1064,24 @@ static bool is_pci_link_healthy(struct hl_device *hdev) static bool hl_device_eq_heartbeat_received(struct hl_device *hdev) { + struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info; + u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1; struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 cpu_q_id; if (!prop->cpucp_info.eq_health_check_supported) return true; if (!hdev->eq_heartbeat_received) { - cpu_q_id = hdev->heartbeat_debug_info.cpu_queue_id; - dev_err(hdev->dev, "EQ heartbeat event was not received!\n"); - dev_err(hdev->dev, "Heartbeat events counter: %u, Q_PI: %u, Q_CI: %u, EQ CI: %u, EQ prev: %u\n", - hdev->heartbeat_debug_info.heartbeat_event_counter, - hdev->kernel_queues[cpu_q_id].pi, - atomic_read(&hdev->kernel_queues[cpu_q_id].ci), - hdev->event_queue.ci, - hdev->event_queue.prev_eqe_index); + dev_err(hdev->dev, + "Heartbeat events counter: %u, EQ CI: %u, PQ PI: %u, PQ CI: %u (%u)\n", + heartbeat_debug_info->heartbeat_event_counter, + hdev->event_queue.ci, + hdev->kernel_queues[cpu_q_id].pi, + atomic_read(&hdev->kernel_queues[cpu_q_id].ci), + atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask); + return false; } |