diff options
Diffstat (limited to 'drivers/accel/habanalabs/common/device.c')
-rw-r--r-- | drivers/accel/habanalabs/common/device.c | 25 |
1 files changed, 15 insertions, 10 deletions
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 9711e8fc979d..a365791a9f5c 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -853,6 +853,9 @@ static int device_early_init(struct hl_device *hdev) gaudi2_set_asic_funcs(hdev); strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name)); break; + case ASIC_GAUDI2C: + gaudi2_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name)); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", @@ -1041,18 +1044,21 @@ static bool is_pci_link_healthy(struct hl_device *hdev) return (vendor_id == PCI_VENDOR_ID_HABANALABS); } -static void hl_device_eq_heartbeat(struct hl_device *hdev) +static int hl_device_eq_heartbeat_check(struct hl_device *hdev) { - u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; struct asic_fixed_properties *prop = &hdev->asic_prop; if (!prop->cpucp_info.eq_health_check_supported) - return; + return 0; - if (hdev->eq_heartbeat_received) + if (hdev->eq_heartbeat_received) { hdev->eq_heartbeat_received = false; - else - hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask); + } else { + dev_err(hdev->dev, "EQ heartbeat event was not received!\n"); + return -EIO; + } + + return 0; } static void hl_device_heartbeat(struct work_struct *work) @@ -1069,10 +1075,9 @@ static void hl_device_heartbeat(struct work_struct *work) /* * For EQ health check need to check if driver received the heartbeat eq event * in order to validate the eq is working. + * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule. */ - hl_device_eq_heartbeat(hdev); - - if (!hdev->asic_funcs->send_heartbeat(hdev)) + if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev))) goto reschedule; if (hl_device_operational(hdev, NULL)) @@ -2035,7 +2040,7 @@ device_reset: if (ctx) hl_ctx_put(ctx); - return hl_device_reset(hdev, flags); + return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD); } static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask) |