summary refs log tree commit diff
path: root/drivers/misc/habanalabs/goya/goya.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r-- drivers/misc/habanalabs/goya/goya.c 196
1 files changed, 129 insertions, 67 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 88460b2138d8..85030759b2af 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -6,10 +6,10 @@
*/
#include "goyaP.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-#include "include/hw_ip/mmu/mmu_v1_0.h"
-#include "include/goya/asic_reg/goya_masks.h"
-#include "include/goya/goya_reg_map.h"
+#include "../include/hw_ip/mmu/mmu_general.h"
+#include "../include/hw_ip/mmu/mmu_v1_0.h"
+#include "../include/goya/asic_reg/goya_masks.h"
+#include "../include/goya/goya_reg_map.h"
#include <linux/pci.h>
#include <linux/genalloc.h>
@@ -338,11 +338,19 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
-void goya_get_fixed_properties(struct hl_device *hdev)
+int goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i;
+ prop->max_queues = GOYA_QUEUE_ID_SIZE;
+ prop->hw_queues_props = kcalloc(prop->max_queues,
+ sizeof(struct hw_queue_properties),
+ GFP_KERNEL);
+
+ if (!prop->hw_queues_props)
+ return -ENOMEM;
+
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
prop->hw_queues_props[i].driver_only = 0;
@@ -362,9 +370,6 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].requires_kernel_cb = 0;
}
- for (; i < HL_MAX_QUEUES; i++)
- prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
-
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->dram_base_address = DRAM_PHYS_BASE;
@@ -427,6 +432,10 @@ void goya_get_fixed_properties(struct hl_device *hdev)
strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
+
+ prop->max_pending_cs = GOYA_MAX_PENDING_CS;
+
+ return 0;
}
/*
@@ -457,6 +466,7 @@ static int goya_pci_bars_map(struct hl_device *hdev)
static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
struct goya_device *goya = hdev->asic_specific;
+ struct hl_inbound_pci_region pci_region;
u64 old_addr = addr;
int rc;
@@ -464,7 +474,10 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
return old_addr;
/* Inbound Region 1 - Bar 4 - Point to DDR */
- rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
+ pci_region.mode = PCI_BAR_MATCH_MODE;
+ pci_region.bar = DDR_BAR_ID;
+ pci_region.addr = addr;
+ rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
if (rc)
return U64_MAX;
@@ -486,8 +499,35 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
*/
static int goya_init_iatu(struct hl_device *hdev)
{
- return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
- HOST_PHYS_BASE, HOST_PHYS_SIZE);
+ struct hl_inbound_pci_region inbound_region; /* NOTE(review): only mode/bar/addr are set below - confirm no other field is read */
+ struct hl_outbound_pci_region outbound_region; /* only addr/size are set below */
+ int rc; /* result of the most recent region-setup call */
+
+ /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
+ inbound_region.mode = PCI_BAR_MATCH_MODE;
+ inbound_region.bar = SRAM_CFG_BAR_ID;
+ inbound_region.addr = SRAM_BASE_ADDR;
+ rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
+ if (rc)
+ goto done; /* region 0 failed: skip all remaining setup */
+
+ /* Inbound Region 1 - Bar 4 - Point to DDR */
+ inbound_region.mode = PCI_BAR_MATCH_MODE;
+ inbound_region.bar = DDR_BAR_ID;
+ inbound_region.addr = DRAM_PHYS_BASE;
+ rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
+ if (rc)
+ goto done; /* region 1 failed: DMA mask and outbound region are not touched */
+
+ hdev->asic_funcs->set_dma_mask_from_fw(hdev); /* invoked only once both inbound regions are set */
+
+ /* Outbound Region 0 - Point to Host */
+ outbound_region.addr = HOST_PHYS_BASE;
+ outbound_region.size = HOST_PHYS_SIZE;
+ rc = hl_pci_set_outbound_region(hdev, &outbound_region);
+
+done:
+ return rc; /* first non-zero rc from an inbound call, else the outbound call's result */
}
/*
@@ -508,7 +548,11 @@ static int goya_early_init(struct hl_device *hdev)
u32 val;
int rc;
- goya_get_fixed_properties(hdev);
+ rc = goya_get_fixed_properties(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to get fixed properties\n");
+ return rc;
+ }
/* Check BAR sizes */
if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
@@ -518,7 +562,8 @@ static int goya_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
SRAM_CFG_BAR_ID),
CFG_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
@@ -528,14 +573,15 @@ static int goya_early_init(struct hl_device *hdev)
(unsigned long long) pci_resource_len(pdev,
MSIX_BAR_ID),
MSIX_BAR_SIZE);
- return -ENODEV;
+ rc = -ENODEV;
+ goto free_queue_props;
}
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
rc = hl_pci_init(hdev);
if (rc)
- return rc;
+ goto free_queue_props;
if (!hdev->pldm) {
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
@@ -545,6 +591,10 @@ static int goya_early_init(struct hl_device *hdev)
}
return 0;
+
+free_queue_props:
+ kfree(hdev->asic_prop.hw_queues_props);
+ return rc;
}
/*
@@ -557,6 +607,7 @@ static int goya_early_init(struct hl_device *hdev)
*/
static int goya_early_fini(struct hl_device *hdev)
{
+ kfree(hdev->asic_prop.hw_queues_props);
hl_pci_fini(hdev);
return 0;
@@ -593,11 +644,36 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 trace_freq = 0; /* stored as psoc_timestamp_frequency; stays 0 if div_sel is unrecognised */
+ u32 pll_clk = 0;
+ u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+ u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
+ u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
+ u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
+ u32 od = RREG32(mmPSOC_PCI_PLL_OD);
+
+ if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
+ if (div_sel == DIV_SEL_REF_CLK)
+ trace_freq = PLL_REF_CLK;
+ else
+ trace_freq = PLL_REF_CLK / (div_fctr + 1);
+ } else if (div_sel == DIV_SEL_PLL_CLK ||
+ div_sel == DIV_SEL_DIVIDED_PLL) {
+ pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
+ if (div_sel == DIV_SEL_PLL_CLK)
+ trace_freq = pll_clk;
+ else
+ trace_freq = pll_clk / (div_fctr + 1);
+ } else {
+ dev_warn(hdev->dev, /* div_sel is u32: print with %u and end the record with a newline */
+ "Received invalid div select value: %u\n", div_sel);
+ }
- prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
- prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
- prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
- prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+ prop->psoc_timestamp_frequency = trace_freq;
+ prop->psoc_pci_pll_nr = nr;
+ prop->psoc_pci_pll_nf = nf;
+ prop->psoc_pci_pll_od = od;
+ prop->psoc_pci_pll_div_factor = div_fctr;
}
int goya_late_init(struct hl_device *hdev)
@@ -2165,29 +2241,15 @@ static void goya_disable_timestamp(struct hl_device *hdev)
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
- u32 wait_timeout_ms, cpu_timeout_ms;
+ u32 wait_timeout_ms;
dev_info(hdev->dev,
"Halting compute engines and disabling interrupts\n");
- if (hdev->pldm) {
+ if (hdev->pldm)
wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
- cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
- } else {
+ else
wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
- cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
- }
-
- if (hard_reset) {
- /*
- * I don't know what is the state of the CPU so make sure it is
- * stopped in any means necessary
- */
- WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
- WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
- GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
- msleep(cpu_timeout_ms);
- }
goya_stop_external_queues(hdev);
goya_stop_internal_queues(hdev);
@@ -2492,14 +2554,26 @@ disable_queues:
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
{
struct goya_device *goya = hdev->asic_specific;
- u32 reset_timeout_ms, status;
+ u32 reset_timeout_ms, cpu_timeout_ms, status;
- if (hdev->pldm)
+ if (hdev->pldm) {
reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
- else
+ cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
+ } else {
reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
+ cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
+ }
if (hard_reset) {
+ /* I don't know what is the state of the CPU so make sure it is
+ * stopped in any means necessary
+ */
+ WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
+ WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
+ GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
+
+ msleep(cpu_timeout_ms);
+
goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
goya_disable_clk_rlx(hdev);
goya_set_pll_refclk(hdev);
@@ -3701,9 +3775,8 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
parser->patched_cb_size = parser->user_cb_size +
sizeof(struct packet_msg_prot) * 2;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
- parser->patched_cb_size,
- &patched_cb_handle, HL_KERNEL_ASID_ID);
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+ &patched_cb_handle, HL_KERNEL_ASID_ID, false);
if (rc) {
dev_err(hdev->dev,
@@ -3775,9 +3848,8 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
if (rc)
goto free_userptr;
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
- parser->patched_cb_size,
- &patched_cb_handle, HL_KERNEL_ASID_ID);
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size,
+ &patched_cb_handle, HL_KERNEL_ASID_ID, false);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate patched CB for DMA CS %d\n", rc);
@@ -3946,8 +4018,7 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
(addr - SRAM_BASE_ADDR));
- } else if ((addr >= DRAM_PHYS_BASE) &&
- (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
+ } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
@@ -4003,8 +4074,7 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
(addr - SRAM_BASE_ADDR));
- } else if ((addr >= DRAM_PHYS_BASE) &&
- (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
+ } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
@@ -4048,9 +4118,8 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
(addr - SRAM_BASE_ADDR));
- } else if ((addr >= DRAM_PHYS_BASE) &&
- (addr <=
- DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+ } else if (addr <=
+ DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
@@ -4092,9 +4161,8 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
(addr - SRAM_BASE_ADDR));
- } else if ((addr >= DRAM_PHYS_BASE) &&
- (addr <=
- DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {
+ } else if (addr <=
+ DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
u64 bar_base_addr = DRAM_PHYS_BASE +
(addr & ~(prop->dram_pci_bar_size - 0x1ull));
@@ -4627,7 +4695,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
sizeof(struct packet_msg_prot);
- cb = hl_cb_kernel_create(hdev, cb_size);
+ cb = hl_cb_kernel_create(hdev, cb_size, false);
if (!cb)
return -ENOMEM;
@@ -5157,19 +5225,14 @@ static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
return RREG32(mmHW_STATE);
}
-u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
+static int goya_ctx_init(struct hl_ctx *ctx)
{
- return cq_idx;
-}
-
-static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-
+ return 0; /* no work is done for ctx here; always reports success */
}
-static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
+u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
-
+ return cq_idx; /* the queue ID returned is the CQ index itself */
}
static u32 goya_get_signal_cb_size(struct hl_device *hdev)
@@ -5280,13 +5343,12 @@ static const struct hl_asic_funcs goya_funcs = {
.rreg = hl_rreg,
.wreg = hl_wreg,
.halt_coresight = goya_halt_coresight,
+ .ctx_init = goya_ctx_init,
.get_clk_rate = goya_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
.read_device_fw_version = goya_read_device_fw_version,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
- .ext_queue_init = goya_ext_queue_init,
- .ext_queue_reset = goya_ext_queue_reset,
.get_signal_cb_size = goya_get_signal_cb_size,
.get_wait_cb_size = goya_get_wait_cb_size,
.gen_signal_cb = goya_gen_signal_cb,