Diffstat (limited to 'drivers/misc')
-rw-r--r--  drivers/misc/habanalabs/Makefile                              2
-rw-r--r--  drivers/misc/habanalabs/context.c                            19
-rw-r--r--  drivers/misc/habanalabs/device.c                             20
-rw-r--r--  drivers/misc/habanalabs/goya/goya.c                         395
-rw-r--r--  drivers/misc/habanalabs/habanalabs.h                        189
-rw-r--r--  drivers/misc/habanalabs/habanalabs_drv.c                      2
-rw-r--r--  drivers/misc/habanalabs/habanalabs_ioctl.c                    3
-rw-r--r--  drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h      46
-rw-r--r--  drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h         15
-rw-r--r--  drivers/misc/habanalabs/memory.c                           1517
-rw-r--r--  drivers/misc/habanalabs/mmu.c                               691
11 files changed, 2892 insertions, 7 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index d2fd0e18b1eb..fd46f8b48bab 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -6,7 +6,7 @@ obj-m := habanalabs.o
habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
- command_submission.o
+ command_submission.o mmu.o
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index c3854714b46c..619ace1c4ef7 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -25,8 +25,10 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
dma_fence_put(ctx->cs_pending[i]);
- if (ctx->asid != HL_KERNEL_ASID_ID)
+ if (ctx->asid != HL_KERNEL_ASID_ID) {
+ hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
+ }
}
void hl_ctx_do_release(struct kref *ref)
@@ -96,6 +98,8 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
+ int rc = 0;
+
ctx->hdev = hdev;
kref_init(&ctx->refcount);
@@ -113,9 +117,22 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
dev_err(hdev->dev, "No free ASID, failed to create context\n");
return -ENOMEM;
}
+
+ rc = hl_vm_ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init mem ctx module\n");
+ rc = -ENOMEM;
+ goto mem_ctx_err;
+ }
}
return 0;
+
+mem_ctx_err:
+ if (ctx->asid != HL_KERNEL_ASID_ID)
+ hl_asid_free(hdev, ctx->asid);
+
+ return rc;
}
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index cc5f068df597..d0929022655b 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -615,8 +615,10 @@ again:
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, hard_reset);
- if (hard_reset)
+ if (hard_reset) {
+ hl_vm_fini(hdev);
hl_eq_reset(hdev, &hdev->event_queue);
+ }
/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
hl_hw_queue_reset(hdev, hard_reset);
@@ -677,6 +679,13 @@ again:
goto out_err;
}
+ rc = hl_vm_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to init memory module after hard reset\n");
+ goto out_err;
+ }
+
hl_set_max_power(hdev, hdev->max_power);
hdev->hard_reset_pending = false;
@@ -861,6 +870,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->asic_name,
hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
+ rc = hl_vm_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize memory module\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
/*
* hl_hwmon_init must be called after device_late_init, because only
* there we get the information from the device about which
@@ -977,6 +993,8 @@ void hl_device_fini(struct hl_device *hdev)
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true);
+ hl_vm_fini(hdev);
+
hl_eq_fini(hdev, &hdev->event_queue);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index e3878fd7dc94..89b82b989966 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -6,6 +6,8 @@
*/
#include "goyaP.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"
#include <linux/pci.h>
@@ -80,6 +82,7 @@
#define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */
#define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
+#define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_QMAN0_FENCE_VAL 0xD169B243
@@ -131,6 +134,70 @@ static const char *goya_axi_name[GOYA_MAX_INITIATORS] = {
"MMU"
};
+static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
+ mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
+ mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
+ mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
+ mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
+ mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
+ mmTPC0_QM_GLBL_SECURE_PROPS,
+ mmTPC0_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC0_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC0_CFG_ARUSER,
+ mmTPC0_CFG_AWUSER,
+ mmTPC1_QM_GLBL_SECURE_PROPS,
+ mmTPC1_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC1_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC1_CFG_ARUSER,
+ mmTPC1_CFG_AWUSER,
+ mmTPC2_QM_GLBL_SECURE_PROPS,
+ mmTPC2_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC2_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC2_CFG_ARUSER,
+ mmTPC2_CFG_AWUSER,
+ mmTPC3_QM_GLBL_SECURE_PROPS,
+ mmTPC3_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC3_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC3_CFG_ARUSER,
+ mmTPC3_CFG_AWUSER,
+ mmTPC4_QM_GLBL_SECURE_PROPS,
+ mmTPC4_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC4_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC4_CFG_ARUSER,
+ mmTPC4_CFG_AWUSER,
+ mmTPC5_QM_GLBL_SECURE_PROPS,
+ mmTPC5_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC5_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC5_CFG_ARUSER,
+ mmTPC5_CFG_AWUSER,
+ mmTPC6_QM_GLBL_SECURE_PROPS,
+ mmTPC6_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC6_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC6_CFG_ARUSER,
+ mmTPC6_CFG_AWUSER,
+ mmTPC7_QM_GLBL_SECURE_PROPS,
+ mmTPC7_QM_GLBL_NON_SECURE_PROPS,
+ mmTPC7_CMDQ_GLBL_SECURE_PROPS,
+ mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmTPC7_CFG_ARUSER,
+ mmTPC7_CFG_AWUSER,
+ mmMME_QM_GLBL_SECURE_PROPS,
+ mmMME_QM_GLBL_NON_SECURE_PROPS,
+ mmMME_CMDQ_GLBL_SECURE_PROPS,
+ mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
+ mmMME_SBA_CONTROL_DATA,
+ mmMME_SBB_CONTROL_DATA,
+ mmMME_SBC_CONTROL_DATA,
+ mmMME_WBC_CONTROL_DATA
+};
+
#define GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE 121
static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
@@ -258,6 +325,10 @@ static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
};
static int goya_armcp_info_get(struct hl_device *hdev);
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
+ u64 phys_addr);
static void goya_get_fixed_properties(struct hl_device *hdev)
{
@@ -296,6 +367,16 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
prop->sram_user_base_address = prop->sram_base_address +
SRAM_USER_BASE_OFFSET;
+ prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
+ if (hdev->pldm)
+ prop->mmu_pgt_size = 0x800000; /* 8MB */
+ else
+ prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
+ prop->mmu_pte_size = HL_PTE_SIZE;
+ prop->mmu_hop_table_size = HOP_TABLE_SIZE;
+ prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
+ prop->dram_page_size = PAGE_SIZE_2MB;
+
prop->host_phys_base_address = HOST_PHYS_BASE;
prop->va_space_host_start_address = VA_HOST_SPACE_START;
prop->va_space_host_end_address = VA_HOST_SPACE_END;
@@ -752,7 +833,18 @@ static int goya_late_init(struct hl_device *hdev)
goya_fetch_psoc_frequency(hdev);
+ rc = goya_mmu_clear_pgt_range(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
+ goto disable_pci_access;
+ }
+
return 0;
+
+disable_pci_access:
+ goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+
+ return rc;
}
/*
@@ -2565,6 +2657,54 @@ out:
return 0;
}
+static int goya_mmu_init(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ u64 hop0_addr;
+ int rc, i;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ if (goya->hw_cap_initialized & HW_CAP_MMU)
+ return 0;
+
+ hdev->dram_supports_virtual_memory = true;
+
+ for (i = 0 ; i < prop->max_asid ; i++) {
+ hop0_addr = prop->mmu_pgt_addr +
+ (i * prop->mmu_hop_table_size);
+
+ rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to set hop0 addr for asid %d\n", i);
+ goto err;
+ }
+ }
+
+ goya->hw_cap_initialized |= HW_CAP_MMU;
+
+ /* init MMU cache manage page */
+ WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
+ WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
+
+ /* Remove follower feature due to performance bug */
+ WREG32_AND(mmSTLB_STLB_FEATURE_EN,
+ (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
+
+ hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
+
+ WREG32(mmMMU_MMU_ENABLE, 1);
+ WREG32(mmMMU_SPI_MASK, 0xF);
+
+ return 0;
+
+err:
+ return rc;
+}
+
/*
* goya_hw_init - Goya hardware initialization code
*
@@ -2614,6 +2754,10 @@ static int goya_hw_init(struct hl_device *hdev)
return rc;
}
+ rc = goya_mmu_init(hdev);
+ if (rc)
+ return rc;
+
goya_init_security(hdev);
goya_init_dma_qmans(hdev);
@@ -4249,6 +4393,10 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
rc = goya_send_job_on_qman0(hdev, job);
+ /* no point in setting the asid in case of failure */
+ if (!rc)
+ goya_mmu_prepare(hdev, asid);
+
job->patched_cb->cs_cnt--;
hl_cb_put(job->patched_cb);
@@ -4284,6 +4432,22 @@ void goya_restore_phase_topology(struct hl_device *hdev)
i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}
+static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
+{
+ struct goya_device *goya = hdev->asic_specific;
+
+ return readq(hdev->pcie_bar[DDR_BAR_ID] +
+ (addr - goya->ddr_bar_cur_addr));
+}
+
+static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
+{
+ struct goya_device *goya = hdev->asic_specific;
+
+ writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
+ (addr - goya->ddr_bar_cur_addr));
+}
+
static void goya_get_axi_name(struct hl_device *hdev, u32 agent_id,
u16 event_type, char *axi_name, int len)
{
@@ -4567,6 +4731,233 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
return goya->events_stat;
}
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ struct packet_lin_dma *clear_pgt_range_pkt;
+ struct hl_cs_parser parser;
+ struct hl_cs_job *job;
+ u32 cb_size;
+ struct hl_cb *cb;
+ int rc;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return 0;
+
+ cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ if (!cb)
+ return -EFAULT;
+
+ clear_pgt_range_pkt = (struct packet_lin_dma *)
+ (uintptr_t) cb->kernel_address;
+
+ memset(clear_pgt_range_pkt, 0, sizeof(*clear_pgt_range_pkt));
+ cb_size = sizeof(*clear_pgt_range_pkt);
+
+ clear_pgt_range_pkt->ctl =
+ ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
+ (DMA_HOST_TO_DRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
+ (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
+ (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
+ (1 << GOYA_PKT_CTL_RB_SHIFT) |
+ (1 << GOYA_PKT_CTL_MB_SHIFT));
+
+ clear_pgt_range_pkt->src_addr = 0;
+ clear_pgt_range_pkt->dst_addr = prop->mmu_pgt_addr;
+ clear_pgt_range_pkt->tsize = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
+
+ job = hl_cs_allocate_job(hdev, true);
+ if (!job) {
+ dev_err(hdev->dev, "Failed to allocate a new job\n");
+ rc = -ENOMEM;
+ goto release_cb;
+ }
+
+ job->id = 0;
+ job->user_cb = cb;
+ job->user_cb->cs_cnt++;
+ job->user_cb_size = cb_size;
+ job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
+
+ parser.ctx_id = HL_KERNEL_ASID_ID;
+ parser.cs_sequence = 0;
+ parser.job_id = job->id;
+ parser.hw_queue_id = job->hw_queue_id;
+ parser.job_userptr_list = &job->userptr_list;
+ parser.user_cb = job->user_cb;
+ parser.user_cb_size = job->user_cb_size;
+ parser.ext_queue = job->ext_queue;
+ parser.use_virt_addr = hdev->mmu_enable;
+
+ rc = hdev->asic_funcs->cs_parser(hdev, &parser);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to parse kernel CB when clearing pgt\n");
+ goto free_job;
+ }
+
+ job->patched_cb = parser.patched_cb;
+ job->job_cb_size = parser.patched_cb_size;
+ job->patched_cb->cs_cnt++;
+
+ rc = goya_send_job_on_qman0(hdev, job);
+
+ job->patched_cb->cs_cnt--;
+ hl_cb_put(job->patched_cb);
+
+free_job:
+ hl_userptr_delete_list(hdev, &job->userptr_list);
+ kfree(job);
+ cb->cs_cnt--;
+
+release_cb:
+ hl_cb_put(cb);
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+ return rc;
+}
+
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
+{
+ struct goya_device *goya = hdev->asic_specific;
+ int i;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return;
+
+ if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
+ WARN(1, "asid %u is too big\n", asid);
+ return;
+ }
+
+ /* zero the MMBP and ASID bits and then set the ASID */
+ for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) {
+ WREG32_AND(goya_mmu_regs[i], ~0x7FF);
+ WREG32_OR(goya_mmu_regs[i], asid);
+ }
+}
+
+static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
+{
+ struct goya_device *goya = hdev->asic_specific;
+ u32 status, timeout_usec;
+ int rc;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return;
+
+ /* no need for L1-only invalidation in Goya */
+ if (!is_hard)
+ return;
+
+ if (hdev->pldm)
+ timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
+ else
+ timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+
+ mutex_lock(&hdev->mmu_cache_lock);
+
+ /* L0 & L1 invalidation */
+ WREG32(mmSTLB_INV_ALL_START, 1);
+
+ rc = hl_poll_timeout(
+ hdev,
+ mmSTLB_INV_ALL_START,
+ status,
+ !status,
+ 1000,
+ timeout_usec);
+
+ mutex_unlock(&hdev->mmu_cache_lock);
+
+ if (rc)
+ dev_notice_ratelimited(hdev->dev,
+ "Timeout when waiting for MMU cache invalidation\n");
+}
+
+static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
+ bool is_hard, u32 asid, u64 va, u64 size)
+{
+ struct goya_device *goya = hdev->asic_specific;
+ u32 status, timeout_usec, inv_data, pi;
+ int rc;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return;
+
+ /* no need for L1-only invalidation in Goya */
+ if (!is_hard)
+ return;
+
+ if (hdev->pldm)
+ timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
+ else
+ timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+
+ mutex_lock(&hdev->mmu_cache_lock);
+
+ /*
+ * TODO: currently invalidate entire L0 & L1 as in regular hard
+ * invalidation. Need to apply invalidation of specific cache lines with
+ * mask of ASID & VA & size.
+ * Note that L1 will be flushed entirely in any case.
+ */
+
+ /* L0 & L1 invalidation */
+ inv_data = RREG32(mmSTLB_CACHE_INV);
+ /* PI is 8 bit */
+ pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
+ WREG32(mmSTLB_CACHE_INV,
+ (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
+
+ rc = hl_poll_timeout(
+ hdev,
+ mmSTLB_INV_CONSUMER_INDEX,
+ status,
+ status == pi,
+ 1000,
+ timeout_usec);
+
+ mutex_unlock(&hdev->mmu_cache_lock);
+
+ if (rc)
+ dev_notice_ratelimited(hdev->dev,
+ "Timeout when waiting for MMU cache invalidation\n");
+}
+
+static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
+ u64 phys_addr)
+{
+ u32 status, timeout_usec;
+ int rc;
+
+ if (hdev->pldm)
+ timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
+ else
+ timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+
+ WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
+ WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
+ WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
+
+ rc = hl_poll_timeout(
+ hdev,
+ MMU_ASID_BUSY,
+ status,
+ !(status & 0x80000000),
+ 1000,
+ timeout_usec);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Timeout during MMU hop0 config of asid %d\n", asid);
+ return rc;
+ }
+
+ return 0;
+}
+
int goya_send_heartbeat(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
@@ -4830,6 +5221,10 @@ static const struct hl_asic_funcs goya_funcs = {
.handle_eqe = goya_handle_eqe,
.set_pll_profile = goya_set_pll_profile,
.get_events_stat = goya_get_events_stat,
+ .read_pte = goya_read_pte,
+ .write_pte = goya_write_pte,
+ .mmu_invalidate_cache = goya_mmu_invalidate_cache,
+ .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
.send_heartbeat = goya_send_heartbeat,
.enable_clock_gating = goya_init_clock_gating,
.disable_clock_gating = goya_disable_clock_gating,
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 9adc7c6ec08b..03085e7a12dd 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -19,6 +19,7 @@
#include <linux/dma-fence.h>
#include <linux/dma-direction.h>
#include <linux/scatterlist.h>
+#include <linux/hashtable.h>
#define HL_NAME "habanalabs"
@@ -39,6 +40,31 @@
/* MUST BE POWER OF 2 and larger than 1 */
#define HL_MAX_PENDING_CS 64
+/* Memory */
+#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+
+/* MMU */
+#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+
+/**
+ * struct pgt_info - MMU hop page info.
+ * @node: hash linked-list node for the pgts hash of pgts.
+ * @addr: physical address of the pgt.
+ * @ctx: pointer to the owner ctx.
+ * @num_of_ptes: indicates how many ptes are used in the pgt.
+ *
+ * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
+ * is needed during mapping, a new page is allocated and this structure holds
+ * its essential information. During unmapping, if no valid PTEs remain in the
+ * page, it is freed with its pgt_info structure.
+ */
+struct pgt_info {
+ struct hlist_node node;
+ u64 addr;
+ struct hl_ctx *ctx;
+ int num_of_ptes;
+};
+
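For illustration, a minimal sketch of how this bookkeeping might be used when a PTE in a hop table is cleared (put_hop_sketch is an assumed helper name, not part of this patch; the driver's own implementation is in mmu.c, which is not shown in this section):

/* Illustrative sketch only - assumed helper, not part of this patch */
static void put_hop_sketch(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
        struct hl_device *hdev = ctx->hdev;

        /* one PTE of this hop table was just invalidated */
        if (--pgt_info->num_of_ptes == 0) {
                /* no valid PTEs left: return the DRAM page that held the hop
                 * table to the pool and drop its tracking structure
                 */
                gen_pool_free(hdev->mmu_pgt_pool,
                                (unsigned long) pgt_info->addr,
                                hdev->asic_prop.mmu_hop_table_size);
                hash_del(&pgt_info->node);
                kfree(pgt_info);
        }
}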
struct hl_device;
struct hl_fpriv;
@@ -72,11 +98,11 @@ struct hw_queue_properties {
/**
* enum vm_type_t - virtual memory mapping request information.
* @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
- * @VM_TYPE_PHYS_LIST: mapping of DRAM memory to device virtual address.
+ * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
*/
enum vm_type_t {
VM_TYPE_USERPTR,
- VM_TYPE_PHYS_LIST
+ VM_TYPE_PHYS_PACK
};
/**
@@ -117,6 +143,12 @@ enum hl_device_hw_state {
* mapping DRAM memory.
* @va_space_dram_end_address: end address of virtual memory range for
* mapping DRAM memory.
+ * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
+ * @mmu_pgt_size: MMU page tables total size.
+ * @mmu_pte_size: PTE size in MMU page tables.
+ * @mmu_hop_table_size: MMU hop table size.
+ * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
+ * @dram_page_size: page size for MMU DRAM allocation.
* @cfg_size: configuration space size on SRAM.
* @sram_size: total size of SRAM.
* @max_asid: maximum number of open contexts (ASIDs).
@@ -150,6 +182,12 @@ struct asic_fixed_properties {
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
+ u64 mmu_pgt_addr;
+ u32 mmu_pgt_size;
+ u32 mmu_pte_size;
+ u32 mmu_hop_table_size;
+ u32 mmu_hop0_tables_total_size;
+ u32 dram_page_size;
u32 cfg_size;
u32 sram_size;
u32 max_asid;
@@ -419,6 +457,12 @@ enum hl_pll_frequency {
* @handle_eqe: handle event queue entry (IRQ) from ArmCP.
* @set_pll_profile: change PLL profile (manual/automatic).
* @get_events_stat: retrieve event queue entries histogram.
+ * @read_pte: read MMU page table entry from DRAM.
+ * @write_pte: write MMU page table entry to DRAM.
+ * @mmu_invalidate_cache: flush MMU STLB cache, either with soft (L1 only) or
+ * hard (L0 & L1) flush.
+ * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
+ * ASID-VA-size mask.
* @send_heartbeat: send is-alive packet to ArmCP and verify response.
* @enable_clock_gating: enable clock gating for reducing power consumption.
* @disable_clock_gating: disable clock for accessing registers on HBW.
@@ -483,6 +527,11 @@ struct hl_asic_funcs {
void (*set_pll_profile)(struct hl_device *hdev,
enum hl_pll_frequency freq);
void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
+ u64 (*read_pte)(struct hl_device *hdev, u64 addr);
+ void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
+ void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);
+ void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
+ u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
void (*enable_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
@@ -505,16 +554,39 @@ struct hl_asic_funcs {
#define HL_KERNEL_ASID_ID 0
/**
+ * struct hl_va_range - virtual addresses range.
+ * @lock: protects the virtual addresses list.
+ * @list: list of virtual addresses blocks available for mappings.
+ * @start_addr: range start address.
+ * @end_addr: range end address.
+ */
+struct hl_va_range {
+ struct mutex lock;
+ struct list_head list;
+ u64 start_addr;
+ u64 end_addr;
+};
+
+/**
* struct hl_ctx - user/kernel context.
+ * @mem_hash: holds mapping from virtual address to virtual memory area
+ * descriptor (hl_vm_phys_pg_pack or hl_userptr).
+ * @mmu_hash: holds a mapping from virtual address to pgt_info structure.
* @hpriv: pointer to the private (KMD) data of the process (fd).
* @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT.
* @cs_pending: array of DMA fence objects representing pending CS.
+ * @host_va_range: holds available virtual addresses for host mappings.
+ * @dram_va_range: holds available virtual addresses for DRAM mappings.
+ * @mem_hash_lock: protects the mem_hash.
+ * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
+ * MMU hash or walking the PGT requires taking this lock.
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
* to user so user could inquire about CS. It is used as
* index to cs_pending array.
* @cs_lock: spinlock to protect cs_sequence.
+ * @dram_phys_mem: amount of used physical DRAM memory by this context.
* @thread_restore_token: token to prevent multiple threads of the same context
* from running the restore phase. Only one thread
* should run it.
@@ -524,12 +596,19 @@ struct hl_asic_funcs {
* @asid: context's unique address space ID in the device's MMU.
*/
struct hl_ctx {
+ DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
+ DECLARE_HASHTABLE(mmu_hash, MMU_HASH_TABLE_BITS);
struct hl_fpriv *hpriv;
struct hl_device *hdev;
struct kref refcount;
struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
+ struct hl_va_range host_va_range;
+ struct hl_va_range dram_va_range;
+ struct mutex mem_hash_lock;
+ struct mutex mmu_lock;
u64 cs_sequence;
spinlock_t cs_lock;
+ atomic64_t dram_phys_mem;
atomic_t thread_restore_token;
u32 thread_restore_wait_token;
u32 asid;
@@ -673,6 +752,85 @@ struct hl_cs_parser {
/*
+ * MEMORY STRUCTURE
+ */
+
+/**
+ * struct hl_vm_hash_node - hash element from virtual address to virtual
+ * memory area descriptor (hl_vm_phys_pg_pack or
+ * hl_userptr).
+ * @node: node to hang on the hash table in context object.
+ * @vaddr: key virtual address.
+ * @ptr: value pointer (hl_vm_phys_pg_pack or hl_userptr).
+ */
+struct hl_vm_hash_node {
+ struct hlist_node node;
+ u64 vaddr;
+ void *ptr;
+};
+
+/**
+ * struct hl_vm_phys_pg_pack - physical page pack.
+ * @vm_type: describes the type of the virtual area descriptor.
+ * @pages: the physical page array.
+ * @mapping_cnt: number of shared mappings.
+ * @asid: the context related to this pack.
+ * @npages: num physical pages in the pack.
+ * @page_size: size of each page in the pack.
+ * @total_size: total size of all the pages in this pack.
+ * @flags: HL_MEM_* flags related to this pack.
+ * @handle: the provided handle related to this pack.
+ * @offset: offset from the first page.
+ * @contiguous: is contiguous physical memory.
+ * @created_from_userptr: is product of host virtual address.
+ */
+struct hl_vm_phys_pg_pack {
+ enum vm_type_t vm_type; /* must be first */
+ u64 *pages;
+ atomic_t mapping_cnt;
+ u32 asid;
+ u32 npages;
+ u32 page_size;
+ u32 total_size;
+ u32 flags;
+ u32 handle;
+ u32 offset;
+ u8 contiguous;
+ u8 created_from_userptr;
+};
+
+/**
+ * struct hl_vm_va_block - virtual range block information.
+ * @node: node to hang on the virtual range list in context object.
+ * @start: virtual range start address.
+ * @end: virtual range end address.
+ * @size: virtual range size.
+ */
+struct hl_vm_va_block {
+ struct list_head node;
+ u64 start;
+ u64 end;
+ u64 size;
+};
+
+/**
+ * struct hl_vm - virtual memory manager for MMU.
+ * @dram_pg_pool: pool for DRAM physical pages of 2MB.
+ * @dram_pg_pool_refcount: reference counter for the pool usage.
+ * @idr_lock: protects phys_pg_pack_handles.
+ * @phys_pg_pack_handles: idr to hold all device allocations handles.
+ * @init_done: whether initialization was done. We need this because VM
+ * initialization might be skipped during device initialization.
+ */
+struct hl_vm {
+ struct gen_pool *dram_pg_pool;
+ struct kref dram_pg_pool_refcount;
+ spinlock_t idr_lock;
+ struct idr phys_pg_pack_handles;
+ u8 init_done;
+};
+
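For illustration, a minimal sketch of how this manager could be set up from the fields above (hl_vm_init_sketch is an assumed name and shape; the driver's own hl_vm_init lives in memory.c):

/* Illustrative sketch only - assumed shape, not the driver's hl_vm_init */
static int hl_vm_init_sketch(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_vm *vm = &hdev->vm;

        /* DRAM is handed out in dram_page_size granules (2MB on Goya) */
        vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
        if (!vm->dram_pg_pool)
                return -ENOMEM;

        kref_init(&vm->dram_pg_pool_refcount);
        spin_lock_init(&vm->idr_lock);
        idr_init(&vm->phys_pg_pack_handles);
        vm->init_done = 1;

        return 0;
}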
+/*
* FILE PRIVATE STRUCTURE
*/
@@ -787,12 +945,16 @@ struct hl_device_reset_work {
* @asic_prop: ASIC specific immutable properties.
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
+ * @mmu_pgt_pool: pool of DRAM pages available for MMU hop page tables.
+ * @vm: virtual memory manager for MMU.
+ * @mmu_cache_lock: protects MMU cache invalidation, which can serve only one context at a time.
* @hwmon_dev: H/W monitor device.
* @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information.
* @cb_pool: list of preallocated CBs.
* @cb_pool_lock: protects the CB pool.
* @user_ctx: current user context executing.
+ * @dram_used_mem: current DRAM memory consumption.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @fd_open_cnt: number of open user processes.
@@ -812,6 +974,7 @@ struct hl_device_reset_work {
* @heartbeat: is heartbeat sanity check towards ArmCP enabled.
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
* otherwise.
+ * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
* @init_done: is the initialization of the device done.
* @mmu_enable: is MMU enabled.
*/
@@ -846,6 +1009,9 @@ struct hl_device {
struct asic_fixed_properties asic_prop;
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
+ struct gen_pool *mmu_pgt_pool;
+ struct hl_vm vm;
+ struct mutex mmu_cache_lock;
struct device *hwmon_dev;
enum hl_pm_mng_profile pm_mng_profile;
struct hwmon_chip_info *hl_chip_info;
@@ -856,6 +1022,7 @@ struct hl_device {
/* TODO: remove user_ctx for multiple process support */
struct hl_ctx *user_ctx;
+ atomic64_t dram_used_mem;
atomic_t in_reset;
atomic_t curr_pll_profile;
atomic_t fd_open_cnt;
@@ -872,6 +1039,7 @@ struct hl_device {
u8 hard_reset_pending;
u8 heartbeat;
u8 reset_on_lockup;
+ u8 dram_supports_virtual_memory;
u8 init_done;
/* Parameters for bring-up */
@@ -1021,6 +1189,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
void hl_hpriv_get(struct hl_fpriv *hpriv);
void hl_hpriv_put(struct hl_fpriv *hpriv);
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
+
int hl_build_hwmon_channel_info(struct hl_device *hdev,
struct armcp_sensor *sensors_arr);
@@ -1048,6 +1217,12 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue);
void goya_set_asic_funcs(struct hl_device *hdev);
+int hl_vm_ctx_init(struct hl_ctx *ctx);
+void hl_vm_ctx_fini(struct hl_ctx *ctx);
+
+int hl_vm_init(struct hl_device *hdev);
+void hl_vm_fini(struct hl_device *hdev);
+
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
struct hl_userptr *userptr);
int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
@@ -1057,6 +1232,15 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
struct list_head *userptr_list,
struct hl_userptr **userptr);
+int hl_mmu_init(struct hl_device *hdev);
+void hl_mmu_fini(struct hl_device *hdev);
+void hl_mmu_ctx_init(struct hl_ctx *ctx);
+void hl_mmu_ctx_fini(struct hl_ctx *ctx);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+void hl_mmu_swap_out(struct hl_ctx *ctx);
+void hl_mmu_swap_in(struct hl_ctx *ctx);
+
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
@@ -1074,5 +1258,6 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
#endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 77a1cc85e530..436ccae0989d 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -188,7 +188,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->reset_on_lockup = reset_on_lockup;
/* Parameters for bring-up - set them to defaults */
- hdev->mmu_enable = 0;
+ hdev->mmu_enable = 1;
hdev->cpu_enable = 1;
hdev->reset_pcilink = 0;
hdev->cpu_queues_enable = 1;
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 481db1a5e97e..6e4dc5b5e696 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -18,7 +18,8 @@
static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl)
+ HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl)
};
#define HL_CORE_IOCTL_COUNT ARRAY_SIZE(hl_ioctls)
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
new file mode 100644
index 000000000000..1bc36aba1426
--- /dev/null
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef INCLUDE_MMU_GENERAL_H_
+#define INCLUDE_MMU_GENERAL_H_
+
+#define PAGE_SHIFT_4KB 12
+#define PAGE_SHIFT_2MB 21
+#define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB)
+#define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB)
+#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1))
+
+#define PAGE_PRESENT_MASK 0x0000000000001
+#define SWAP_OUT_MASK 0x0000000000004
+#define LAST_MASK 0x0000000000800
+#define PHYS_ADDR_MASK 0x3FFFFFFFFF000ull
+#define HOP0_MASK 0x3000000000000ull
+#define HOP1_MASK 0x0FF8000000000ull
+#define HOP2_MASK 0x0007FC0000000ull
+#define HOP3_MASK 0x000003FE00000
+#define HOP4_MASK 0x00000001FF000
+#define OFFSET_MASK 0x0000000000FFF
+
+#define HOP0_SHIFT 48
+#define HOP1_SHIFT 39
+#define HOP2_SHIFT 30
+#define HOP3_SHIFT 21
+#define HOP4_SHIFT 12
+
+#define PTE_PHYS_ADDR_SHIFT 12
+#define PTE_PHYS_ADDR_MASK ~0xFFF
+
+#define HL_PTE_SIZE sizeof(u64)
+#define HOP_TABLE_SIZE PAGE_SIZE_4KB
+#define HOP0_TABLES_TOTAL_SIZE (HOP_TABLE_SIZE * MAX_ASID)
+
+#define MMU_HOP0_PA43_12_SHIFT 12
+#define MMU_HOP0_PA49_44_SHIFT (12 + 32)
+
+#define MMU_CONFIG_TIMEOUT_USEC 2000 /* 2 ms */
+
+#endif /* INCLUDE_MMU_GENERAL_H_ */
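To show how the masks and shifts above combine, a minimal sketch follows (get_hop_pte_addr is an assumed helper name, not defined in this patch):

/* Illustrative sketch only - assumed helper, not defined in this patch */
static inline u64 get_hop_pte_addr(u64 hop_base_addr, u64 virt_addr,
                                        u64 mask, u64 shift)
{
        /* each entry is HL_PTE_SIZE (8) bytes inside a 4KB hop table */
        return hop_base_addr + ((virt_addr & mask) >> shift) * HL_PTE_SIZE;
}

/*
 * e.g. the hop0 entry of a device virtual address is at
 * get_hop_pte_addr(hop0_addr, virt_addr, HOP0_MASK, HOP0_SHIFT), where
 * hop0_addr = prop->mmu_pgt_addr + asid * prop->mmu_hop_table_size,
 * as programmed per ASID in goya_mmu_init() above.
 */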
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
new file mode 100644
index 000000000000..8539dd041f2c
--- /dev/null
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef INCLUDE_MMU_V1_0_H_
+#define INCLUDE_MMU_V1_0_H_
+
+#define MMU_HOP0_PA43_12 0x490004
+#define MMU_HOP0_PA49_44 0x490008
+#define MMU_ASID_BUSY 0x490000
+
+#endif /* INCLUDE_MMU_V1_0_H_ */
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index ad14376a1c25..6650c8085fc6 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -5,10 +5,1198 @@
* All Rights Reserved.
*/
+#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/genalloc.h>
+
+#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT)
+#define HL_MMU_DEBUG 0
+
+/*
+ * The va ranges in the context object contain a list of the available chunks
+ * of device virtual memory.
+ * There is one range for host allocations and one for DRAM allocations.
+ *
+ * On initialization each range contains one chunk of all of its available
+ * virtual range, which is half of the total device virtual range.
+ *
+ * On each mapping of physical pages, the smallest suitable virtual chunk is
+ * selected from the list. If the chunk size equals the requested size, the
+ * whole chunk is returned. Otherwise, the chunk is split in two - one part is
+ * returned as the result and the remainder stays in the list.
+ *
+ * On each unmapping of a virtual address, the relevant virtual chunk is
+ * returned to the list. The chunk is added to the list and, if its edges match
+ * the edges of the adjacent chunks (meaning a contiguous chunk can be created),
+ * the chunks are merged.
+ *
+ * On finish, the list is checked to contain only one chunk covering the
+ * relevant virtual range (which is half of the device's total virtual range).
+ * If not (meaning not all mappings were unmapped), a warning is printed.
+ */
+
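As a minimal sketch of the split step described above (split_va_chunk_sketch is an assumed helper name; the driver's own best-fit logic is in get_va_block() below):

/* Illustrative sketch only - assumed helper, not part of this patch */
static u64 split_va_chunk_sketch(struct hl_vm_va_block *chunk, u64 size)
{
        u64 alloc_start = chunk->start;

        /* hand out the front of the chunk; the remainder stays in the list */
        chunk->start += size;
        chunk->size = chunk->end - chunk->start;

        return alloc_start;
}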
+/*
+ * alloc_device_memory - allocate device memory
+ *
+ * @ctx : current context
+ * @args : host parameters containing the requested size
+ * @ret_handle : result handle
+ *
+ * This function does the following:
+ * - Allocate the requested size rounded up to 2MB pages
+ * - Return unique handle
+ */
+static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
+ u32 *ret_handle)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ u64 paddr = 0;
+ u32 total_size, num_pgs, num_curr_pgs, page_size, page_shift;
+ int handle, rc, i;
+ bool contiguous;
+
+ num_curr_pgs = 0;
+ page_size = hdev->asic_prop.dram_page_size;
+ page_shift = __ffs(page_size);
+ num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
+ total_size = num_pgs << page_shift;
+
+ contiguous = args->flags & HL_MEM_CONTIGUOUS;
+
+ if (contiguous) {
+ paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
+ if (!paddr) {
+ dev_err(hdev->dev,
+ "failed to allocate %u huge contiguous pages\n",
+ num_pgs);
+ return -ENOMEM;
+ }
+ }
+
+ phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+ if (!phys_pg_pack) {
+ rc = -ENOMEM;
+ goto pages_pack_err;
+ }
+
+ phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
+ phys_pg_pack->asid = ctx->asid;
+ phys_pg_pack->npages = num_pgs;
+ phys_pg_pack->page_size = page_size;
+ phys_pg_pack->total_size = total_size;
+ phys_pg_pack->flags = args->flags;
+ phys_pg_pack->contiguous = contiguous;
+
+ phys_pg_pack->pages = kcalloc(num_pgs, sizeof(u64), GFP_KERNEL);
+ if (!phys_pg_pack->pages) {
+ rc = -ENOMEM;
+ goto pages_arr_err;
+ }
+
+ if (phys_pg_pack->contiguous) {
+ for (i = 0 ; i < num_pgs ; i++)
+ phys_pg_pack->pages[i] = paddr + i * page_size;
+ } else {
+ for (i = 0 ; i < num_pgs ; i++) {
+ phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
+ vm->dram_pg_pool,
+ page_size);
+ if (!phys_pg_pack->pages[i]) {
+ dev_err(hdev->dev,
+ "ioctl failed to allocate page\n");
+ rc = -ENOMEM;
+ goto page_err;
+ }
+
+ num_curr_pgs++;
+ }
+ }
+
+ spin_lock(&vm->idr_lock);
+ handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
+ GFP_KERNEL);
+ spin_unlock(&vm->idr_lock);
+
+ if (handle < 0) {
+ dev_err(hdev->dev, "Failed to get handle for page\n");
+ rc = -EFAULT;
+ goto idr_err;
+ }
+
+ for (i = 0 ; i < num_pgs ; i++)
+ kref_get(&vm->dram_pg_pool_refcount);
+
+ phys_pg_pack->handle = handle;
+
+ atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+ atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+ *ret_handle = handle;
+
+ return 0;
+
+idr_err:
+page_err:
+ if (!phys_pg_pack->contiguous)
+ for (i = 0 ; i < num_curr_pgs ; i++)
+ gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
+ page_size);
+
+ kfree(phys_pg_pack->pages);
+pages_arr_err:
+ kfree(phys_pg_pack);
+pages_pack_err:
+ if (contiguous)
+ gen_pool_free(vm->dram_pg_pool, paddr, total_size);
+
+ return rc;
+}
+
+/*
+ * get_userptr_from_host_va - initialize userptr structure from given host
+ * virtual address
+ *
+ * @hdev : habanalabs device structure
+ * @args : parameters containing the virtual address and size
+ * @p_userptr : pointer to result userptr structure
+ *
+ * This function does the following:
+ * - Allocate userptr structure
+ * - Pin the given host memory using the userptr structure
+ * - Perform DMA mapping to have the DMA addresses of the pages
+ */
+static int get_userptr_from_host_va(struct hl_device *hdev,
+ struct hl_mem_in *args, struct hl_userptr **p_userptr)
+{
+ struct hl_userptr *userptr;
+ int rc;
+
+ userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
+ if (!userptr) {
+ rc = -ENOMEM;
+ goto userptr_err;
+ }
+
+ rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr,
+ args->map_host.mem_size, userptr);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to pin host memory\n");
+ goto pin_err;
+ }
+
+ rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
+ userptr->sgt->nents, DMA_BIDIRECTIONAL);
+ if (rc) {
+ dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+ goto dma_map_err;
+ }
+
+ userptr->dma_mapped = true;
+ userptr->dir = DMA_BIDIRECTIONAL;
+ userptr->vm_type = VM_TYPE_USERPTR;
+
+ *p_userptr = userptr;
+
+ return 0;
+
+dma_map_err:
+ hl_unpin_host_memory(hdev, userptr);
+pin_err:
+ kfree(userptr);
+userptr_err:
+
+ return rc;
+}
+
+/*
+ * free_userptr - free userptr structure
+ *
+ * @hdev : habanalabs device structure
+ * @userptr : userptr to free
+ *
+ * This function does the following:
+ * - Unpins the physical pages
+ * - Frees the userptr structure
+ */
+static void free_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
+{
+ hl_unpin_host_memory(hdev, userptr);
+ kfree(userptr);
+}
+
+/*
+ * dram_pg_pool_do_release - free DRAM pages pool
+ *
+ * @ref : pointer to reference object
+ *
+ * This function does the following:
+ * - Frees the idr structure of physical pages handles
+ * - Frees the generic pool of DRAM physical pages
+ */
+static void dram_pg_pool_do_release(struct kref *ref)
+{
+ struct hl_vm *vm = container_of(ref, struct hl_vm,
+ dram_pg_pool_refcount);
+
+ /*
+ * free the idr here as only here we know for sure that there are no
+ * allocated physical pages and hence there are no handles in use
+ */
+ idr_destroy(&vm->phys_pg_pack_handles);
+ gen_pool_destroy(vm->dram_pg_pool);
+}
+
+/*
+ * free_phys_pg_pack - free physical page pack
+ *
+ * @hdev : habanalabs device structure
+ * @phys_pg_pack : physical page pack to free
+ *
+ * This function does the following:
+ * - For DRAM memory only, iterate over the pack and free each physical block
+ * structure by returning it to the general pool
+ * - Free the hl_vm_phys_pg_pack structure
+ */
+static void free_phys_pg_pack(struct hl_device *hdev,
+ struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+ struct hl_vm *vm = &hdev->vm;
+ int i;
+
+ if (!phys_pg_pack->created_from_userptr) {
+ if (phys_pg_pack->contiguous) {
+ gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+ phys_pg_pack->total_size);
+
+ for (i = 0; i < phys_pg_pack->npages ; i++)
+ kref_put(&vm->dram_pg_pool_refcount,
+ dram_pg_pool_do_release);
+ } else {
+ for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+ gen_pool_free(vm->dram_pg_pool,
+ phys_pg_pack->pages[i],
+ phys_pg_pack->page_size);
+ kref_put(&vm->dram_pg_pool_refcount,
+ dram_pg_pool_do_release);
+ }
+ }
+ }
+
+ kfree(phys_pg_pack->pages);
+ kfree(phys_pg_pack);
+}
+
+/*
+ * free_device_memory - free device memory
+ *
+ * @ctx : current context
+ * @handle : handle of the memory chunk to free
+ *
+ * This function does the following:
+ * - Free the device memory related to the given handle
+ */
+static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+
+ spin_lock(&vm->idr_lock);
+ phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+ if (phys_pg_pack) {
+ if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
+ dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
+ handle);
+ spin_unlock(&vm->idr_lock);
+ return -EINVAL;
+ }
+
+ /*
+ * must remove from idr before the freeing of the physical
+ * pages as the refcount of the pool is also the trigger of the
+ * idr destroy
+ */
+ idr_remove(&vm->phys_pg_pack_handles, handle);
+ spin_unlock(&vm->idr_lock);
+
+ atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+ atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+ free_phys_pg_pack(hdev, phys_pg_pack);
+ } else {
+ spin_unlock(&vm->idr_lock);
+ dev_err(hdev->dev,
+ "free device memory failed, no match for handle %u\n",
+ handle);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * clear_va_list_locked - free virtual addresses list
+ *
+ * @hdev : habanalabs device structure
+ * @va_list : list of virtual addresses to free
+ *
+ * This function does the following:
+ * - Iterate over the list and free each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void clear_va_list_locked(struct hl_device *hdev,
+ struct list_head *va_list)
+{
+ struct hl_vm_va_block *va_block, *tmp;
+
+ list_for_each_entry_safe(va_block, tmp, va_list, node) {
+ list_del(&va_block->node);
+ kfree(va_block);
+ }
+}
+
+/*
+ * print_va_list_locked - print virtual addresses list
+ *
+ * @hdev : habanalabs device structure
+ * @va_list : list of virtual addresses to print
+ *
+ * This function does the following:
+ * - Iterate over the list and print each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void print_va_list_locked(struct hl_device *hdev,
+ struct list_head *va_list)
+{
+#if HL_MMU_DEBUG
+ struct hl_vm_va_block *va_block;
+
+ dev_dbg(hdev->dev, "print va list:\n");
+
+ list_for_each_entry(va_block, va_list, node)
+ dev_dbg(hdev->dev,
+ "va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
+ va_block->start, va_block->end, va_block->size);
+#endif
+}
+
+/*
+ * merge_va_blocks_locked - merge a virtual block if possible
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_list : pointer to the virtual addresses block list
+ * @va_block : virtual block to merge with adjacent blocks
+ *
+ * This function does the following:
+ * - Merge the given block with the adjacent blocks if their virtual ranges
+ * create a contiguous virtual range
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void merge_va_blocks_locked(struct hl_device *hdev,
+ struct list_head *va_list, struct hl_vm_va_block *va_block)
+{
+ struct hl_vm_va_block *prev, *next;
+
+ prev = list_prev_entry(va_block, node);
+ if (&prev->node != va_list && prev->end + 1 == va_block->start) {
+ prev->end = va_block->end;
+ prev->size = prev->end - prev->start;
+ list_del(&va_block->node);
+ kfree(va_block);
+ va_block = prev;
+ }
+
+ next = list_next_entry(va_block, node);
+ if (&next->node != va_list && va_block->end + 1 == next->start) {
+ next->start = va_block->start;
+ next->size = next->end - next->start;
+ list_del(&va_block->node);
+ kfree(va_block);
+ }
+}
+
+/*
+ * add_va_block_locked - add a virtual block to the virtual addresses list
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_list : pointer to the virtual addresses block list
+ * @start : start virtual address
+ * @end : end virtual address
+ *
+ * This function does the following:
+ * - Add the given block to the virtual blocks list and merge with other
+ * blocks if a contiguous virtual block can be created
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static int add_va_block_locked(struct hl_device *hdev,
+ struct list_head *va_list, u64 start, u64 end)
+{
+ struct hl_vm_va_block *va_block, *res = NULL;
+ u64 size = end - start;
+
+ print_va_list_locked(hdev, va_list);
+
+ list_for_each_entry(va_block, va_list, node) {
+ /* TODO: remove upon matureness */
+ if (hl_mem_area_crosses_range(start, size, va_block->start,
+ va_block->end)) {
+ dev_err(hdev->dev,
+ "block crossing ranges at start 0x%llx, end 0x%llx\n",
+ va_block->start, va_block->end);
+ return -EINVAL;
+ }
+
+ if (va_block->end < start)
+ res = va_block;
+ }
+
+ va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
+ if (!va_block)
+ return -ENOMEM;
+
+ va_block->start = start;
+ va_block->end = end;
+ va_block->size = size;
+
+ if (!res)
+ list_add(&va_block->node, va_list);
+ else
+ list_add(&va_block->node, &res->node);
+
+ merge_va_blocks_locked(hdev, va_list, va_block);
+
+ print_va_list_locked(hdev, va_list);
+
+ return 0;
+}
+
+/*
+ * add_va_block - wrapper for add_va_block_locked
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_range : pointer to the virtual addresses range
+ * @start : start virtual address
+ * @end : end virtual address
+ *
+ * This function does the following:
+ * - Takes the list lock and calls add_va_block_locked
+ */
+static inline int add_va_block(struct hl_device *hdev,
+ struct hl_va_range *va_range, u64 start, u64 end)
+{
+ int rc;
+
+ mutex_lock(&va_range->lock);
+ rc = add_va_block_locked(hdev, &va_range->list, start, end);
+ mutex_unlock(&va_range->lock);
+
+ return rc;
+}
+
+/*
+ * get_va_block - get a virtual block with the requested size
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_range : pointer to the virtual addresses range
+ * @size : requested block size
+ * @hint_addr : hint for request address by the user
+ * @is_userptr : true for host (userptr) memory, false for DRAM memory
+ *
+ * This function does the following:
+ * - Iterate on the virtual block list to find a suitable virtual block for the
+ * requested size
+ * - Reserve the requested block and update the list
+ * - Return the start address of the virtual block
+ */
+static u64 get_va_block(struct hl_device *hdev,
+ struct hl_va_range *va_range, u32 size, u64 hint_addr,
+ bool is_userptr)
+{
+ struct hl_vm_va_block *va_block, *new_va_block = NULL;
+ u64 valid_start, valid_size, prev_start, prev_end, page_mask,
+ res_valid_start = 0, res_valid_size = 0;
+ u32 page_size;
+ bool add_prev = false;
+
+ if (is_userptr) {
+ /*
+ * We cannot know if the user allocated memory with huge pages
+ * or not, hence we continue with the biggest possible
+ * granularity.
+ */
+ page_size = PAGE_SIZE_2MB;
+ page_mask = PAGE_MASK_2MB;
+ } else {
+ page_size = hdev->asic_prop.dram_page_size;
+ page_mask = ~((u64)page_size - 1);
+ }
+
+ mutex_lock(&va_range->lock);
+
+ print_va_list_locked(hdev, &va_range->list);
+
+ list_for_each_entry(va_block, &va_range->list, node) {
+ /* calc the first possible aligned addr */
+ valid_start = va_block->start;
+
+ if (valid_start & (page_size - 1)) {
+ valid_start &= page_mask;
+ valid_start += page_size;
+ if (valid_start > va_block->end)
+ continue;
+ }
+
+ valid_size = va_block->end - valid_start;
+
+ if (valid_size >= size &&
+ (!new_va_block || valid_size < res_valid_size)) {
+
+ new_va_block = va_block;
+ res_valid_start = valid_start;
+ res_valid_size = valid_size;
+ }
+
+ if (hint_addr && hint_addr >= valid_start &&
+ ((hint_addr + size) <= va_block->end)) {
+ new_va_block = va_block;
+ res_valid_start = hint_addr;
+ res_valid_size = valid_size;
+ break;
+ }
+ }
+
+ if (!new_va_block) {
+ dev_err(hdev->dev, "no available va block for size %u\n", size);
+ goto out;
+ }
+
+ if (res_valid_start > new_va_block->start) {
+ prev_start = new_va_block->start;
+ prev_end = res_valid_start - 1;
+
+ new_va_block->start = res_valid_start;
+ new_va_block->size = res_valid_size;
+
+ add_prev = true;
+ }
+
+ if (new_va_block->size > size) {
+ new_va_block->start += size;
+ new_va_block->size = new_va_block->end - new_va_block->start;
+ } else {
+ list_del(&new_va_block->node);
+ kfree(new_va_block);
+ }
+
+ if (add_prev)
+ add_va_block_locked(hdev, &va_range->list, prev_start,
+ prev_end);
+
+ print_va_list_locked(hdev, &va_range->list);
+out:
+ mutex_unlock(&va_range->lock);
+
+ return res_valid_start;
+}
+
+/*
+ * get_sg_info - get number of pages and the DMA address from SG list
+ *
+ * @sg : the SG list
+ * @dma_addr : pointer to DMA address to return
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address in the first page, add to it the length and round it up
+ * to the number of needed pages.
+ */
+static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+ *dma_addr = sg_dma_address(sg);
+
+ return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+ (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
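As a worked example of the rounding above (illustrative numbers only): with 4KB pages, a segment whose DMA address starts at offset 0x800 into its page and whose length is 0x2000 bytes spans ((0x800 + 0x2000) + 0xFFF) >> 12 = 3 pages, one more than the length alone would suggest.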
+/*
+ * init_phys_pg_pack_from_userptr - initialize physical page pack from host
+ * memory
+ *
+ * @ctx : current context
+ * @userptr : userptr to initialize from
+ * @pphys_pg_pack : res pointer
+ *
+ * This function does the following:
+ * - Pin the physical pages related to the given virtual block
+ * - Create a physical page pack from the physical pages related to the given
+ * virtual block
+ */
+static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
+ struct hl_userptr *userptr,
+ struct hl_vm_phys_pg_pack **pphys_pg_pack)
+{
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ struct scatterlist *sg;
+ dma_addr_t dma_addr;
+ u64 page_mask;
+ u32 npages, total_npages, page_size = PAGE_SIZE;
+ bool first = true, is_huge_page_opt = true;
+ int rc, i, j;
+
+ phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+ if (!phys_pg_pack)
+ return -ENOMEM;
+
+ phys_pg_pack->vm_type = userptr->vm_type;
+ phys_pg_pack->created_from_userptr = true;
+ phys_pg_pack->asid = ctx->asid;
+ atomic_set(&phys_pg_pack->mapping_cnt, 1);
+
+ /* Only if all dma_addrs are aligned to 2MB and their
+ * sizes are at least 2MB, we can use huge page mapping.
+ * We limit the 2MB optimization to this condition,
+ * since later on we acquire the related VA range as one
+ * consecutive block.
+ */
+ total_npages = 0;
+ for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ npages = get_sg_info(sg, &dma_addr);
+
+ total_npages += npages;
+
+ if (first) {
+ first = false;
+ dma_addr &= PAGE_MASK_2MB;
+ }
+
+ if ((npages % PGS_IN_2MB_PAGE) ||
+ (dma_addr & (PAGE_SIZE_2MB - 1)))
+ is_huge_page_opt = false;
+ }
+
+ if (is_huge_page_opt) {
+ page_size = PAGE_SIZE_2MB;
+ total_npages /= PGS_IN_2MB_PAGE;
+ }
+
+ page_mask = ~(((u64) page_size) - 1);
+
+ phys_pg_pack->pages = kcalloc(total_npages, sizeof(u64), GFP_KERNEL);
+ if (!phys_pg_pack->pages) {
+ rc = -ENOMEM;
+ goto page_pack_arr_mem_err;
+ }
+
+ phys_pg_pack->npages = total_npages;
+ phys_pg_pack->page_size = page_size;
+ phys_pg_pack->total_size = total_npages * page_size;
+
+ j = 0;
+ first = true;
+ for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ npages = get_sg_info(sg, &dma_addr);
+
+ /* align down to physical page size and save the offset */
+ if (first) {
+ first = false;
+ phys_pg_pack->offset = dma_addr & (page_size - 1);
+ dma_addr &= page_mask;
+ }
+
+ while (npages) {
+ phys_pg_pack->pages[j++] = dma_addr;
+ dma_addr += page_size;
+
+ if (is_huge_page_opt)
+ npages -= PGS_IN_2MB_PAGE;
+ else
+ npages--;
+ }
+ }
+
+ *pphys_pg_pack = phys_pg_pack;
+
+ return 0;
+
+page_pack_arr_mem_err:
+ kfree(phys_pg_pack);
+
+ return rc;
+}
+
+/*
+ * map_phys_page_pack - maps the physical page pack
+ *
+ * @ctx : current context
+ * @vaddr : start address of the virtual area to map from
+ * @phys_pg_pack : the pack of physical pages to map to
+ *
+ * This function does the following:
+ * - Maps each chunk of virtual memory to matching physical chunk
+ * - Stores number of successful mappings in the given argument
+ * - Returns 0 on success, error code otherwise.
+ */
+static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
+ struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+ struct hl_device *hdev = ctx->hdev;
+ u64 next_vaddr = vaddr, paddr;
+ u32 page_size = phys_pg_pack->page_size;
+ int i, rc = 0, mapped_pg_cnt = 0;
+
+ for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+ paddr = phys_pg_pack->pages[i];
+
+ /* For accessing the host we need to turn on bit 39 */
+ if (phys_pg_pack->created_from_userptr)
+ paddr += hdev->asic_prop.host_phys_base_address;
+
+ rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+ if (rc) {
+ dev_err(hdev->dev,
+ "map failed for handle %u, npages: %d, mapped: %d",
+ phys_pg_pack->handle, phys_pg_pack->npages,
+ mapped_pg_cnt);
+ goto err;
+ }
+
+ mapped_pg_cnt++;
+ next_vaddr += page_size;
+ }
+
+ return 0;
+
+err:
+ next_vaddr = vaddr;
+ for (i = 0 ; i < mapped_pg_cnt ; i++) {
+ if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
+ phys_pg_pack->handle, next_vaddr,
+ phys_pg_pack->pages[i], page_size);
+
+ next_vaddr += page_size;
+ }
+
+ return rc;
+}
+
+static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
+ u64 *paddr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ u32 handle;
+
+ handle = lower_32_bits(args->map_device.handle);
+ spin_lock(&vm->idr_lock);
+ phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+ if (!phys_pg_pack) {
+ spin_unlock(&vm->idr_lock);
+ dev_err(hdev->dev, "no match for handle %u\n", handle);
+ return -EINVAL;
+ }
+
+ *paddr = phys_pg_pack->pages[0];
+
+ spin_unlock(&vm->idr_lock);
+
+ return 0;
+}
+
+/*
+ * map_device_va - map the given memory
+ *
+ * @ctx : current context
+ * @args : host parameters with handle/host virtual address
+ * @device_addr : pointer to result device virtual address
+ *
+ * This function does the following:
+ * - If given a physical device memory handle, map to a device virtual block
+ * and return the start address of this block
+ * - If given a host virtual address and size, find the related physical pages,
+ * map a device virtual block to these pages and return the start address of
+ * this block
+ */
+static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+ u64 *device_addr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ struct hl_userptr *userptr = NULL;
+ struct hl_vm_hash_node *hnode;
+ enum vm_type_t *vm_type;
+ u64 ret_vaddr, hint_addr;
+ u32 handle = 0;
+ int rc;
+ bool is_userptr = args->flags & HL_MEM_USERPTR;
+
+ /* Assume failure */
+ *device_addr = 0;
+
+ if (is_userptr) {
+ rc = get_userptr_from_host_va(hdev, args, &userptr);
+ if (rc) {
+ dev_err(hdev->dev, "failed to get userptr from va\n");
+ return rc;
+ }
+
+ rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+ &phys_pg_pack);
+ if (rc) {
+ dev_err(hdev->dev,
+ "unable to init page pack for vaddr 0x%llx\n",
+ args->map_host.host_virt_addr);
+ goto init_page_pack_err;
+ }
+
+ vm_type = (enum vm_type_t *) userptr;
+ hint_addr = args->map_host.hint_addr;
+ } else {
+ handle = lower_32_bits(args->map_device.handle);
+
+ spin_lock(&vm->idr_lock);
+ phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+ if (!phys_pg_pack) {
+ spin_unlock(&vm->idr_lock);
+ dev_err(hdev->dev,
+ "no match for handle %u\n", handle);
+ return -EINVAL;
+ }
+
+ /* increment now to avoid freeing device memory while mapping */
+ atomic_inc(&phys_pg_pack->mapping_cnt);
+
+ spin_unlock(&vm->idr_lock);
+
+ vm_type = (enum vm_type_t *) phys_pg_pack;
+
+ hint_addr = args->map_device.hint_addr;
+ }
+
+ /*
+ * relevant for mapping device physical memory only, as host memory is
+ * implicitly shared
+ */
+ if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
+ phys_pg_pack->asid != ctx->asid) {
+ dev_err(hdev->dev,
+ "Failed to map memory, handle %u is not shared\n",
+ handle);
+ rc = -EPERM;
+ goto shared_err;
+ }
+
+ hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
+ if (!hnode) {
+ rc = -ENOMEM;
+ goto hnode_err;
+ }
+
+ ret_vaddr = get_va_block(hdev,
+ is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
+ phys_pg_pack->total_size, hint_addr, is_userptr);
+ if (!ret_vaddr) {
+ dev_err(hdev->dev, "no available va block for handle %u\n",
+ handle);
+ rc = -ENOMEM;
+ goto va_block_err;
+ }
+
+ mutex_lock(&ctx->mmu_lock);
+
+ rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack);
+ if (rc) {
+ mutex_unlock(&ctx->mmu_lock);
+ dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
+ handle);
+ goto map_err;
+ }
+
+ hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false, ctx->asid,
+ ret_vaddr, phys_pg_pack->total_size);
+
+ mutex_unlock(&ctx->mmu_lock);
+
+ ret_vaddr += phys_pg_pack->offset;
+
+ hnode->ptr = vm_type;
+ hnode->vaddr = ret_vaddr;
+
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ *device_addr = ret_vaddr;
+
+ if (is_userptr)
+ free_phys_pg_pack(hdev, phys_pg_pack);
+
+ return 0;
+
+map_err:
+ if (add_va_block(hdev,
+ is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
+ ret_vaddr,
+ ret_vaddr + phys_pg_pack->total_size - 1))
+ dev_warn(hdev->dev,
+ "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
+ handle, ret_vaddr);
+
+va_block_err:
+ kfree(hnode);
+hnode_err:
+shared_err:
+ atomic_dec(&phys_pg_pack->mapping_cnt);
+ if (is_userptr)
+ free_phys_pg_pack(hdev, phys_pg_pack);
+init_page_pack_err:
+ if (is_userptr)
+ free_userptr(hdev, userptr);
+
+ return rc;
+}
+
+/*
+ * unmap_device_va - unmap the given device virtual address
+ *
+ * @ctx : current context
+ * @vaddr : device virtual address to unmap
+ *
+ * This function does the following:
+ * - Unmap the physical pages related to the given virtual address
+ * - return the device virtual block to the virtual block list
+ */
+static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+ struct hl_vm_hash_node *hnode = NULL;
+ struct hl_userptr *userptr = NULL;
+ enum vm_type_t *vm_type;
+ u64 next_vaddr;
+ u32 page_size;
+ bool is_userptr;
+ int i, rc;
+
+ /* protect against a concurrent or repeated unmap of the same address */
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
+ if (vaddr == hnode->vaddr)
+ break;
+
+ if (!hnode) {
+ mutex_unlock(&ctx->mem_hash_lock);
+ dev_err(hdev->dev,
+ "unmap failed, no mem hnode for vaddr 0x%llx\n",
+ vaddr);
+ return -EINVAL;
+ }
+
+ hash_del(&hnode->node);
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ vm_type = hnode->ptr;
+
+ if (*vm_type == VM_TYPE_USERPTR) {
+ is_userptr = true;
+ userptr = hnode->ptr;
+ rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+ &phys_pg_pack);
+ if (rc) {
+ dev_err(hdev->dev,
+ "unable to init page pack for vaddr 0x%llx\n",
+ vaddr);
+ goto vm_type_err;
+ }
+ } else if (*vm_type == VM_TYPE_PHYS_PACK) {
+ is_userptr = false;
+ phys_pg_pack = hnode->ptr;
+ } else {
+ dev_warn(hdev->dev,
+ "unmap failed, unknown vm desc for vaddr 0x%llx\n",
+ vaddr);
+ rc = -EFAULT;
+ goto vm_type_err;
+ }
+
+ if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
+ dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
+ rc = -EINVAL;
+ goto mapping_cnt_err;
+ }
+
+ page_size = phys_pg_pack->page_size;
+ vaddr &= ~(((u64) page_size) - 1);
+
+ next_vaddr = vaddr;
+
+ mutex_lock(&ctx->mmu_lock);
+
+ for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size)
+ if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+ dev_warn_ratelimited(hdev->dev,
+ "unmap failed for vaddr: 0x%llx\n", next_vaddr);
+
+ hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true, ctx->asid,
+ vaddr, phys_pg_pack->total_size);
+
+ mutex_unlock(&ctx->mmu_lock);
+
+ if (add_va_block(hdev,
+ is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
+ vaddr,
+ vaddr + phys_pg_pack->total_size - 1))
+ dev_warn(hdev->dev, "add va block failed for vaddr: 0x%llx\n",
+ vaddr);
+
+ atomic_dec(&phys_pg_pack->mapping_cnt);
+ kfree(hnode);
+
+ if (is_userptr) {
+ free_phys_pg_pack(hdev, phys_pg_pack);
+ free_userptr(hdev, userptr);
+ }
+
+ return 0;
+
+mapping_cnt_err:
+ if (is_userptr)
+ free_phys_pg_pack(hdev, phys_pg_pack);
+vm_type_err:
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_add(ctx->mem_hash, &hnode->node, vaddr);
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ return rc;
+}
+
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ union hl_mem_args *args = data;
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ctx *ctx = hpriv->ctx;
+ u64 device_addr = 0;
+ u32 handle = 0;
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is disabled or in reset. Can't execute memory IOCTL\n");
+ return -EBUSY;
+ }
+
+ if (hdev->mmu_enable) {
+ switch (args->in.op) {
+ case HL_MEM_OP_ALLOC:
+ if (!hdev->dram_supports_virtual_memory) {
+ dev_err(hdev->dev,
+ "DRAM alloc is not supported\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ if (args->in.alloc.mem_size == 0) {
+ dev_err(hdev->dev,
+ "alloc size must be larger than 0\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = alloc_device_memory(ctx, &args->in, &handle);
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = (__u64) handle;
+ break;
+
+ case HL_MEM_OP_FREE:
+ if (!hdev->dram_supports_virtual_memory) {
+ dev_err(hdev->dev,
+ "DRAM free is not supported\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = free_device_memory(ctx, args->in.free.handle);
+ break;
+
+ case HL_MEM_OP_MAP:
+ rc = map_device_va(ctx, &args->in, &device_addr);
+
+ memset(args, 0, sizeof(*args));
+ args->out.device_virt_addr = device_addr;
+ break;
+
+ case HL_MEM_OP_UNMAP:
+ rc = unmap_device_va(ctx,
+ args->in.unmap.device_virt_addr);
+ break;
+
+ default:
+ dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+ rc = -ENOTTY;
+ break;
+ }
+ } else {
+ switch (args->in.op) {
+ case HL_MEM_OP_ALLOC:
+ if (args->in.alloc.mem_size == 0) {
+ dev_err(hdev->dev,
+ "alloc size must be larger than 0\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Force contiguous as there are no real MMU
+ * translations to overcome physical memory gaps
+ */
+ args->in.flags |= HL_MEM_CONTIGUOUS;
+ rc = alloc_device_memory(ctx, &args->in, &handle);
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = (__u64) handle;
+ break;
+
+ case HL_MEM_OP_FREE:
+ rc = free_device_memory(ctx, args->in.free.handle);
+ break;
+
+ case HL_MEM_OP_MAP:
+ if (args->in.flags & HL_MEM_USERPTR) {
+ device_addr = args->in.map_host.host_virt_addr;
+ rc = 0;
+ } else {
+ rc = get_paddr_from_handle(ctx, &args->in,
+ &device_addr);
+ }
+
+ memset(args, 0, sizeof(*args));
+ args->out.device_virt_addr = device_addr;
+ break;
+
+ case HL_MEM_OP_UNMAP:
+ rc = 0;
+ break;
+
+ default:
+ dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+ rc = -ENOTTY;
+ break;
+ }
+ }
+
+out:
+ return rc;
+}
/*
* hl_pin_host_memory - pins a chunk of host memory
@@ -196,3 +1384,332 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
return false;
}
+
+/*
+ * hl_va_range_init - initialize virtual addresses range
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_range : pointer to the range to initialize
+ * @start : range start address
+ * @end : range end address
+ *
+ * This function does the following:
+ * - Initializes the virtual addresses list of the given range with the given
+ * addresses.
+ */
+static int hl_va_range_init(struct hl_device *hdev,
+ struct hl_va_range *va_range, u64 start, u64 end)
+{
+ int rc;
+
+ INIT_LIST_HEAD(&va_range->list);
+
+ /* PAGE_SIZE alignment */
+
+ if (start & (PAGE_SIZE - 1)) {
+ start &= PAGE_MASK;
+ start += PAGE_SIZE;
+ }
+
+ if (end & (PAGE_SIZE - 1))
+ end &= PAGE_MASK;
+
+ if (start >= end) {
+ dev_err(hdev->dev, "too small vm range for va list\n");
+ return -EFAULT;
+ }
+
+ rc = add_va_block(hdev, va_range, start, end);
+
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init host va list\n");
+ return rc;
+ }
+
+ va_range->start_addr = start;
+ va_range->end_addr = end;
+
+ return 0;
+}
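hl_va_range_init() shrinks the requested range inward to page boundaries: the start is rounded up, the end is rounded down, and the range is rejected if nothing usable remains. A small stand-alone sketch of that arithmetic, assuming a 4KB page size purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SIZE 4096ULL               /* illustrative page size */
#define EX_PAGE_MASK (~(EX_PAGE_SIZE - 1)) /* clears the in-page offset bits */

/* Shrink [start, end] inward to page boundaries; return 0 if nothing is left. */
static int align_range(uint64_t *start, uint64_t *end)
{
	if (*start & (EX_PAGE_SIZE - 1)) {
		*start &= EX_PAGE_MASK;
		*start += EX_PAGE_SIZE;      /* round start up */
	}

	if (*end & (EX_PAGE_SIZE - 1))
		*end &= EX_PAGE_MASK;        /* round end down */

	return *start < *end;
}

int main(void)
{
	uint64_t start = 0x100123, end = 0x200fff;

	if (align_range(&start, &end))
		printf("usable range: 0x%llx - 0x%llx\n",
		       (unsigned long long)start, (unsigned long long)end);
	return 0;
}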
+
+/*
+ * hl_vm_ctx_init_with_ranges - initialize virtual memory for context
+ *
+ * @ctx : pointer to the habanalabs context structure
+ * @host_range_start : host virtual addresses range start
+ * @host_range_end : host virtual addresses range end
+ * @dram_range_start : dram virtual addresses range start
+ * @dram_range_end : dram virtual addresses range end
+ *
+ * This function initializes the following:
+ * - MMU for context
+ * - Virtual address to area descriptor hashtable
+ * - Virtual block list of available virtual memory
+ */
+int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
+ u64 host_range_end, u64 dram_range_start,
+ u64 dram_range_end)
+{
+ struct hl_device *hdev = ctx->hdev;
+ int rc;
+
+ hl_mmu_ctx_init(ctx);
+
+ mutex_init(&ctx->mem_hash_lock);
+ hash_init(ctx->mem_hash);
+
+ mutex_init(&ctx->host_va_range.lock);
+
+ rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
+ host_range_end);
+ if (rc) {
+ dev_err(hdev->dev, "failed to init host vm range\n");
+ goto host_vm_err;
+ }
+
+ mutex_init(&ctx->dram_va_range.lock);
+
+ rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
+ dram_range_end);
+ if (rc) {
+ dev_err(hdev->dev, "failed to init dram vm range\n");
+ goto dram_vm_err;
+ }
+
+ return 0;
+
+dram_vm_err:
+ mutex_destroy(&ctx->dram_va_range.lock);
+
+ mutex_lock(&ctx->host_va_range.lock);
+ clear_va_list_locked(hdev, &ctx->host_va_range.list);
+ mutex_unlock(&ctx->host_va_range.lock);
+host_vm_err:
+ mutex_destroy(&ctx->host_va_range.lock);
+ mutex_destroy(&ctx->mem_hash_lock);
+ hl_mmu_ctx_fini(ctx);
+
+ return rc;
+}
+
+int hl_vm_ctx_init(struct hl_ctx *ctx)
+{
+ struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+ u64 host_range_start, host_range_end, dram_range_start,
+ dram_range_end;
+
+ atomic64_set(&ctx->dram_phys_mem, 0);
+
+ /*
+ * - If MMU is enabled, init the ranges as usual.
+ * - If MMU is disabled, in case of host mapping, the returned address
+ * is the given one.
+ * In case of DRAM mapping, the returned address is the physical
+ * address of the memory related to the given handle.
+ */
+ if (ctx->hdev->mmu_enable) {
+ dram_range_start = prop->va_space_dram_start_address;
+ dram_range_end = prop->va_space_dram_end_address;
+ host_range_start = prop->va_space_host_start_address;
+ host_range_end = prop->va_space_host_end_address;
+ } else {
+ dram_range_start = prop->dram_user_base_address;
+ dram_range_end = prop->dram_end_address;
+ host_range_start = prop->dram_user_base_address;
+ host_range_end = prop->dram_end_address;
+ }
+
+ return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+ dram_range_start, dram_range_end);
+}
+
+/*
+ * hl_va_range_fini - clear a virtual addresses range
+ *
+ * @hdev : pointer to the habanalabs structure
+ * @va_range : pointer to virtual addresses range
+ *
+ * This function does the following:
+ * - Checks that the given range contains the whole initial range
+ * - Frees the virtual addresses block list and its lock
+ */
+static void hl_va_range_fini(struct hl_device *hdev,
+ struct hl_va_range *va_range)
+{
+ struct hl_vm_va_block *va_block;
+
+ if (list_empty(&va_range->list)) {
+ dev_warn(hdev->dev,
+ "va list should not be empty on cleanup!\n");
+ goto out;
+ }
+
+ if (!list_is_singular(&va_range->list)) {
+ dev_warn(hdev->dev,
+ "va list should not contain multiple blocks on cleanup!\n");
+ goto free_va_list;
+ }
+
+ va_block = list_first_entry(&va_range->list, typeof(*va_block), node);
+
+ if (va_block->start != va_range->start_addr ||
+ va_block->end != va_range->end_addr) {
+ dev_warn(hdev->dev,
+ "wrong va block on cleanup, from 0x%llx to 0x%llx\n",
+ va_block->start, va_block->end);
+ goto free_va_list;
+ }
+
+free_va_list:
+ mutex_lock(&va_range->lock);
+ clear_va_list_locked(hdev, &va_range->list);
+ mutex_unlock(&va_range->lock);
+
+out:
+ mutex_destroy(&va_range->lock);
+}
+
+/*
+ * hl_vm_ctx_fini - virtual memory teardown of context
+ *
+ * @ctx : pointer to the habanalabs context structure
+ *
+ * This function performs teardown of the following:
+ * - Virtual block list of available virtual memory
+ * - Virtual address to area descriptor hashtable
+ * - MMU for context
+ *
+ * In addition this function does the following:
+ * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
+ * hashtable should be empty as no valid mappings should exist at this
+ * point.
+ * - Frees any existing physical page list from the idr which relates to the
+ * current context asid.
+ * - This function checks the virtual block list for correctness. At this point
+ * the list should contain one element which describes the whole virtual
+ * memory range of the context. Otherwise, a warning is printed.
+ */
+void hl_vm_ctx_fini(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_list;
+ struct hl_vm_hash_node *hnode;
+ struct hlist_node *tmp_node;
+ int i;
+
+ if (!hash_empty(ctx->mem_hash))
+ dev_notice(hdev->dev, "ctx is freed while it has va in use\n");
+
+ hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
+ dev_dbg(hdev->dev,
+ "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
+ hnode->vaddr, ctx->asid);
+ unmap_device_va(ctx, hnode->vaddr);
+ }
+
+ spin_lock(&vm->idr_lock);
+ idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
+ if (phys_pg_list->asid == ctx->asid) {
+ dev_dbg(hdev->dev,
+ "page list 0x%p of asid %d is still alive\n",
+ phys_pg_list, ctx->asid);
+ free_phys_pg_pack(hdev, phys_pg_list);
+ idr_remove(&vm->phys_pg_pack_handles, i);
+ }
+ spin_unlock(&vm->idr_lock);
+
+ hl_va_range_fini(hdev, &ctx->dram_va_range);
+ hl_va_range_fini(hdev, &ctx->host_va_range);
+
+ mutex_destroy(&ctx->mem_hash_lock);
+ hl_mmu_ctx_fini(ctx);
+}
+
+/*
+ * hl_vm_init - initialize virtual memory module
+ *
+ * @hdev : pointer to the habanalabs device structure
+ *
+ * This function initializes the following:
+ * - MMU module
+ * - DRAM physical pages pool of 2MB
+ * - Idr for device memory allocation handles
+ */
+int hl_vm_init(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_vm *vm = &hdev->vm;
+ int rc;
+
+ rc = hl_mmu_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init MMU\n");
+ return rc;
+ }
+
+ vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+ if (!vm->dram_pg_pool) {
+ dev_err(hdev->dev, "Failed to create dram page pool\n");
+ rc = -ENOMEM;
+ goto pool_create_err;
+ }
+
+ kref_init(&vm->dram_pg_pool_refcount);
+
+ rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
+ prop->dram_end_address - prop->dram_user_base_address,
+ -1);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to add memory to dram page pool %d\n", rc);
+ goto pool_add_err;
+ }
+
+ spin_lock_init(&vm->idr_lock);
+ idr_init(&vm->phys_pg_pack_handles);
+
+ atomic64_set(&hdev->dram_used_mem, 0);
+
+ vm->init_done = true;
+
+ return 0;
+
+pool_add_err:
+ gen_pool_destroy(vm->dram_pg_pool);
+pool_create_err:
+ hl_mmu_fini(hdev);
+
+ return rc;
+}
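dram_pg_pool is a genalloc pool whose minimum allocation order equals the DRAM page size, so every chunk handed out is page sized and page aligned. A hedged kernel-style sketch of the same genalloc usage; the base address, pool size and page size below are made-up illustration values:

#include <linux/genalloc.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/sizes.h>
#include <linux/types.h>

#define EX_PAGE_SIZE	SZ_2M		/* illustrative DRAM page size */
#define EX_POOL_BASE	0x20000000ULL	/* illustrative DRAM base address */
#define EX_POOL_SIZE	SZ_256M		/* illustrative DRAM size */

static struct gen_pool *ex_pool;

static int ex_pool_init(void)
{
	int rc;

	/* min_alloc_order = log2(page size), so chunks are page granular */
	ex_pool = gen_pool_create(__ffs(EX_PAGE_SIZE), -1);
	if (!ex_pool)
		return -ENOMEM;

	rc = gen_pool_add(ex_pool, EX_POOL_BASE, EX_POOL_SIZE, -1);
	if (rc) {
		gen_pool_destroy(ex_pool);
		return rc;
	}

	return 0;
}

static u64 ex_page_alloc(void)
{
	/* returns 0 when the pool is exhausted */
	return (u64) gen_pool_alloc(ex_pool, EX_PAGE_SIZE);
}

static void ex_page_free(u64 addr)
{
	gen_pool_free(ex_pool, addr, EX_PAGE_SIZE);
}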
+
+/*
+ * hl_vm_fini - virtual memory module teardown
+ *
+ * @hdev : pointer to the habanalabs device structure
+ *
+ * This function performs teardown of the following:
+ * - Idr for device memory allocation handles
+ * - DRAM physical pages pool of 2MB
+ * - MMU module
+ */
+void hl_vm_fini(struct hl_device *hdev)
+{
+ struct hl_vm *vm = &hdev->vm;
+
+ if (!vm->init_done)
+ return;
+
+ /*
+ * At this point all the contexts should be freed and hence no DRAM
+ * memory should be in use, so the DRAM pool can be freed here.
+ */
+ if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
+ dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
+ __func__);
+
+ hl_mmu_fini(hdev);
+
+ vm->init_done = false;
+}
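The DRAM pool is reference counted with a kref: the pool holds the initial reference taken at init, and contexts that still own DRAM pages hold additional ones, so the kref_put() in hl_vm_fini() is expected to drop the last reference (kref_put() returns 1 exactly when the release callback ran). A hedged sketch of that pattern with illustrative ex_pool_* names:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct ex_pool {
	struct kref refcount;
	/* ... pool state ... */
};

static void ex_pool_release(struct kref *kref)
{
	struct ex_pool *pool = container_of(kref, struct ex_pool, refcount);

	/* last user dropped its reference: tear the pool down */
	kfree(pool);
}

static struct ex_pool *ex_pool_create(void)
{
	struct ex_pool *pool = kzalloc(sizeof(*pool), GFP_KERNEL);

	if (pool)
		kref_init(&pool->refcount);	/* initial reference */
	return pool;
}

/* Each user takes a reference while it holds allocations from the pool. */
static void ex_pool_get(struct ex_pool *pool)
{
	kref_get(&pool->refcount);
}

/* Returns nonzero only when this put released the object. */
static int ex_pool_put(struct ex_pool *pool)
{
	return kref_put(&pool->refcount, ex_pool_release);
}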
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
new file mode 100644
index 000000000000..79c70d92e74b
--- /dev/null
+++ b/drivers/misc/habanalabs/mmu.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+
+static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 addr)
+{
+ struct pgt_info *pgt_info = NULL;
+
+ hash_for_each_possible(ctx->mmu_hash, pgt_info, node,
+ (unsigned long) addr)
+ if (addr == pgt_info->addr)
+ break;
+
+ return pgt_info;
+}
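Every hop (page table) allocated for a context is tracked in a hashtable keyed by its device address, and get_pgt_info() is a bucket walk over that key. A hedged kernel-style sketch of the same pattern; the ex_* names and the 8-bit bucket count are illustrative:

#include <linux/hashtable.h>
#include <linux/types.h>

struct ex_pgt {
	struct hlist_node node;
	u64 addr;		/* key: device address of the page table */
	int num_of_ptes;
};

static DEFINE_HASHTABLE(ex_pgt_hash, 8);	/* 2^8 buckets */

static void ex_pgt_track(struct ex_pgt *pgt)
{
	hash_add(ex_pgt_hash, &pgt->node, pgt->addr);
}

static struct ex_pgt *ex_pgt_lookup(u64 addr)
{
	struct ex_pgt *pgt;

	/* walk only the bucket that this key hashes into */
	hash_for_each_possible(ex_pgt_hash, pgt, node, addr)
		if (pgt->addr == addr)
			return pgt;

	return NULL;
}

static void ex_pgt_untrack(struct ex_pgt *pgt)
{
	hash_del(&pgt->node);
}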
+
+static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+
+ gen_pool_free(pgt_info->ctx->hdev->mmu_pgt_pool, pgt_info->addr,
+ ctx->hdev->asic_prop.mmu_hop_table_size);
+ hash_del(&pgt_info->node);
+
+ kfree(pgt_info);
+}
+
+static u64 alloc_hop(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct pgt_info *pgt_info;
+ u64 addr;
+
+ pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
+ if (!pgt_info)
+ return ULLONG_MAX;
+
+ addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
+ hdev->asic_prop.mmu_hop_table_size);
+ if (!addr) {
+ dev_err(hdev->dev, "failed to allocate page\n");
+ kfree(pgt_info);
+ return ULLONG_MAX;
+ }
+
+ pgt_info->addr = addr;
+ pgt_info->ctx = ctx;
+ pgt_info->num_of_ptes = 0;
+ hash_add(ctx->mmu_hash, &pgt_info->node, addr);
+
+ return addr;
+}
+
+static inline void clear_pte(struct hl_device *hdev, u64 pte_addr)
+{
+ /* clear the last and present bits */
+ hdev->asic_funcs->write_pte(hdev, pte_addr, 0);
+}
+
+static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+ get_pgt_info(ctx, hop_addr)->num_of_ptes++;
+}
+
+/*
+ * put_pte - decrement the num of ptes and free the hop if possible
+ *
+ * @ctx: pointer to the context structure
+ * @hop_addr: addr of the hop
+ *
+ * This function returns the number of ptes left on this hop. If the number is
+ * 0, it means the hop was freed.
+ */
+static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+ int num_of_ptes_left;
+
+ pgt_info->num_of_ptes--;
+
+ /*
+ * Need to save the number of ptes left because free_hop might free
+ * the pgt_info
+ */
+ num_of_ptes_left = pgt_info->num_of_ptes;
+ if (!num_of_ptes_left)
+ free_hop(ctx, hop_addr);
+
+ return num_of_ptes_left;
+}
+
+static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+{
+ return ctx->hdev->asic_prop.mmu_pgt_addr +
+ (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+ u64 virt_addr, u64 mask, u64 shift)
+{
+ return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
+ ((virt_addr & mask) >> shift);
+}
+
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
+}
+
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
+}
+
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
+}
+
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
+}
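Each hop is an array of PTEs, and the index into that array is a bit-field of the virtual address selected by the hop's mask/shift pair, so a PTE lives at hop_addr + pte_size * ((virt_addr & mask) >> shift). A small stand-alone sketch of that calculation; the 8-byte PTE size and the mask/shift values are illustrative, not the constants from mmu_general.h:

#include <stdio.h>
#include <stdint.h>

#define EX_PTE_SIZE	8ULL	/* illustrative PTE size in bytes */

/* PTE address inside a hop: the virt addr bit-field selects the entry index. */
static uint64_t ex_pte_addr(uint64_t hop_addr, uint64_t virt_addr,
			    uint64_t mask, unsigned int shift)
{
	return hop_addr + EX_PTE_SIZE * ((virt_addr & mask) >> shift);
}

int main(void)
{
	/* illustrative layout: 9 index bits per hop above a 12-bit page offset */
	uint64_t va = 0x0000123456789000ULL;
	uint64_t hop_base = 0x10000000ULL;

	printf("hop3 pte at 0x%llx\n",
	       (unsigned long long)ex_pte_addr(hop_base, va,
					       0x3FE00000ULL, 21));
	printf("hop4 pte at 0x%llx\n",
	       (unsigned long long)ex_pte_addr(hop_base, va,
					       0x1FF000ULL, 12));
	return 0;
}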
+
+static inline u64 get_next_hop_addr(u64 curr_pte)
+{
+ if (curr_pte & PAGE_PRESENT_MASK)
+ return curr_pte & PHYS_ADDR_MASK;
+ else
+ return ULLONG_MAX;
+}
+
+static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
+ bool *is_new_hop)
+{
+ u64 hop_addr = get_next_hop_addr(curr_pte);
+
+ if (hop_addr == ULLONG_MAX) {
+ hop_addr = alloc_hop(ctx);
+ *is_new_hop = true;
+ }
+
+ return hop_addr;
+}
+
+/*
+ * hl_mmu_init - init the mmu module
+ *
+ * @hdev: pointer to the habanalabs device structure
+ *
+ * This function does the following:
+ * - Allocate max_asid zeroed hop0 pgts so no mapping is available
+ * - Enable mmu in hw
+ * - Invalidate the mmu cache
+ * - Create a pool of pages for pgts
+ * - Returns 0 on success
+ *
+ * This function depends on DMA QMAN to be working!
+ */
+int hl_mmu_init(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ /* MMU HW init was already done in device hw_init() */
+
+ mutex_init(&hdev->mmu_cache_lock);
+
+ hdev->mmu_pgt_pool =
+ gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
+
+ if (!hdev->mmu_pgt_pool) {
+ dev_err(hdev->dev, "Failed to create page gen pool\n");
+ rc = -ENOMEM;
+ goto err_pool_create;
+ }
+
+ rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
+ prop->mmu_hop0_tables_total_size,
+ prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
+ -1);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+ goto err_pool_add;
+ }
+
+ return 0;
+
+err_pool_add:
+ gen_pool_destroy(hdev->mmu_pgt_pool);
+err_pool_create:
+ mutex_destroy(&hdev->mmu_cache_lock);
+
+ return rc;
+}
+
+/*
+ * hl_mmu_fini - release the mmu module.
+ *
+ * @hdev: pointer to the habanalabs device structure
+ *
+ * This function does the following:
+ * - Disable mmu in hw
+ * - Free the pgts pool
+ *
+ * All ctxs should be freed before calling this func
+ */
+void hl_mmu_fini(struct hl_device *hdev)
+{
+ if (!hdev->mmu_enable)
+ return;
+
+ gen_pool_destroy(hdev->mmu_pgt_pool);
+
+ mutex_destroy(&hdev->mmu_cache_lock);
+
+ /* MMU HW fini will be done in device hw_fini() */
+}
+
+/*
+ * hl_mmu_ctx_init - init a ctx for using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Init a mutex to protect the concurrent mapping flow
+ * - Init a hash to hold all pgts related to this ctx
+ */
+void hl_mmu_ctx_init(struct hl_ctx *ctx)
+{
+ if (!ctx->hdev->mmu_enable)
+ return;
+
+ mutex_init(&ctx->mmu_lock);
+ hash_init(ctx->mmu_hash);
+}
+
+/*
+ * hl_mmu_ctx_fini - disable a ctx from using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Free any pgts which were not freed yet
+ * - Free the mutex
+ */
+void hl_mmu_ctx_fini(struct hl_ctx *ctx)
+{
+ struct pgt_info *pgt_info;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!ctx->hdev->mmu_enable)
+ return;
+
+ if (!hash_empty(ctx->mmu_hash))
+ dev_err(ctx->hdev->dev,
+ "ctx is freed while it has pgts in use\n");
+
+ hash_for_each_safe(ctx->mmu_hash, i, tmp, pgt_info, node) {
+ dev_err(ctx->hdev->dev,
+ "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
+ pgt_info->addr, ctx->asid, pgt_info->num_of_ptes);
+ free_hop(ctx, pgt_info->addr);
+ }
+
+ mutex_destroy(&ctx->mmu_lock);
+}
+
+static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ u64 hop0_addr = 0, hop0_pte_addr = 0,
+ hop1_addr = 0, hop1_pte_addr = 0,
+ hop2_addr = 0, hop2_pte_addr = 0,
+ hop3_addr = 0, hop3_pte_addr = 0,
+ hop4_addr = 0, hop4_pte_addr = 0,
+ curr_pte;
+ int clear_hop3 = 1;
+
+ hop0_addr = get_hop0_addr(ctx);
+
+ hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+
+ hop1_addr = get_next_hop_addr(curr_pte);
+
+ if (hop1_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+
+ hop2_addr = get_next_hop_addr(curr_pte);
+
+ if (hop2_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+
+ hop3_addr = get_next_hop_addr(curr_pte);
+
+ if (hop3_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+
+ if (!(curr_pte & LAST_MASK)) {
+ hop4_addr = get_next_hop_addr(curr_pte);
+
+ if (hop4_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+
+ clear_hop3 = 0;
+ }
+
+ if (!(curr_pte & PAGE_PRESENT_MASK))
+ goto not_mapped;
+
+ clear_pte(hdev, hop4_addr ? hop4_pte_addr : hop3_pte_addr);
+
+ if (hop4_addr && !put_pte(ctx, hop4_addr))
+ clear_hop3 = 1;
+
+ if (!clear_hop3)
+ goto flush;
+ clear_pte(hdev, hop3_pte_addr);
+
+ if (put_pte(ctx, hop3_addr))
+ goto flush;
+ clear_pte(hdev, hop2_pte_addr);
+
+ if (put_pte(ctx, hop2_addr))
+ goto flush;
+ clear_pte(hdev, hop1_pte_addr);
+
+ if (put_pte(ctx, hop1_addr))
+ goto flush;
+ clear_pte(hdev, hop0_pte_addr);
+
+flush:
+ /* flush all writes from all cores to reach PCI */
+ mb();
+
+ hdev->asic_funcs->read_pte(hdev,
+ hop4_addr ? hop4_pte_addr : hop3_pte_addr);
+
+ return 0;
+
+not_mapped:
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+
+ return -EINVAL;
+}
+
+/*
+ * hl_mmu_unmap - unmaps a virtual addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @page_size: size of the page to unmap
+ *
+ * This function does the following:
+ * - Check that the virt addr is mapped
+ * - Unmap the virt addr and frees pgts if possible
+ * - Returns 0 on success, -EINVAL if the given addr is not mapped
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it unmaps only a single page, the lock should be taken at a
+ * higher level in order to protect the entire unmapping of the memory area
+ */
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+{
+ struct hl_device *hdev = ctx->hdev;
+ u64 real_virt_addr;
+ u32 real_page_size, npages;
+ int i, rc;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ /*
+ * The H/W handles mapping of 4KB/2MB pages. Hence, if the host page size
+ * is bigger, we break it into sub-pages and unmap them separately.
+ */
+ if ((page_size % PAGE_SIZE_2MB) == 0) {
+ real_page_size = PAGE_SIZE_2MB;
+ } else if ((page_size % PAGE_SIZE_4KB) == 0) {
+ real_page_size = PAGE_SIZE_4KB;
+ } else {
+ dev_err(hdev->dev,
+ "page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
+ page_size);
+
+ return -EFAULT;
+ }
+
+ npages = page_size / real_page_size;
+ real_virt_addr = virt_addr;
+
+ for (i = 0 ; i < npages ; i++) {
+ rc = _hl_mmu_unmap(ctx, real_virt_addr);
+ if (rc)
+ return rc;
+
+ real_virt_addr += real_page_size;
+ }
+
+ return 0;
+}
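The same page-splitting logic is used by both hl_mmu_unmap() and hl_mmu_map(): a host page whose size is a multiple of 2MB (or otherwise of 4KB) is handled as several hardware-sized pages. A small stand-alone sketch of choosing the hardware page size and the resulting page count, with the 4KB/2MB constants spelled out for illustration:

#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SIZE_4KB (1u << 12)
#define EX_PAGE_SIZE_2MB (1u << 21)

/* Pick the hardware page size for a host page and how many of them it spans. */
static int ex_split_page(uint32_t page_size, uint32_t *real_page_size,
			 uint32_t *npages)
{
	if ((page_size % EX_PAGE_SIZE_2MB) == 0)
		*real_page_size = EX_PAGE_SIZE_2MB;
	else if ((page_size % EX_PAGE_SIZE_4KB) == 0)
		*real_page_size = EX_PAGE_SIZE_4KB;
	else
		return -1;	/* not 4KB nor 2MB aligned */

	*npages = page_size / *real_page_size;
	return 0;
}

int main(void)
{
	uint32_t real, n;

	if (!ex_split_page(64 * 1024, &real, &n))	/* 64KB host page */
		printf("map as %u pages of %u bytes\n", n, real);
	return 0;
}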
+
+static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ u32 page_size)
+{
+ struct hl_device *hdev = ctx->hdev;
+ u64 hop0_addr = 0, hop0_pte_addr = 0,
+ hop1_addr = 0, hop1_pte_addr = 0,
+ hop2_addr = 0, hop2_pte_addr = 0,
+ hop3_addr = 0, hop3_pte_addr = 0,
+ hop4_addr = 0, hop4_pte_addr = 0,
+ curr_pte = 0;
+ bool hop1_new = false, hop2_new = false, hop3_new = false,
+ hop4_new = false, is_huge;
+ int rc = -ENOMEM;
+
+ /*
+ * This mapping function can map a 4KB/2MB page. For a 2MB page there are
+ * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB
+ * pages only, but user memory could have been allocated with either of the
+ * two page sizes. Since this is common code for all three cases, we need
+ * this huge page check.
+ */
+ is_huge = page_size == PAGE_SIZE_2MB;
+
+ hop0_addr = get_hop0_addr(ctx);
+
+ hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+
+ hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
+
+ if (hop1_addr == ULLONG_MAX)
+ goto err;
+
+ hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+
+ hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
+
+ if (hop2_addr == ULLONG_MAX)
+ goto err;
+
+ hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+
+ hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
+
+ if (hop3_addr == ULLONG_MAX)
+ goto err;
+
+ hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+
+ if (!is_huge) {
+ hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
+
+ if (hop4_addr == ULLONG_MAX)
+ goto err;
+
+ hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+
+ curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+ }
+
+ if (curr_pte & PAGE_PRESENT_MASK) {
+ dev_err(hdev->dev,
+ "mapping already exists for virt_addr 0x%llx\n",
+ virt_addr);
+
+ dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
+ hdev->asic_funcs->read_pte(hdev, hop0_pte_addr),
+ hop0_pte_addr);
+ dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
+ hdev->asic_funcs->read_pte(hdev, hop1_pte_addr),
+ hop1_pte_addr);
+ dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
+ hdev->asic_funcs->read_pte(hdev, hop2_pte_addr),
+ hop2_pte_addr);
+ dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
+ hdev->asic_funcs->read_pte(hdev, hop3_pte_addr),
+ hop3_pte_addr);
+
+ if (!is_huge)
+ dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
+ hdev->asic_funcs->read_pte(hdev,
+ hop4_pte_addr),
+ hop4_pte_addr);
+
+ rc = -EINVAL;
+ goto err;
+ }
+
+ curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
+ | PAGE_PRESENT_MASK;
+
+ hdev->asic_funcs->write_pte(hdev,
+ is_huge ? hop3_pte_addr : hop4_pte_addr,
+ curr_pte);
+
+ if (hop1_new) {
+ curr_pte = (hop1_addr & PTE_PHYS_ADDR_MASK) |
+ PAGE_PRESENT_MASK;
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop0_pte_addr,
+ curr_pte);
+ }
+ if (hop2_new) {
+ curr_pte = (hop2_addr & PTE_PHYS_ADDR_MASK) |
+ PAGE_PRESENT_MASK;
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop1_pte_addr,
+ curr_pte);
+ get_pte(ctx, hop1_addr);
+ }
+ if (hop3_new) {
+ curr_pte = (hop3_addr & PTE_PHYS_ADDR_MASK) |
+ PAGE_PRESENT_MASK;
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop2_pte_addr,
+ curr_pte);
+ get_pte(ctx, hop2_addr);
+ }
+
+ if (!is_huge) {
+ if (hop4_new) {
+ curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
+ PAGE_PRESENT_MASK;
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+ hop3_pte_addr, curr_pte);
+ get_pte(ctx, hop3_addr);
+ }
+
+ get_pte(ctx, hop4_addr);
+ } else {
+ get_pte(ctx, hop3_addr);
+ }
+
+ /* flush all writes from all cores to reach PCI */
+ mb();
+
+ hdev->asic_funcs->read_pte(hdev,
+ is_huge ? hop3_pte_addr : hop4_pte_addr);
+
+ return 0;
+
+err:
+ if (hop4_new)
+ free_hop(ctx, hop4_addr);
+ if (hop3_new)
+ free_hop(ctx, hop3_addr);
+ if (hop2_new)
+ free_hop(ctx, hop2_addr);
+ if (hop1_new)
+ free_hop(ctx, hop1_addr);
+
+ return rc;
+}
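A leaf PTE is built by masking the physical address down to its aligned part and OR-ing in the 'last' and 'present' bits, while an intermediate PTE carries only the next hop's address plus the present bit; get_next_hop_addr() reverses that. A small stand-alone sketch of the encode/decode; the bit positions are illustrative rather than the exact values from mmu_general.h:

#include <stdio.h>
#include <stdint.h>

/* illustrative layout: low 12 bits hold control flags, the rest the address */
#define EX_PAGE_PRESENT_MASK	0x1ULL
#define EX_LAST_MASK		0x800ULL
#define EX_PHYS_ADDR_MASK	0xFFFFFFFFFFFFF000ULL

/* Leaf PTE: aligned physical address + last + present. */
static uint64_t ex_make_leaf_pte(uint64_t phys_addr)
{
	return (phys_addr & EX_PHYS_ADDR_MASK) | EX_LAST_MASK |
	       EX_PAGE_PRESENT_MASK;
}

/* Intermediate PTE: address of the next hop + present. */
static uint64_t ex_make_hop_pte(uint64_t hop_addr)
{
	return (hop_addr & EX_PHYS_ADDR_MASK) | EX_PAGE_PRESENT_MASK;
}

/* Decode: stored address, or all-ones if the entry is not present. */
static uint64_t ex_next_hop(uint64_t pte)
{
	return (pte & EX_PAGE_PRESENT_MASK) ? (pte & EX_PHYS_ADDR_MASK)
					    : UINT64_MAX;
}

int main(void)
{
	uint64_t pte = ex_make_leaf_pte(0x20001000ULL);

	printf("leaf pte 0x%llx -> addr 0x%llx\n",
	       (unsigned long long)pte, (unsigned long long)ex_next_hop(pte));
	return 0;
}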
+
+/*
+ * hl_mmu_map - maps a virtual addr to physical addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @page_size: physical page size
+ *
+ * This function does the following:
+ * - Check that the virt addr is not mapped
+ * - Allocate pgts as necessary in order to map the virt addr to the phys
+ * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it maps only a single page, the lock should be taken at a
+ * higher level in order to protect the entire mapping of the memory area
+ */
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+{
+ struct hl_device *hdev = ctx->hdev;
+ u64 real_virt_addr;
+ u32 real_page_size, npages;
+ int i, rc, mapped_cnt = 0;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ /*
+ * The H/W handles mapping of 4KB/2MB pages. Hence, if the host page size
+ * is bigger, we break it into sub-pages and map them separately.
+ */
+ if ((page_size % PAGE_SIZE_2MB) == 0) {
+ real_page_size = PAGE_SIZE_2MB;
+ } else if ((page_size % PAGE_SIZE_4KB) == 0) {
+ real_page_size = PAGE_SIZE_4KB;
+ } else {
+ dev_err(hdev->dev,
+ "page size of %u is not 4KB nor 2MB aligned, can't map\n",
+ page_size);
+
+ return -EFAULT;
+ }
+
+ npages = page_size / real_page_size;
+ real_virt_addr = virt_addr;
+
+ for (i = 0 ; i < npages ; i++) {
+ rc = _hl_mmu_map(ctx, real_virt_addr, phys_addr,
+ real_page_size);
+ if (rc)
+ goto err;
+
+ real_virt_addr += real_page_size;
+ mapped_cnt++;
+ }
+
+ return 0;
+
+err:
+ real_virt_addr = virt_addr;
+ for (i = 0 ; i < mapped_cnt ; i++) {
+ if (_hl_mmu_unmap(ctx, real_virt_addr))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap va: 0x%llx\n", real_virt_addr);
+
+ real_virt_addr += real_page_size;
+ }
+
+ return rc;
+}
+
+/*
+ * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_out(struct hl_ctx *ctx)
+{
+
+}
+
+/*
+ * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_in(struct hl_ctx *ctx)
+{
+
+}