diff options
Diffstat (limited to 'drivers/accel/amdxdna')
-rw-r--r-- | drivers/accel/amdxdna/Makefile | 4 | ||||
-rw-r--r-- | drivers/accel/amdxdna/TODO | 2 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_ctx.c | 16 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_message.c | 41 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_pci.c | 256 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_pci.h | 68 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_pm.c | 108 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_smu.c | 85 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_solver.c | 59 | ||||
-rw-r--r-- | drivers/accel/amdxdna/aie2_solver.h | 1 | ||||
-rw-r--r-- | drivers/accel/amdxdna/amdxdna_ctx.c | 53 | ||||
-rw-r--r-- | drivers/accel/amdxdna/amdxdna_gem.c | 2 | ||||
-rw-r--r-- | drivers/accel/amdxdna/amdxdna_mailbox.c | 61 | ||||
-rw-r--r-- | drivers/accel/amdxdna/amdxdna_pci_drv.c | 28 | ||||
-rw-r--r-- | drivers/accel/amdxdna/amdxdna_pci_drv.h | 26 | ||||
-rw-r--r-- | drivers/accel/amdxdna/npu1_regs.c | 31 | ||||
-rw-r--r-- | drivers/accel/amdxdna/npu2_regs.c | 17 | ||||
-rw-r--r-- | drivers/accel/amdxdna/npu4_regs.c | 34 | ||||
-rw-r--r-- | drivers/accel/amdxdna/npu5_regs.c | 17 | ||||
-rw-r--r-- | drivers/accel/amdxdna/npu6_regs.c | 114 |
20 files changed, 777 insertions, 246 deletions
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile index ed6f87910880..0e9adf6890a0 100644 --- a/drivers/accel/amdxdna/Makefile +++ b/drivers/accel/amdxdna/Makefile @@ -5,6 +5,7 @@ amdxdna-y := \ aie2_error.o \ aie2_message.o \ aie2_pci.o \ + aie2_pm.o \ aie2_psp.o \ aie2_smu.o \ aie2_solver.o \ @@ -17,5 +18,6 @@ amdxdna-y := \ npu1_regs.o \ npu2_regs.o \ npu4_regs.o \ - npu5_regs.o + npu5_regs.o \ + npu6_regs.o obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO index a130259f5f70..5119bccd1917 100644 --- a/drivers/accel/amdxdna/TODO +++ b/drivers/accel/amdxdna/TODO @@ -1,5 +1,3 @@ -- Replace idr with xa - Add import and export BO support - Add debugfs support - Add debug BO support -- Improve power management diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 90e8d87666a9..9facf45818f9 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -11,6 +11,7 @@ #include <drm/drm_syncobj.h> #include <linux/hmm.h> #include <linux/types.h> +#include <linux/xarray.h> #include <trace/events/amdxdna.h> #include "aie2_msg_priv.h" @@ -90,11 +91,11 @@ void aie2_restart_ctx(struct amdxdna_client *client) { struct amdxdna_dev *xdna = client->xdna; struct amdxdna_hwctx *hwctx; - int next = 0; + unsigned long hwctx_id; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); mutex_lock(&client->hwctx_lock); - idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { if (hwctx->status != HWCTX_STAT_STOP) continue; @@ -179,7 +180,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job) up(&job->hwctx->priv->job_sem); job->job_done = true; dma_fence_put(fence); - mmput(job->mm); + mmput_async(job->mm); aie2_job_put(job); } @@ -518,6 +519,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) struct drm_gpu_scheduler *sched; struct amdxdna_hwctx_priv *priv; struct amdxdna_gem_obj *heap; + struct amdxdna_dev_hdl *ndev; int i, ret; priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); @@ -612,6 +614,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) } hwctx->status = HWCTX_STAT_INIT; + ndev = xdna->dev_handle; + ndev->hwctx_num++; XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); @@ -641,10 +645,13 @@ free_priv: void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) { + struct amdxdna_dev_hdl *ndev; struct amdxdna_dev *xdna; int idx; xdna = hwctx->client->xdna; + ndev = xdna->dev_handle; + ndev->hwctx_num--; drm_sched_wqueue_stop(&hwctx->priv->sched); /* Now, scheduler will not send command to device. */ @@ -683,6 +690,9 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size int ret; XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name); + if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad))) + return -EINVAL; + if (hwctx->status != HWCTX_STAT_INIT) { XDNA_ERR(xdna, "Not support re-config CU"); return -EINVAL; diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index c01a1d957b56..9e2c9a44f76a 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -14,6 +14,7 @@ #include <linux/errno.h> #include <linux/pci.h> #include <linux/types.h> +#include <linux/xarray.h> #include "aie2_msg_priv.h" #include "aie2_pci.h" @@ -70,52 +71,33 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev) int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value) { DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG); + int ret; req.type = type; req.value = value; - return aie2_send_mgmt_msg_wait(ndev, &msg); -} - -int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value) -{ - DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG); - int ret; - - req.type = type; ret = aie2_send_mgmt_msg_wait(ndev, &msg); if (ret) { - XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret); + XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret); return ret; } - *value = resp.value; return 0; } -int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev) +int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value) { - DECLARE_AIE2_MSG(protocol_version, MSG_OP_GET_PROTOCOL_VERSION); - struct amdxdna_dev *xdna = ndev->xdna; + DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG); int ret; + req.type = type; ret = aie2_send_mgmt_msg_wait(ndev, &msg); if (ret) { - XDNA_ERR(xdna, "Failed to get protocol version, ret %d", ret); + XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret); return ret; } - if (resp.major != ndev->priv->protocol_major) { - XDNA_ERR(xdna, "Incompatible firmware protocol version major %d minor %d", - resp.major, resp.minor); - return -EINVAL; - } - - if (resp.minor < ndev->priv->protocol_minor) { - XDNA_ERR(xdna, "Firmware minor version smaller than supported"); - return -EINVAL; - } - + *value = resp.value; return 0; } @@ -315,10 +297,10 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, struct amdxdna_dev *xdna = ndev->xdna; struct amdxdna_client *client; struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; dma_addr_t dma_addr; u32 aie_bitmap = 0; u8 *buff_addr; - int next = 0; int ret, idx; buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr, @@ -329,7 +311,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, /* Go through each hardware context and mark the AIE columns that are active */ list_for_each_entry(client, &xdna->client_list, node) { idx = srcu_read_lock(&client->hwctx_srcu); - idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) aie_bitmap |= amdxdna_hwctx_col_map(hwctx); srcu_read_unlock(&client->hwctx_srcu, idx); } @@ -413,6 +395,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx) for (i = 0; i < hwctx->cus->num_cus; i++) { struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i]; + if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad))) + return -EINVAL; + gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo); if (!gobj) { XDNA_ERR(xdna, "Lookup GEM object failed"); diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 349ada697e48..8de8f3bd4987 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -15,6 +15,7 @@ #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/pci.h> +#include <linux/xarray.h> #include "aie2_msg_priv.h" #include "aie2_pci.h" @@ -33,17 +34,51 @@ MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used"); * The related register and ring buffer information is on SRAM BAR. * This struct is the register layout. */ +#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */ struct mgmt_mbox_chann_info { - u32 x2i_tail; - u32 x2i_head; - u32 x2i_buf; - u32 x2i_buf_sz; - u32 i2x_tail; - u32 i2x_head; - u32 i2x_buf; - u32 i2x_buf_sz; + __u32 x2i_tail; + __u32 x2i_head; + __u32 x2i_buf; + __u32 x2i_buf_sz; + __u32 i2x_tail; + __u32 i2x_head; + __u32 i2x_buf; + __u32 i2x_buf_sz; + __u32 magic; + __u32 msi_id; + __u32 prot_major; + __u32 prot_minor; + __u32 rsvd[4]; }; +static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor) +{ + struct amdxdna_dev *xdna = ndev->xdna; + + /* + * The driver supported mailbox behavior is defined by + * ndev->priv->protocol_major and protocol_minor. + * + * When protocol_major and fw_major are different, it means driver + * and firmware are incompatible. + */ + if (ndev->priv->protocol_major != fw_major) { + XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d", + fw_major, fw_minor); + return -EINVAL; + } + + /* + * When protocol_minor is greater then fw_minor, that means driver + * relies on operation the installed firmware does not support. + */ + if (ndev->priv->protocol_minor > fw_minor) { + XDNA_ERR(xdna, "Firmware minor version smaller than supported"); + return -EINVAL; + } + return 0; +} + static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev) { struct amdxdna_dev *xdna = ndev->xdna; @@ -57,6 +92,8 @@ static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev) XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr); XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size); XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx); + XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major); + XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor); } static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev) @@ -87,6 +124,12 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev) for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++) reg[i] = readl(ndev->sram_base + off + i * sizeof(u32)); + if (info_regs.magic != MGMT_MBOX_MAGIC) { + XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic); + ret = -EINVAL; + goto done; + } + i2x = &ndev->mgmt_i2x; x2i = &ndev->mgmt_x2i; @@ -99,38 +142,42 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev) x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail); x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf); x2i->rb_size = info_regs.x2i_buf_sz; - ndev->mgmt_chan_idx = CHANN_INDEX(ndev, x2i->rb_start_addr); + ndev->mgmt_chan_idx = info_regs.msi_id; + ndev->mgmt_prot_major = info_regs.prot_major; + ndev->mgmt_prot_minor = info_regs.prot_minor; + + ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor); + +done: aie2_dump_chann_info_debug(ndev); /* Must clear address at FW_ALIVE_OFF */ writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF)); - return 0; + return ret; } -static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev) +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val) { - const struct rt_config *cfg = &ndev->priv->rt_config; - u64 value; + const struct rt_config *cfg; + u32 value; int ret; - ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value); - if (ret) { - XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed", - cfg->type, cfg->value); - return ret; - } + for (cfg = ndev->priv->rt_config; cfg->type; cfg++) { + if (cfg->category != category) + continue; - ret = aie2_get_runtime_cfg(ndev, cfg->type, &value); - if (ret) { - XDNA_ERR(ndev->xdna, "Get runtime cfg failed"); - return ret; + value = val ? *val : cfg->value; + ret = aie2_set_runtime_cfg(ndev, cfg->type, value); + if (ret) { + XDNA_ERR(ndev->xdna, "Set type %d value %d failed", + cfg->type, value); + return ret; + } } - if (value != cfg->value) - return -EINVAL; - return 0; } @@ -157,13 +204,7 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev) { int ret; - ret = aie2_check_protocol_version(ndev); - if (ret) { - XDNA_ERR(ndev->xdna, "Check header hash failed"); - return ret; - } - - ret = aie2_runtime_cfg(ndev); + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Runtime config failed"); return ret; @@ -257,9 +298,25 @@ static int aie2_xrs_unload(void *cb_arg) return ret; } +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level) +{ + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + struct amdxdna_dev_hdl *ndev; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + ndev = xdna->dev_handle; + ndev->dft_dpm_level = dpm_level; + if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level) + return 0; + + return ndev->priv->hw_ops.set_dpm(ndev, dpm_level); +} + static struct xrs_action_ops aie2_xrs_actions = { .load = aie2_xrs_load, .unload = aie2_xrs_unload, + .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level, }; static void aie2_hw_stop(struct amdxdna_dev *xdna) @@ -267,12 +324,22 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna) struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); struct amdxdna_dev_hdl *ndev = xdna->dev_handle; + if (ndev->dev_status <= AIE2_DEV_INIT) { + XDNA_ERR(xdna, "device is already stopped"); + return; + } + aie2_mgmt_fw_fini(ndev); xdna_mailbox_stop_channel(ndev->mgmt_chann); xdna_mailbox_destroy_channel(ndev->mgmt_chann); + ndev->mgmt_chann = NULL; + drmm_kfree(&xdna->ddev, ndev->mbox); + ndev->mbox = NULL; aie2_psp_stop(ndev->psp_hdl); aie2_smu_fini(ndev); pci_disable_device(pdev); + + ndev->dev_status = AIE2_DEV_INIT; } static int aie2_hw_start(struct amdxdna_dev *xdna) @@ -283,6 +350,11 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) u32 xdna_mailbox_intr_reg; int mgmt_mb_irq, ret; + if (ndev->dev_status >= AIE2_DEV_START) { + XDNA_INFO(xdna, "device is already started"); + return 0; + } + ret = pci_enable_device(pdev); if (ret) { XDNA_ERR(xdna, "failed to enable device, ret %d", ret); @@ -339,12 +411,20 @@ static int aie2_hw_start(struct amdxdna_dev *xdna) goto stop_psp; } + ret = aie2_pm_init(ndev); + if (ret) { + XDNA_ERR(xdna, "failed to init pm, ret %d", ret); + goto destroy_mgmt_chann; + } + ret = aie2_mgmt_fw_init(ndev); if (ret) { XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret); goto destroy_mgmt_chann; } + ndev->dev_status = AIE2_DEV_START; + return 0; destroy_mgmt_chann: @@ -463,10 +543,9 @@ static int aie2_init(struct amdxdna_dev *xdna) } ndev->total_col = min(aie2_max_col, ndev->metadata.cols); - xrs_cfg.clk_list.num_levels = 3; - xrs_cfg.clk_list.cu_clk_list[0] = 0; - xrs_cfg.clk_list.cu_clk_list[1] = 800; - xrs_cfg.clk_list.cu_clk_list[2] = 1000; + xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1; + for (i = 0; i < xrs_cfg.clk_list.num_levels; i++) + xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk; xrs_cfg.sys_eff_factor = 1; xrs_cfg.ddev = &xdna->ddev; xrs_cfg.actions = &aie2_xrs_actions; @@ -623,6 +702,39 @@ static int aie2_get_aie_version(struct amdxdna_client *client, return 0; } +static int aie2_get_firmware_version(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_query_firmware_version version; + struct amdxdna_dev *xdna = client->xdna; + + version.major = xdna->fw_ver.major; + version.minor = xdna->fw_ver.minor; + version.patch = xdna->fw_ver.sub; + version.build = xdna->fw_ver.build; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version))) + return -EFAULT; + + return 0; +} + +static int aie2_get_power_mode(struct amdxdna_client *client, + struct amdxdna_drm_get_info *args) +{ + struct amdxdna_drm_get_power_mode mode = {}; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_dev_hdl *ndev; + + ndev = xdna->dev_handle; + mode.power_mode = ndev->pw_mode; + + if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode))) + return -EFAULT; + + return 0; +} + static int aie2_get_clock_metadata(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) { @@ -636,11 +748,11 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client, if (!clock) return -ENOMEM; - memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name, - sizeof(clock->mp_npu_clock.name)); - clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz; - memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock->h_clock.name)); - clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz; + snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name), + "MP-NPU Clock"); + clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq; + snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock"); + clock->h_clock.freq_mhz = ndev->hclk_freq; if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock))) ret = -EFAULT; @@ -657,11 +769,11 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client, struct amdxdna_drm_query_hwctx *tmp; struct amdxdna_client *tmp_client; struct amdxdna_hwctx *hwctx; + unsigned long hwctx_id; bool overflow = false; u32 req_bytes = 0; u32 hw_i = 0; int ret = 0; - int next; int idx; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); @@ -673,8 +785,7 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client, buf = u64_to_user_ptr(args->buffer); list_for_each_entry(tmp_client, &xdna->client_list, node) { idx = srcu_read_lock(&tmp_client->hwctx_srcu); - next = 0; - idr_for_each_entry_continue(&tmp_client->hwctx_idr, hwctx, next) { + amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) { req_bytes += sizeof(*tmp); if (args->buffer_size < req_bytes) { /* Continue iterating to get the required size */ @@ -736,6 +847,12 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i case DRM_AMDXDNA_QUERY_HW_CONTEXTS: ret = aie2_get_hwctx_status(client, args); break; + case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION: + ret = aie2_get_firmware_version(client, args); + break; + case DRM_AMDXDNA_GET_POWER_MODE: + ret = aie2_get_power_mode(client, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; @@ -746,12 +863,61 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i return ret; } +static int aie2_set_power_mode(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_drm_set_power_mode power_state; + enum amdxdna_power_mode_type power_mode; + struct amdxdna_dev *xdna = client->xdna; + + if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer), + sizeof(power_state))) { + XDNA_ERR(xdna, "Failed to copy power mode request into kernel"); + return -EFAULT; + } + + if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad))) + return -EINVAL; + + power_mode = power_state.power_mode; + if (power_mode > POWER_MODE_TURBO) { + XDNA_ERR(xdna, "Invalid power mode %d", power_mode); + return -EINVAL; + } + + return aie2_pm_set_mode(xdna->dev_handle, power_mode); +} + +static int aie2_set_state(struct amdxdna_client *client, + struct amdxdna_drm_set_state *args) +{ + struct amdxdna_dev *xdna = client->xdna; + int ret, idx; + + if (!drm_dev_enter(&xdna->ddev, &idx)) + return -ENODEV; + + switch (args->param) { + case DRM_AMDXDNA_SET_POWER_MODE: + ret = aie2_set_power_mode(client, args); + break; + default: + XDNA_ERR(xdna, "Not supported request parameter %u", args->param); + ret = -EOPNOTSUPP; + break; + } + + drm_dev_exit(idx); + return ret; +} + const struct amdxdna_dev_ops aie2_ops = { .init = aie2_init, .fini = aie2_fini, .resume = aie2_hw_start, .suspend = aie2_hw_stop, .get_aie_info = aie2_get_info, + .set_aie_state = aie2_set_state, .hwctx_init = aie2_hwctx_init, .hwctx_fini = aie2_hwctx_fini, .hwctx_config = aie2_hwctx_config, diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index 6a2686255c9c..cc159cadff9f 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -6,6 +6,7 @@ #ifndef _AIE2_PCI_H_ #define _AIE2_PCI_H_ +#include <drm/amdxdna_accel.h> #include <linux/semaphore.h> #include "amdxdna_mailbox.h" @@ -38,9 +39,6 @@ }) #define CHAN_SLOT_SZ SZ_8K -#define CHANN_INDEX(ndev, rbuf_off) \ - (((rbuf_off) - SRAM_REG_OFF((ndev), MBOX_CHANN_OFF)) / CHAN_SLOT_SZ) - #define MBOX_SIZE(ndev) \ ({ \ typeof(ndev) _ndev = (ndev); \ @@ -48,9 +46,6 @@ pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \ }) -#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_mpnpuclk_freq_max) -#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max) - enum aie2_smu_reg_idx { SMU_CMD_REG = 0, SMU_ARG_REG, @@ -112,14 +107,20 @@ struct aie_metadata { struct aie_tile_metadata shim; }; -struct clock_entry { - char name[16]; - u32 freq_mhz; +enum rt_config_category { + AIE2_RT_CFG_INIT, + AIE2_RT_CFG_CLK_GATING, }; struct rt_config { u32 type; u32 value; + u32 category; +}; + +struct dpm_clk_freq { + u32 npuclk; + u32 hclk; }; /* @@ -149,6 +150,12 @@ struct amdxdna_hwctx_priv { struct drm_syncobj *syncobj; }; +enum aie2_dev_status { + AIE2_DEV_UNINIT, + AIE2_DEV_INIT, + AIE2_DEV_START, +}; + struct amdxdna_dev_hdl { struct amdxdna_dev *xdna; const struct amdxdna_dev_priv *priv; @@ -160,17 +167,29 @@ struct amdxdna_dev_hdl { struct xdna_mailbox_chann_res mgmt_x2i; struct xdna_mailbox_chann_res mgmt_i2x; u32 mgmt_chan_idx; + u32 mgmt_prot_major; + u32 mgmt_prot_minor; u32 total_col; struct aie_version version; struct aie_metadata metadata; - struct clock_entry mp_npu_clock; - struct clock_entry h_clock; + + /* power management and clock*/ + enum amdxdna_power_mode_type pw_mode; + u32 dpm_level; + u32 dft_dpm_level; + u32 max_dpm_level; + u32 clk_gating; + u32 npuclk_freq; + u32 hclk_freq; /* Mailbox and the management channel */ struct mailbox *mbox; struct mailbox_channel *mgmt_chann; struct async_events *async_events; + + enum aie2_dev_status dev_status; + u32 hwctx_num; }; #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \ @@ -181,11 +200,17 @@ struct aie2_bar_off_pair { u32 offset; }; +struct aie2_hw_ops { + int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +}; + struct amdxdna_dev_priv { const char *fw_path; u64 protocol_major; u64 protocol_minor; - struct rt_config rt_config; + const struct rt_config *rt_config; + const struct dpm_clk_freq *dpm_clk_tbl; + #define COL_ALIGN_NONE 0 #define COL_ALIGN_NATURE 1 u32 col_align; @@ -196,15 +221,29 @@ struct amdxdna_dev_priv { struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX]; struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS]; struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS]; - u32 smu_mpnpuclk_freq_max; - u32 smu_hclk_freq_max; + struct aie2_hw_ops hw_ops; }; extern const struct amdxdna_dev_ops aie2_ops; +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev, + enum rt_config_category category, u32 *val); + +/* aie2 npu hw config */ +extern const struct dpm_clk_freq npu1_dpm_clk_table[]; +extern const struct dpm_clk_freq npu4_dpm_clk_table[]; +extern const struct rt_config npu1_default_rt_cfg[]; +extern const struct rt_config npu4_default_rt_cfg[]; + /* aie2_smu.c */ int aie2_smu_init(struct amdxdna_dev_hdl *ndev); void aie2_smu_fini(struct amdxdna_dev_hdl *ndev); +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); + +/* aie2_pm.c */ +int aie2_pm_init(struct amdxdna_dev_hdl *ndev); +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); /* aie2_psp.c */ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); @@ -222,7 +261,6 @@ int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev); int aie2_resume_fw(struct amdxdna_dev_hdl *ndev); int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value); int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value); -int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev); int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid); int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version); int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c new file mode 100644 index 000000000000..426c38fce848 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/drm_print.h> +#include <drm/gpu_scheduler.h> + +#include "aie2_pci.h" +#include "amdxdna_pci_drv.h" + +#define AIE2_CLK_GATING_ENABLE 1 +#define AIE2_CLK_GATING_DISABLE 0 + +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val) +{ + int ret; + + ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val); + if (ret) + return ret; + + ndev->clk_gating = val; + return 0; +} + +int aie2_pm_init(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + if (ndev->dev_status != AIE2_DEV_UNINIT) { + /* Resume device */ + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating); + if (ret) + return ret; + + return 0; + } + + while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk) + ndev->max_dpm_level++; + ndev->max_dpm_level--; + + ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE); + if (ret) + return ret; + + ndev->pw_mode = POWER_MODE_DEFAULT; + ndev->dft_dpm_level = ndev->max_dpm_level; + + return 0; +} + +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target) +{ + struct amdxdna_dev *xdna = ndev->xdna; + u32 clk_gating, dpm_level; + int ret; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + + if (ndev->pw_mode == target) + return 0; + + switch (target) { + case POWER_MODE_TURBO: + if (ndev->hwctx_num) { + XDNA_ERR(xdna, "Can not set turbo when there is active hwctx"); + return -EINVAL; + } + + clk_gating = AIE2_CLK_GATING_DISABLE; + dpm_level = ndev->max_dpm_level; + break; + case POWER_MODE_HIGH: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->max_dpm_level; + break; + case POWER_MODE_DEFAULT: + clk_gating = AIE2_CLK_GATING_ENABLE; + dpm_level = ndev->dft_dpm_level; + break; + default: + return -EOPNOTSUPP; + } + + ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + if (ret) + return ret; + + ret = aie2_pm_set_clk_gating(ndev, clk_gating); + if (ret) + return ret; + + ndev->pw_mode = target; + + return 0; +} diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c index 91893d438da7..73388443c676 100644 --- a/drivers/accel/amdxdna/aie2_smu.c +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -19,8 +19,11 @@ #define AIE2_SMU_POWER_OFF 0x4 #define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5 #define AIE2_SMU_SET_HCLK_FREQ 0x6 +#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7 +#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8 -static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg) +static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, + u32 reg_arg, u32 *out) { u32 resp; int ret; @@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg) return ret; } + if (out) + *out = readl(SMU_REG(ndev, SMU_OUT_REG)); + if (resp != SMU_RESULT_OK) { XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp); return -EINVAL; @@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg) return 0; } -static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz) +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { + u32 freq; int ret; - if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) { - XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz); - return -EINVAL; + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); } + ndev->npuclk_freq = freq; - ndev->mp_npu_clock.freq_mhz = freq_mhz; - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz); - if (!ret) - XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", freq_mhz); - - return ret; -} - -static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz) -{ - int ret; - - if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) { - XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz); - return -EINVAL; + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, + ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq); + if (ret) { + XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", + ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); } + ndev->hclk_freq = freq; + ndev->dpm_level = dpm_level; - ndev->h_clock.freq_mhz = freq_mhz; - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz); - if (!ret) - XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", freq_mhz); + XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", + ndev->npuclk_freq, ndev->hclk_freq); - return ret; + return 0; } -int aie2_smu_init(struct amdxdna_dev_hdl *ndev) +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { int ret; - ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0); + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); if (ret) { - XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret); + XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", + dpm_level, ret); return ret; } - ret = aie2_smu_set_mpnpu_clock_freq(ndev, SMU_MPNPUCLK_FREQ_MAX(ndev)); + ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); if (ret) { - XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret); + XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d", + dpm_level, ret); return ret; } - snprintf(ndev->mp_npu_clock.name, sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock"); - ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev)); + ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; + ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; + ndev->dpm_level = dpm_level; + + XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n", + ndev->npuclk_freq, ndev->hclk_freq); + + return 0; +} + +int aie2_smu_init(struct amdxdna_dev_hdl *ndev) +{ + int ret; + + ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL); if (ret) { - XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret); + XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret); return ret; } - snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H Clock"); return 0; } @@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev) { int ret; - ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0); + ndev->priv->hw_ops.set_dpm(ndev, 0); + ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL); if (ret) XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret); } diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c index a537c66589a4..1939625d6027 100644 --- a/drivers/accel/amdxdna/aie2_solver.c +++ b/drivers/accel/amdxdna/aie2_solver.c @@ -25,6 +25,7 @@ struct solver_node { struct partition_node *pt_node; void *cb_arg; + u32 dpm_level; u32 cols_len; u32 start_cols[] __counted_by(cols_len); }; @@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, struct alloc_requests *req) return 0; } +static bool is_valid_qos_dpm_params(struct aie_qos *rqos) +{ + /* + * gops is retrieved from the xmodel, so it's always set + * fps and latency are the configurable params from the application + */ + if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0)) + return true; + + return false; +} + +static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level) +{ + struct solver_rgroup *rgp = &xrs->rgp; + struct cdo_parts *cdop = &req->cdo; + struct aie_qos *rqos = &req->rqos; + u32 freq, max_dpm_level, level; + struct solver_node *node; + + max_dpm_level = xrs->cfg.clk_list.num_levels - 1; + /* If no QoS parameters are passed, set it to the max DPM level */ + if (!is_valid_qos_dpm_params(rqos)) { + level = max_dpm_level; + goto set_dpm; + } + + /* Find one CDO group that meet the GOPs requirement. */ + for (level = 0; level < max_dpm_level; level++) { + freq = xrs->cfg.clk_list.cu_clk_list[level]; + if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000)) + break; + } + + /* set the dpm level which fits all the sessions */ + list_for_each_entry(node, &rgp->node_list, list) { + if (node->dpm_level > level) + level = node->dpm_level; + } + +set_dpm: + *dpm_level = level; + return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level); +} + static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid) { struct solver_node *node; @@ -159,12 +205,9 @@ static int get_free_partition(struct solver_state *xrs, pt_node->ncols = ncols; /* - * Before fully support latency in QoS, if a request - * specifies a non-zero latency value, it will not share - * the partition with other requests. + * Always set exclusive to false for now. */ - if (req->rqos.latency) - pt_node->exclusive = true; + pt_node->exclusive = false; list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list); xrs->rgp.npartition_node++; @@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg) struct xrs_action_load load_act; struct solver_node *snode; struct solver_state *xrs; + u32 dpm_level; int ret; xrs = (struct solver_state *)hdl; @@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg) if (ret) goto free_node; + ret = set_dpm_level(xrs, req, &dpm_level); + if (ret) + goto free_node; + + snode->dpm_level = dpm_level; snode->cb_arg = cb_arg; drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n", diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h index 9b1847bb46a6..a2e3c52229e9 100644 --- a/drivers/accel/amdxdna/aie2_solver.h +++ b/drivers/accel/amdxdna/aie2_solver.h @@ -99,6 +99,7 @@ struct clk_list_info { struct xrs_action_ops { int (*load)(void *cb_arg, struct xrs_action_load *action); int (*unload)(void *cb_arg); + int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level); }; /* diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 5478b631b73f..d11b1c83d9c3 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -11,6 +11,7 @@ #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_print.h> #include <drm/gpu_scheduler.h> +#include <linux/xarray.h> #include <trace/events/amdxdna.h> #include "amdxdna_ctx.h" @@ -63,11 +64,11 @@ void amdxdna_hwctx_suspend(struct amdxdna_client *client) { struct amdxdna_dev *xdna = client->xdna; struct amdxdna_hwctx *hwctx; - int next = 0; + unsigned long hwctx_id; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); mutex_lock(&client->hwctx_lock); - idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) xdna->dev_info->ops->hwctx_suspend(hwctx); mutex_unlock(&client->hwctx_lock); } @@ -76,11 +77,11 @@ void amdxdna_hwctx_resume(struct amdxdna_client *client) { struct amdxdna_dev *xdna = client->xdna; struct amdxdna_hwctx *hwctx; - int next = 0; + unsigned long hwctx_id; drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); mutex_lock(&client->hwctx_lock); - idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) xdna->dev_info->ops->hwctx_resume(hwctx); mutex_unlock(&client->hwctx_lock); } @@ -149,13 +150,13 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) void amdxdna_hwctx_remove_all(struct amdxdna_client *client) { struct amdxdna_hwctx *hwctx; - int next = 0; + unsigned long hwctx_id; mutex_lock(&client->hwctx_lock); - idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) { XDNA_DBG(client->xdna, "PID %d close HW context %d", client->pid, hwctx->id); - idr_remove(&client->hwctx_idr, hwctx->id); + xa_erase(&client->hwctx_xa, hwctx->id); mutex_unlock(&client->hwctx_lock); amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); mutex_lock(&client->hwctx_lock); @@ -194,15 +195,13 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr hwctx->num_tiles = args->num_tiles; hwctx->mem_size = args->mem_size; hwctx->max_opc = args->max_opc; - mutex_lock(&client->hwctx_lock); - ret = idr_alloc_cyclic(&client->hwctx_idr, hwctx, 0, MAX_HWCTX_ID, GFP_KERNEL); + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx, + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID), + &client->next_hwctxid, GFP_KERNEL); if (ret < 0) { - mutex_unlock(&client->hwctx_lock); XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret); goto free_hwctx; } - hwctx->id = ret; - mutex_unlock(&client->hwctx_lock); hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id); if (!hwctx->name) { @@ -228,9 +227,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr free_name: kfree(hwctx->name); rm_id: - mutex_lock(&client->hwctx_lock); - idr_remove(&client->hwctx_idr, hwctx->id); - mutex_unlock(&client->hwctx_lock); + xa_erase(&client->hwctx_xa, hwctx->id); free_hwctx: kfree(hwctx); exit: @@ -246,27 +243,24 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d struct amdxdna_hwctx *hwctx; int ret = 0, idx; + if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) + return -EINVAL; + if (!drm_dev_enter(dev, &idx)) return -ENODEV; - /* - * Use hwctx_lock to achieve exclusion with other hwctx writers, - * SRCU to synchronize with exec/wait command ioctls. - * - * The pushed jobs are handled by DRM scheduler during destroy. - */ - mutex_lock(&client->hwctx_lock); - hwctx = idr_find(&client->hwctx_idr, args->handle); + hwctx = xa_erase(&client->hwctx_xa, args->handle); if (!hwctx) { - mutex_unlock(&client->hwctx_lock); ret = -EINVAL; XDNA_DBG(xdna, "PID %d HW context %d not exist", client->pid, args->handle); goto out; } - idr_remove(&client->hwctx_idr, hwctx->id); - mutex_unlock(&client->hwctx_lock); + /* + * The pushed jobs are handled by DRM scheduler during destroy. + * SRCU to synchronize with exec command ioctls. + */ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle); @@ -286,6 +280,9 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr void *buf; u64 val; + if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad))) + return -EINVAL; + if (!xdna->dev_info->ops->hwctx_config) return -EOPNOTSUPP; @@ -324,7 +321,7 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr mutex_lock(&xdna->dev_lock); idx = srcu_read_lock(&client->hwctx_srcu); - hwctx = idr_find(&client->hwctx_idr, args->handle); + hwctx = xa_load(&client->hwctx_xa, args->handle); if (!hwctx) { XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); ret = -EINVAL; @@ -436,7 +433,7 @@ int amdxdna_cmd_submit(struct amdxdna_client *client, } idx = srcu_read_lock(&client->hwctx_srcu); - hwctx = idr_find(&client->hwctx_idr, hwctx_hdl); + hwctx = xa_load(&client->hwctx_xa, hwctx_hdl); if (!hwctx) { XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, hwctx_hdl); diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 4dfeca306d98..606433d73236 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -552,7 +552,7 @@ int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm struct drm_gem_object *gobj; int ret = 0; - if (args->ext || args->ext_flags) + if (args->ext || args->ext_flags || args->pad) return -EINVAL; gobj = drm_gem_object_lookup(filp, args->handle); diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 415d99abaaa3..1afc8079e3d1 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -6,7 +6,9 @@ #include <drm/drm_device.h> #include <drm/drm_managed.h> #include <linux/bitfield.h> +#include <linux/interrupt.h> #include <linux/iopoll.h> +#include <linux/xarray.h> #define CREATE_TRACE_POINTS #include <trace/events/amdxdna.h> @@ -54,8 +56,8 @@ struct mailbox_channel { struct xdna_mailbox_chann_res res[CHAN_RES_NUM]; int msix_irq; u32 iohub_int_addr; - struct idr chan_idr; - spinlock_t chan_idr_lock; /* protect chan_idr */ + struct xarray chan_xa; + u32 next_msgid; u32 x2i_tail; /* Received msg related fields */ @@ -164,19 +166,17 @@ static inline int mailbox_validate_msgid(int msg_id) static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg) { - unsigned long flags; - int msg_id; + u32 msg_id; + int ret; - spin_lock_irqsave(&mb_chann->chan_idr_lock, flags); - msg_id = idr_alloc_cyclic(&mb_chann->chan_idr, mb_msg, 0, - MAX_MSG_ID_ENTRIES, GFP_NOWAIT); - spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags); - if (msg_id < 0) - return msg_id; + ret = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg, + XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1), + &mb_chann->next_msgid, GFP_NOWAIT); + if (ret < 0) + return ret; /* - * The IDR becomes less efficient when dealing with larger IDs. - * Thus, add MAGIC_VAL to the higher bits. + * Add MAGIC_VAL to the higher bits. */ msg_id |= MAGIC_VAL; return msg_id; @@ -184,25 +184,17 @@ static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbo static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id) { - unsigned long flags; - msg_id &= ~MAGIC_VAL_MASK; - spin_lock_irqsave(&mb_chann->chan_idr_lock, flags); - idr_remove(&mb_chann->chan_idr, msg_id); - spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags); + xa_erase_irq(&mb_chann->chan_xa, msg_id); } -static int mailbox_release_msg(int id, void *p, void *data) +static void mailbox_release_msg(struct mailbox_channel *mb_chann, + struct mailbox_msg *mb_msg) { - struct mailbox_channel *mb_chann = data; - struct mailbox_msg *mb_msg = p; - MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x", mb_msg->pkg.header.id, mb_msg->pkg.header.opcode); mb_msg->notify_cb(mb_msg->handle, NULL, 0); kfree(mb_msg); - - return 0; } static int @@ -254,7 +246,6 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade void *data) { struct mailbox_msg *mb_msg; - unsigned long flags; int msg_id; int ret; @@ -265,15 +256,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade } msg_id &= ~MAGIC_VAL_MASK; - spin_lock_irqsave(&mb_chann->chan_idr_lock, flags); - mb_msg = idr_find(&mb_chann->chan_idr, msg_id); + mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id); if (!mb_msg) { MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id); - spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags); return -EINVAL; } - idr_remove(&mb_chann->chan_idr, msg_id); - spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags); MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x", header->opcode, header->total_size, header->id); @@ -497,8 +484,7 @@ xdna_mailbox_create_channel(struct mailbox *mb, memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i)); memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x)); - spin_lock_init(&mb_chann->chan_idr_lock); - idr_init(&mb_chann->chan_idr); + xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I); mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X); @@ -530,16 +516,18 @@ free_and_out: int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) { - if (!mb_chann) - return 0; + struct mailbox_msg *mb_msg; + unsigned long msg_id; MB_DBG(mb_chann, "IRQ disabled and RX work cancelled"); free_irq(mb_chann->msix_irq, mb_chann); destroy_workqueue(mb_chann->work_q); /* We can clean up and release resources */ - idr_for_each(&mb_chann->chan_idr, mailbox_release_msg, mb_chann); - idr_destroy(&mb_chann->chan_idr); + xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg) + mailbox_release_msg(mb_chann, mb_msg); + + xa_destroy(&mb_chann->chan_xa); MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq); kfree(mb_chann); @@ -548,9 +536,6 @@ int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann) void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann) { - if (!mb_chann) - return; - /* Disable an irq and wait. This might sleep. */ disable_irq(mb_chann->msix_irq); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 02533732d4ca..194e44fc243d 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -39,6 +39,7 @@ static const struct amdxdna_device_id amdxdna_ids[] = { { 0x17f0, 0x0, &dev_npu2_info }, { 0x17f0, 0x10, &dev_npu4_info }, { 0x17f0, 0x11, &dev_npu5_info }, + { 0x17f0, 0x20, &dev_npu6_info }, {0} }; @@ -77,7 +78,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) } mutex_init(&client->hwctx_lock); init_srcu_struct(&client->hwctx_srcu); - idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1); + xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC); mutex_init(&client->mm_lock); mutex_lock(&xdna->dev_lock); @@ -108,7 +109,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) XDNA_DBG(xdna, "closing pid %d", client->pid); - idr_destroy(&client->hwctx_idr); + xa_destroy(&client->hwctx_xa); cleanup_srcu_struct(&client->hwctx_srcu); mutex_destroy(&client->hwctx_lock); mutex_destroy(&client->mm_lock); @@ -160,6 +161,24 @@ static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct return ret; } +static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_drm_set_state *args = data; + int ret; + + if (!xdna->dev_info->ops->set_aie_state) + return -EOPNOTSUPP; + + XDNA_DBG(xdna, "Request parameter %u", args->param); + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->set_aie_state(client, args); + mutex_unlock(&xdna->dev_lock); + + return ret; +} + static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { /* Context */ DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0), @@ -173,6 +192,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0), /* AIE hardware */ DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY), }; static const struct file_operations amdxdna_fops = { @@ -390,8 +410,8 @@ static int amdxdna_rpmops_resume(struct device *dev) } static const struct dev_pm_ops amdxdna_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume) - SET_RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL) + SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume) + RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL) }; static struct pci_driver amdxdna_pci_driver = { diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index c50d65a050ad..37848a8d8031 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -6,12 +6,29 @@ #ifndef _AMDXDNA_PCI_DRV_H_ #define _AMDXDNA_PCI_DRV_H_ +#include <linux/xarray.h> + #define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args) #define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args) #define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args) #define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, ##args) #define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args) +#define XDNA_MBZ_DBG(xdna, ptr, sz) \ + ({ \ + int __i; \ + int __ret = 0; \ + u8 *__ptr = (u8 *)(ptr); \ + for (__i = 0; __i < (sz); __i++) { \ + if (__ptr[__i]) { \ + XDNA_DBG(xdna, "MBZ check failed"); \ + __ret = -EINVAL; \ + break; \ + } \ + } \ + __ret; \ + }) + #define to_xdna_dev(drm_dev) \ ((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev)) @@ -20,6 +37,7 @@ extern const struct drm_driver amdxdna_drm_drv; struct amdxdna_client; struct amdxdna_dev; struct amdxdna_drm_get_info; +struct amdxdna_drm_set_state; struct amdxdna_gem_obj; struct amdxdna_hwctx; struct amdxdna_sched_job; @@ -40,6 +58,7 @@ struct amdxdna_dev_ops { void (*hwctx_resume)(struct amdxdna_hwctx *hwctx); int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args); + int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args); }; /* @@ -100,7 +119,8 @@ struct amdxdna_client { struct mutex hwctx_lock; /* protect hwctx */ /* do NOT wait this srcu when hwctx_lock is held */ struct srcu_struct hwctx_srcu; - struct idr hwctx_idr; + struct xarray hwctx_xa; + u32 next_hwctxid; struct amdxdna_dev *xdna; struct drm_file *filp; @@ -111,11 +131,15 @@ struct amdxdna_client { int pasid; }; +#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \ + xa_for_each(&(client)->hwctx_xa, hwctx_id, entry) + /* Add device info below */ extern const struct amdxdna_dev_info dev_npu1_info; extern const struct amdxdna_dev_info dev_npu2_info; extern const struct amdxdna_dev_info dev_npu4_info; extern const struct amdxdna_dev_info dev_npu5_info; +extern const struct amdxdna_dev_info dev_npu6_info; int amdxdna_sysfs_init(struct amdxdna_dev *xdna); void amdxdna_sysfs_fini(struct amdxdna_dev *xdna); diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index f00c50461b09..e408af57e378 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -44,18 +44,30 @@ #define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE -#define NPU1_RT_CFG_TYPE_PDI_LOAD 2 -#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0 -#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1 +const struct rt_config npu1_default_rt_cfg[] = { + { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 0 }, +}; -#define NPU1_MPNPUCLK_FREQ_MAX 600 -#define NPU1_HCLK_FREQ_MAX 1024 +const struct dpm_clk_freq npu1_dpm_clk_table[] = { + {400, 800}, + {600, 1024}, + {600, 1024}, + {600, 1024}, + {600, 1024}, + {720, 1309}, + {720, 1309}, + {847, 1600}, + { 0 } +}; const struct amdxdna_dev_priv npu1_dev_priv = { .fw_path = "amdnpu/1502_00/npu.sbin", .protocol_major = 0x5, - .protocol_minor = 0x1, - .rt_config = {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP}, + .protocol_minor = 0x7, + .rt_config = npu1_default_rt_cfg, + .dpm_clk_tbl = npu1_dpm_clk_table, .col_align = COL_ALIGN_NONE, .mbox_dev_addr = NPU1_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ @@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = { DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6), DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7), }, - .smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX, - .smu_hclk_freq_max = NPU1_HCLK_FREQ_MAX, + .hw_ops = { + .set_dpm = npu1_set_dpm, + }, }; const struct amdxdna_dev_info dev_npu1_info = { diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c index 00cb381031d2..286bd0d475e2 100644 --- a/drivers/accel/amdxdna/npu2_regs.c +++ b/drivers/accel/amdxdna/npu2_regs.c @@ -61,18 +61,12 @@ #define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE #define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE -#define NPU2_RT_CFG_TYPE_PDI_LOAD 5 -#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0 -#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1 - -#define NPU2_MPNPUCLK_FREQ_MAX 1267 -#define NPU2_HCLK_FREQ_MAX 1800 - const struct amdxdna_dev_priv npu2_dev_priv = { .fw_path = "amdnpu/17f0_00/npu.sbin", .protocol_major = 0x6, - .protocol_minor = 0x1, - .rt_config = {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP}, + .protocol_minor = 0x6, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU2_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = { DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61), DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60), }, - .smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX, - .smu_hclk_freq_max = NPU2_HCLK_FREQ_MAX, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, }; const struct amdxdna_dev_info dev_npu2_info = { diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index b6dae9667cca..00c52833ce89 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -61,18 +61,33 @@ #define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE #define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE -#define NPU4_RT_CFG_TYPE_PDI_LOAD 5 -#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0 -#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1 +const struct rt_config npu4_default_rt_cfg[] = { + { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */ + { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */ + { 0 }, +}; -#define NPU4_MPNPUCLK_FREQ_MAX 1267 -#define NPU4_HCLK_FREQ_MAX 1800 +const struct dpm_clk_freq npu4_dpm_clk_table[] = { + {396, 792}, + {600, 1056}, + {792, 1152}, + {975, 1267}, + {975, 1267}, + {1056, 1408}, + {1152, 1584}, + {1267, 1800}, + { 0 } +}; const struct amdxdna_dev_priv npu4_dev_priv = { .fw_path = "amdnpu/17f0_10/npu.sbin", .protocol_major = 0x6, - .protocol_minor = 0x1, - .rt_config = {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP}, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU4_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ @@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = { DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61), DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60), }, - .smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX, - .smu_hclk_freq_max = NPU4_HCLK_FREQ_MAX, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, }; const struct amdxdna_dev_info dev_npu4_info = { diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index bed1baf8e160..118849272f27 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -61,18 +61,12 @@ #define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE #define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE -#define NPU5_RT_CFG_TYPE_PDI_LOAD 5 -#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0 -#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1 - -#define NPU5_MPNPUCLK_FREQ_MAX 1267 -#define NPU5_HCLK_FREQ_MAX 1800 - const struct amdxdna_dev_priv npu5_dev_priv = { .fw_path = "amdnpu/17f0_11/npu.sbin", .protocol_major = 0x6, - .protocol_minor = 0x1, - .rt_config = {NPU5_RT_CFG_TYPE_PDI_LOAD, NPU5_RT_CFG_VAL_PDI_LOAD_APP}, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, .col_align = COL_ALIGN_NATURE, .mbox_dev_addr = NPU5_MBOX_BAR_BASE, .mbox_size = 0, /* Use BAR size */ @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = { DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61), DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60), }, - .smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX, - .smu_hclk_freq_max = NPU5_HCLK_FREQ_MAX, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, }; const struct amdxdna_dev_info dev_npu5_info = { diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c new file mode 100644 index 000000000000..f46c760cefc7 --- /dev/null +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ + +#include <drm/amdxdna_accel.h> +#include <drm/drm_device.h> +#include <drm/gpu_scheduler.h> +#include <linux/sizes.h> + +#include "aie2_pci.h" +#include "amdxdna_mailbox.h" +#include "amdxdna_pci_drv.h" + +/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PUB_SEC_INTR 0x3010060 +#define MPNPU_PUB_PWRMGMT_INTR 0x3010064 +#define MPNPU_PUB_SCRATCH0 0x301006C +#define MPNPU_PUB_SCRATCH1 0x3010070 +#define MPNPU_PUB_SCRATCH2 0x3010074 +#define MPNPU_PUB_SCRATCH3 0x3010078 +#define MPNPU_PUB_SCRATCH4 0x301007C +#define MPNPU_PUB_SCRATCH5 0x3010080 +#define MPNPU_PUB_SCRATCH6 0x3010084 +#define MPNPU_PUB_SCRATCH7 0x3010088 +#define MPNPU_PUB_SCRATCH8 0x301008C +#define MPNPU_PUB_SCRATCH9 0x3010090 +#define MPNPU_PUB_SCRATCH10 0x3010094 +#define MPNPU_PUB_SCRATCH11 0x3010098 +#define MPNPU_PUB_SCRATCH12 0x301009C +#define MPNPU_PUB_SCRATCH13 0x30100A0 +#define MPNPU_PUB_SCRATCH14 0x30100A4 +#define MPNPU_PUB_SCRATCH15 0x30100A8 +#define MP0_C2PMSG_73 0x3810A24 +#define MP0_C2PMSG_123 0x3810AEC + +#define MP1_C2PMSG_0 0x3B10900 +#define MP1_C2PMSG_60 0x3B109F0 +#define MP1_C2PMSG_61 0x3B109F4 + +#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000 +#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000 +#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000 +#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000 + +#define MMNPU_APERTURE0_BASE 0x3000000 +#define MMNPU_APERTURE1_BASE 0x3600000 +#define MMNPU_APERTURE3_BASE 0x3810000 +#define MMNPU_APERTURE4_BASE 0x3B10000 + +/* PCIe BAR Index for NPU6 */ +#define NPU6_REG_BAR_INDEX 0 +#define NPU6_MBOX_BAR_INDEX 0 +#define NPU6_PSP_BAR_INDEX 4 +#define NPU6_SMU_BAR_INDEX 5 +#define NPU6_SRAM_BAR_INDEX 2 +/* Associated BARs and Apertures */ +#define NPU6_REG_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU6_MBOX_BAR_BASE MMNPU_APERTURE0_BASE +#define NPU6_PSP_BAR_BASE MMNPU_APERTURE3_BASE +#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE +#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE + +const struct amdxdna_dev_priv npu6_dev_priv = { + .fw_path = "amdnpu/17f0_10/npu.sbin", + .protocol_major = 0x6, + .protocol_minor = 12, + .rt_config = npu4_default_rt_cfg, + .dpm_clk_tbl = npu4_dpm_clk_table, + .col_align = COL_ALIGN_NATURE, + .mbox_dev_addr = NPU6_MBOX_BAR_BASE, + .mbox_size = 0, /* Use BAR size */ + .sram_dev_addr = NPU6_SRAM_BAR_BASE, + .sram_offs = { + DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0), + DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15), + }, + .psp_regs_off = { + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU6_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU6_REG, MPNPU_PUB_SCRATCH4), + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU6_REG, MPNPU_PUB_SCRATCH9), + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73), + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123), + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + }, + .smu_regs_off = { + DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0), + DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU6_SMU, MP1_C2PMSG_60), + DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE), + DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61), + DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60), + }, + .hw_ops = { + .set_dpm = npu4_set_dpm, + }, + +}; + +const struct amdxdna_dev_info dev_npu6_info = { + .reg_bar = NPU6_REG_BAR_INDEX, + .mbox_bar = NPU6_MBOX_BAR_INDEX, + .sram_bar = NPU6_SRAM_BAR_INDEX, + .psp_bar = NPU6_PSP_BAR_INDEX, + .smu_bar = NPU6_SMU_BAR_INDEX, + .first_col = 0, + .dev_mem_buf_shift = 15, /* 32 KiB aligned */ + .dev_mem_base = AIE2_DEVM_BASE, + .dev_mem_size = AIE2_DEVM_SIZE, + .vbnv = "RyzenAI-npu6", + .device_type = AMDXDNA_DEV_TYPE_KMQ, + .dev_priv = &npu6_dev_priv, + .ops = &aie2_ops, +}; |