summaryrefslogtreecommitdiff
path: root/drivers/accel/amdxdna
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/amdxdna')
-rw-r--r--drivers/accel/amdxdna/Makefile4
-rw-r--r--drivers/accel/amdxdna/TODO2
-rw-r--r--drivers/accel/amdxdna/aie2_ctx.c16
-rw-r--r--drivers/accel/amdxdna/aie2_message.c41
-rw-r--r--drivers/accel/amdxdna/aie2_pci.c256
-rw-r--r--drivers/accel/amdxdna/aie2_pci.h68
-rw-r--r--drivers/accel/amdxdna/aie2_pm.c108
-rw-r--r--drivers/accel/amdxdna/aie2_smu.c85
-rw-r--r--drivers/accel/amdxdna/aie2_solver.c59
-rw-r--r--drivers/accel/amdxdna/aie2_solver.h1
-rw-r--r--drivers/accel/amdxdna/amdxdna_ctx.c53
-rw-r--r--drivers/accel/amdxdna/amdxdna_gem.c2
-rw-r--r--drivers/accel/amdxdna/amdxdna_mailbox.c61
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.c28
-rw-r--r--drivers/accel/amdxdna/amdxdna_pci_drv.h26
-rw-r--r--drivers/accel/amdxdna/npu1_regs.c31
-rw-r--r--drivers/accel/amdxdna/npu2_regs.c17
-rw-r--r--drivers/accel/amdxdna/npu4_regs.c34
-rw-r--r--drivers/accel/amdxdna/npu5_regs.c17
-rw-r--r--drivers/accel/amdxdna/npu6_regs.c114
20 files changed, 777 insertions, 246 deletions
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index ed6f87910880..0e9adf6890a0 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -5,6 +5,7 @@ amdxdna-y := \
aie2_error.o \
aie2_message.o \
aie2_pci.o \
+ aie2_pm.o \
aie2_psp.o \
aie2_smu.o \
aie2_solver.o \
@@ -17,5 +18,6 @@ amdxdna-y := \
npu1_regs.o \
npu2_regs.o \
npu4_regs.o \
- npu5_regs.o
+ npu5_regs.o \
+ npu6_regs.o
obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
index a130259f5f70..5119bccd1917 100644
--- a/drivers/accel/amdxdna/TODO
+++ b/drivers/accel/amdxdna/TODO
@@ -1,5 +1,3 @@
-- Replace idr with xa
- Add import and export BO support
- Add debugfs support
- Add debug BO support
-- Improve power management
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 90e8d87666a9..9facf45818f9 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -11,6 +11,7 @@
#include <drm/drm_syncobj.h>
#include <linux/hmm.h>
#include <linux/types.h>
+#include <linux/xarray.h>
#include <trace/events/amdxdna.h>
#include "aie2_msg_priv.h"
@@ -90,11 +91,11 @@ void aie2_restart_ctx(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_hwctx *hwctx;
- int next = 0;
+ unsigned long hwctx_id;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
mutex_lock(&client->hwctx_lock);
- idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
if (hwctx->status != HWCTX_STAT_STOP)
continue;
@@ -179,7 +180,7 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
up(&job->hwctx->priv->job_sem);
job->job_done = true;
dma_fence_put(fence);
- mmput(job->mm);
+ mmput_async(job->mm);
aie2_job_put(job);
}
@@ -518,6 +519,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
struct drm_gpu_scheduler *sched;
struct amdxdna_hwctx_priv *priv;
struct amdxdna_gem_obj *heap;
+ struct amdxdna_dev_hdl *ndev;
int i, ret;
priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
@@ -612,6 +614,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
}
hwctx->status = HWCTX_STAT_INIT;
+ ndev = xdna->dev_handle;
+ ndev->hwctx_num++;
XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -641,10 +645,13 @@ free_priv:
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
+ struct amdxdna_dev_hdl *ndev;
struct amdxdna_dev *xdna;
int idx;
xdna = hwctx->client->xdna;
+ ndev = xdna->dev_handle;
+ ndev->hwctx_num--;
drm_sched_wqueue_stop(&hwctx->priv->sched);
/* Now, scheduler will not send command to device. */
@@ -683,6 +690,9 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
int ret;
XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
+ if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
+ return -EINVAL;
+
if (hwctx->status != HWCTX_STAT_INIT) {
XDNA_ERR(xdna, "Not support re-config CU");
return -EINVAL;
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index c01a1d957b56..9e2c9a44f76a 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -14,6 +14,7 @@
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/types.h>
+#include <linux/xarray.h>
#include "aie2_msg_priv.h"
#include "aie2_pci.h"
@@ -70,52 +71,33 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
{
DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
+ int ret;
req.type = type;
req.value = value;
- return aie2_send_mgmt_msg_wait(ndev, &msg);
-}
-
-int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
-{
- DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
- int ret;
-
- req.type = type;
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
if (ret) {
- XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
return ret;
}
- *value = resp.value;
return 0;
}
-int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev)
+int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
{
- DECLARE_AIE2_MSG(protocol_version, MSG_OP_GET_PROTOCOL_VERSION);
- struct amdxdna_dev *xdna = ndev->xdna;
+ DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
int ret;
+ req.type = type;
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
if (ret) {
- XDNA_ERR(xdna, "Failed to get protocol version, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
return ret;
}
- if (resp.major != ndev->priv->protocol_major) {
- XDNA_ERR(xdna, "Incompatible firmware protocol version major %d minor %d",
- resp.major, resp.minor);
- return -EINVAL;
- }
-
- if (resp.minor < ndev->priv->protocol_minor) {
- XDNA_ERR(xdna, "Firmware minor version smaller than supported");
- return -EINVAL;
- }
-
+ *value = resp.value;
return 0;
}
@@ -315,10 +297,10 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
struct amdxdna_dev *xdna = ndev->xdna;
struct amdxdna_client *client;
struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
dma_addr_t dma_addr;
u32 aie_bitmap = 0;
u8 *buff_addr;
- int next = 0;
int ret, idx;
buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
@@ -329,7 +311,7 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
/* Go through each hardware context and mark the AIE columns that are active */
list_for_each_entry(client, &xdna->client_list, node) {
idx = srcu_read_lock(&client->hwctx_srcu);
- idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next)
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
aie_bitmap |= amdxdna_hwctx_col_map(hwctx);
srcu_read_unlock(&client->hwctx_srcu, idx);
}
@@ -413,6 +395,9 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
for (i = 0; i < hwctx->cus->num_cus; i++) {
struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];
+ if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
+ return -EINVAL;
+
gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
if (!gobj) {
XDNA_ERR(xdna, "Lookup GEM object failed");
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 349ada697e48..8de8f3bd4987 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -15,6 +15,7 @@
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
+#include <linux/xarray.h>
#include "aie2_msg_priv.h"
#include "aie2_pci.h"
@@ -33,17 +34,51 @@ MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
* The related register and ring buffer information is on SRAM BAR.
* This struct is the register layout.
*/
+#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
struct mgmt_mbox_chann_info {
- u32 x2i_tail;
- u32 x2i_head;
- u32 x2i_buf;
- u32 x2i_buf_sz;
- u32 i2x_tail;
- u32 i2x_head;
- u32 i2x_buf;
- u32 i2x_buf_sz;
+ __u32 x2i_tail;
+ __u32 x2i_head;
+ __u32 x2i_buf;
+ __u32 x2i_buf_sz;
+ __u32 i2x_tail;
+ __u32 i2x_head;
+ __u32 i2x_buf;
+ __u32 i2x_buf_sz;
+ __u32 magic;
+ __u32 msi_id;
+ __u32 prot_major;
+ __u32 prot_minor;
+ __u32 rsvd[4];
};
+/*
+ * Compare the mailbox protocol version reported by firmware with the
+ * version the driver expects (ndev->priv->protocol_major/protocol_minor).
+ *
+ * Return: 0 when compatible, -EINVAL when the major versions differ or
+ * when the firmware minor version is lower than the driver's.
+ */
+static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+
+ /*
+ * The driver supported mailbox behavior is defined by
+ * ndev->priv->protocol_major and protocol_minor.
+ *
+ * When protocol_major and fw_major are different, it means driver
+ * and firmware are incompatible.
+ */
+ if (ndev->priv->protocol_major != fw_major) {
+ XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
+ fw_major, fw_minor);
+ return -EINVAL;
+ }
+
+ /*
+ * When protocol_minor is greater than fw_minor, that means driver
+ * relies on operation the installed firmware does not support.
+ */
+ if (ndev->priv->protocol_minor > fw_minor) {
+ XDNA_ERR(xdna, "Firmware minor version smaller than supported");
+ return -EINVAL;
+ }
+ return 0;
+}
+
static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
{
struct amdxdna_dev *xdna = ndev->xdna;
@@ -57,6 +92,8 @@ static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size);
XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
+ XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
+ XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
}
static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
@@ -87,6 +124,12 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
+ if (info_regs.magic != MGMT_MBOX_MAGIC) {
+ XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
+ ret = -EINVAL;
+ goto done;
+ }
+
i2x = &ndev->mgmt_i2x;
x2i = &ndev->mgmt_x2i;
@@ -99,38 +142,42 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
x2i->rb_size = info_regs.x2i_buf_sz;
- ndev->mgmt_chan_idx = CHANN_INDEX(ndev, x2i->rb_start_addr);
+ ndev->mgmt_chan_idx = info_regs.msi_id;
+ ndev->mgmt_prot_major = info_regs.prot_major;
+ ndev->mgmt_prot_minor = info_regs.prot_minor;
+
+ ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);
+
+done:
aie2_dump_chann_info_debug(ndev);
/* Must clear address at FW_ALIVE_OFF */
writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
- return 0;
+ return ret;
}
-static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev)
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val)
{
- const struct rt_config *cfg = &ndev->priv->rt_config;
- u64 value;
+ const struct rt_config *cfg;
+ u32 value;
int ret;
- ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value);
- if (ret) {
- XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed",
- cfg->type, cfg->value);
- return ret;
- }
+ for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
+ if (cfg->category != category)
+ continue;
- ret = aie2_get_runtime_cfg(ndev, cfg->type, &value);
- if (ret) {
- XDNA_ERR(ndev->xdna, "Get runtime cfg failed");
- return ret;
+ value = val ? *val : cfg->value;
+ ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
+ cfg->type, value);
+ return ret;
+ }
}
- if (value != cfg->value)
- return -EINVAL;
-
return 0;
}
@@ -157,13 +204,7 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
{
int ret;
- ret = aie2_check_protocol_version(ndev);
- if (ret) {
- XDNA_ERR(ndev->xdna, "Check header hash failed");
- return ret;
- }
-
- ret = aie2_runtime_cfg(ndev);
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Runtime config failed");
return ret;
@@ -257,9 +298,25 @@ static int aie2_xrs_unload(void *cb_arg)
return ret;
}
+/*
+ * Resource solver callback (installed as .set_dft_dpm_level in
+ * aie2_xrs_actions): record @dpm_level as the default DPM level and apply
+ * it to hardware when the device is in POWER_MODE_DEFAULT.
+ *
+ * Warns if xdna->dev_lock is not held by the caller.
+ *
+ * Return: 0 when nothing needs to change, otherwise the result of the
+ * per-device set_dpm() hook.
+ */
+static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+ struct amdxdna_dev_hdl *ndev;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ ndev = xdna->dev_handle;
+ /* Always remember the new default, even if it is not applied now. */
+ ndev->dft_dpm_level = dpm_level;
+ /* Skip hardware access in non-default modes or when already at this level. */
+ if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
+ return 0;
+
+ return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+}
+
static struct xrs_action_ops aie2_xrs_actions = {
.load = aie2_xrs_load,
.unload = aie2_xrs_unload,
+ .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
};
static void aie2_hw_stop(struct amdxdna_dev *xdna)
@@ -267,12 +324,22 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
+ if (ndev->dev_status <= AIE2_DEV_INIT) {
+ XDNA_ERR(xdna, "device is already stopped");
+ return;
+ }
+
aie2_mgmt_fw_fini(ndev);
xdna_mailbox_stop_channel(ndev->mgmt_chann);
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
+ ndev->mgmt_chann = NULL;
+ drmm_kfree(&xdna->ddev, ndev->mbox);
+ ndev->mbox = NULL;
aie2_psp_stop(ndev->psp_hdl);
aie2_smu_fini(ndev);
pci_disable_device(pdev);
+
+ ndev->dev_status = AIE2_DEV_INIT;
}
static int aie2_hw_start(struct amdxdna_dev *xdna)
@@ -283,6 +350,11 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
u32 xdna_mailbox_intr_reg;
int mgmt_mb_irq, ret;
+ if (ndev->dev_status >= AIE2_DEV_START) {
+ XDNA_INFO(xdna, "device is already started");
+ return 0;
+ }
+
ret = pci_enable_device(pdev);
if (ret) {
XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
@@ -339,12 +411,20 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
goto stop_psp;
}
+ ret = aie2_pm_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
ret = aie2_mgmt_fw_init(ndev);
if (ret) {
XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
goto destroy_mgmt_chann;
}
+ ndev->dev_status = AIE2_DEV_START;
+
return 0;
destroy_mgmt_chann:
@@ -463,10 +543,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
}
ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
- xrs_cfg.clk_list.num_levels = 3;
- xrs_cfg.clk_list.cu_clk_list[0] = 0;
- xrs_cfg.clk_list.cu_clk_list[1] = 800;
- xrs_cfg.clk_list.cu_clk_list[2] = 1000;
+ xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
+ for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
+ xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
xrs_cfg.sys_eff_factor = 1;
xrs_cfg.ddev = &xdna->ddev;
xrs_cfg.actions = &aie2_xrs_actions;
@@ -623,6 +702,39 @@ static int aie2_get_aie_version(struct amdxdna_client *client,
return 0;
}
+/*
+ * DRM_AMDXDNA_QUERY_FIRMWARE_VERSION handler: copy the firmware version
+ * cached in xdna->fw_ver to the user buffer at args->buffer.
+ *
+ * Return: 0 on success, -EFAULT if the copy to user space fails.
+ */
+static int aie2_get_firmware_version(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ /*
+ * Zero-initialize, as aie2_get_power_mode() does, so no uninitialized
+ * stack bytes can reach user space through copy_to_user().
+ */
+ struct amdxdna_drm_query_firmware_version version = {};
+ struct amdxdna_dev *xdna = client->xdna;
+
+ version.major = xdna->fw_ver.major;
+ version.minor = xdna->fw_ver.minor;
+ version.patch = xdna->fw_ver.sub;
+ version.build = xdna->fw_ver.build;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * DRM_AMDXDNA_GET_POWER_MODE handler: report the current power mode
+ * (ndev->pw_mode) to the user buffer at args->buffer.
+ *
+ * Return: 0 on success, -EFAULT if the copy to user space fails.
+ */
+static int aie2_get_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
+ struct amdxdna_drm_get_power_mode mode = {};
+
+ mode.power_mode = ndev->pw_mode;
+
+ return copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)) ?
+ -EFAULT : 0;
+}
+
static int aie2_get_clock_metadata(struct amdxdna_client *client,
struct amdxdna_drm_get_info *args)
{
@@ -636,11 +748,11 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
if (!clock)
return -ENOMEM;
- memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
- sizeof(clock->mp_npu_clock.name));
- clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
- memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock->h_clock.name));
- clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
+ snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
+ "MP-NPU Clock");
+ clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
+ snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
+ clock->h_clock.freq_mhz = ndev->hclk_freq;
if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
ret = -EFAULT;
@@ -657,11 +769,11 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client,
struct amdxdna_drm_query_hwctx *tmp;
struct amdxdna_client *tmp_client;
struct amdxdna_hwctx *hwctx;
+ unsigned long hwctx_id;
bool overflow = false;
u32 req_bytes = 0;
u32 hw_i = 0;
int ret = 0;
- int next;
int idx;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
@@ -673,8 +785,7 @@ static int aie2_get_hwctx_status(struct amdxdna_client *client,
buf = u64_to_user_ptr(args->buffer);
list_for_each_entry(tmp_client, &xdna->client_list, node) {
idx = srcu_read_lock(&tmp_client->hwctx_srcu);
- next = 0;
- idr_for_each_entry_continue(&tmp_client->hwctx_idr, hwctx, next) {
+ amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) {
req_bytes += sizeof(*tmp);
if (args->buffer_size < req_bytes) {
/* Continue iterating to get the required size */
@@ -736,6 +847,12 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
ret = aie2_get_hwctx_status(client, args);
break;
+ case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
+ ret = aie2_get_firmware_version(client, args);
+ break;
+ case DRM_AMDXDNA_GET_POWER_MODE:
+ ret = aie2_get_power_mode(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
@@ -746,12 +863,61 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
return ret;
}
+/*
+ * DRM_AMDXDNA_SET_POWER_MODE handler: read the requested power mode from
+ * the user buffer at args->buffer, validate it and hand it to
+ * aie2_pm_set_mode().
+ *
+ * Return: -EFAULT if the request cannot be copied in, -EINVAL if the pad
+ * check fails or the mode is above POWER_MODE_TURBO, otherwise the result
+ * of aie2_pm_set_mode().
+ */
+static int aie2_set_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_drm_set_power_mode power_state;
+ enum amdxdna_power_mode_type power_mode;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
+ sizeof(power_state))) {
+ XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
+ return -EFAULT;
+ }
+
+ /* NOTE(review): XDNA_MBZ_DBG presumably rejects non-zero padding - confirm. */
+ if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad)))
+ return -EINVAL;
+
+ power_mode = power_state.power_mode;
+ if (power_mode > POWER_MODE_TURBO) {
+ XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
+ return -EINVAL;
+ }
+
+ return aie2_pm_set_mode(xdna->dev_handle, power_mode);
+}
+
+/*
+ * .set_aie_state device op: dispatch a set-state request by args->param.
+ * Only DRM_AMDXDNA_SET_POWER_MODE is handled here.
+ *
+ * Return: -ENODEV if drm_dev_enter() fails, -EOPNOTSUPP for an unknown
+ * parameter, otherwise the result of the selected handler.
+ */
+static int aie2_set_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ /* Paired with drm_dev_exit() below. */
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_SET_POWER_MODE:
+ ret = aie2_set_power_mode(client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ drm_dev_exit(idx);
+ return ret;
+}
+
const struct amdxdna_dev_ops aie2_ops = {
.init = aie2_init,
.fini = aie2_fini,
.resume = aie2_hw_start,
.suspend = aie2_hw_stop,
.get_aie_info = aie2_get_info,
+ .set_aie_state = aie2_set_state,
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 6a2686255c9c..cc159cadff9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -6,6 +6,7 @@
#ifndef _AIE2_PCI_H_
#define _AIE2_PCI_H_
+#include <drm/amdxdna_accel.h>
#include <linux/semaphore.h>
#include "amdxdna_mailbox.h"
@@ -38,9 +39,6 @@
})
#define CHAN_SLOT_SZ SZ_8K
-#define CHANN_INDEX(ndev, rbuf_off) \
- (((rbuf_off) - SRAM_REG_OFF((ndev), MBOX_CHANN_OFF)) / CHAN_SLOT_SZ)
-
#define MBOX_SIZE(ndev) \
({ \
typeof(ndev) _ndev = (ndev); \
@@ -48,9 +46,6 @@
pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})
-#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_mpnpuclk_freq_max)
-#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
-
enum aie2_smu_reg_idx {
SMU_CMD_REG = 0,
SMU_ARG_REG,
@@ -112,14 +107,20 @@ struct aie_metadata {
struct aie_tile_metadata shim;
};
-struct clock_entry {
- char name[16];
- u32 freq_mhz;
+enum rt_config_category {
+ AIE2_RT_CFG_INIT,
+ AIE2_RT_CFG_CLK_GATING,
};
struct rt_config {
u32 type;
u32 value;
+ u32 category;
+};
+
+struct dpm_clk_freq {
+ u32 npuclk;
+ u32 hclk;
};
/*
@@ -149,6 +150,12 @@ struct amdxdna_hwctx_priv {
struct drm_syncobj *syncobj;
};
+enum aie2_dev_status {
+ AIE2_DEV_UNINIT,
+ AIE2_DEV_INIT,
+ AIE2_DEV_START,
+};
+
struct amdxdna_dev_hdl {
struct amdxdna_dev *xdna;
const struct amdxdna_dev_priv *priv;
@@ -160,17 +167,29 @@ struct amdxdna_dev_hdl {
struct xdna_mailbox_chann_res mgmt_x2i;
struct xdna_mailbox_chann_res mgmt_i2x;
u32 mgmt_chan_idx;
+ u32 mgmt_prot_major;
+ u32 mgmt_prot_minor;
u32 total_col;
struct aie_version version;
struct aie_metadata metadata;
- struct clock_entry mp_npu_clock;
- struct clock_entry h_clock;
+
+ /* power management and clock*/
+ enum amdxdna_power_mode_type pw_mode;
+ u32 dpm_level;
+ u32 dft_dpm_level;
+ u32 max_dpm_level;
+ u32 clk_gating;
+ u32 npuclk_freq;
+ u32 hclk_freq;
/* Mailbox and the management channel */
struct mailbox *mbox;
struct mailbox_channel *mgmt_chann;
struct async_events *async_events;
+
+ enum aie2_dev_status dev_status;
+ u32 hwctx_num;
};
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
@@ -181,11 +200,17 @@ struct aie2_bar_off_pair {
u32 offset;
};
+struct aie2_hw_ops {
+ int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
struct amdxdna_dev_priv {
const char *fw_path;
u64 protocol_major;
u64 protocol_minor;
- struct rt_config rt_config;
+ const struct rt_config *rt_config;
+ const struct dpm_clk_freq *dpm_clk_tbl;
+
#define COL_ALIGN_NONE 0
#define COL_ALIGN_NATURE 1
u32 col_align;
@@ -196,15 +221,29 @@ struct amdxdna_dev_priv {
struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
- u32 smu_mpnpuclk_freq_max;
- u32 smu_hclk_freq_max;
+ struct aie2_hw_ops hw_ops;
};
extern const struct amdxdna_dev_ops aie2_ops;
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+
/* aie2_smu.c */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
/* aie2_psp.c */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
@@ -222,7 +261,6 @@ int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
-int aie2_check_protocol_version(struct amdxdna_dev_hdl *ndev);
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
new file mode 100644
index 000000000000..426c38fce848
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_CLK_GATING_ENABLE 1
+#define AIE2_CLK_GATING_DISABLE 0
+
+/*
+ * Apply @val to every runtime config entry in the AIE2_RT_CFG_CLK_GATING
+ * category and, on success, cache it in ndev->clk_gating.
+ *
+ * Return: 0 on success, otherwise the error from aie2_runtime_cfg().
+ */
+static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
+{
+ int ret;
+
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
+ if (ret)
+ return ret;
+
+ /* Cached so aie2_pm_init() can restore it on resume. */
+ ndev->clk_gating = val;
+ return 0;
+}
+
+/*
+ * Set up power management when the hardware is started.
+ *
+ * If the device has been initialized before (dev_status is not
+ * AIE2_DEV_UNINIT), re-apply the cached DPM level and clock gating
+ * setting. Otherwise work out the maximum DPM level from the clock table,
+ * switch to it, enable clock gating and select POWER_MODE_DEFAULT.
+ *
+ * Return: 0 on success, otherwise the error from set_dpm() or from
+ * setting clock gating.
+ */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ if (ndev->dev_status != AIE2_DEV_UNINIT) {
+ /* Resume device */
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
+ if (ret)
+ return ret;
+
+ return 0;
+ }
+
+ /*
+ * Walk the table until an entry with hclk == 0 and step back one:
+ * max_dpm_level is the index of the last non-zero entry.
+ * NOTE(review): assumes dpm_clk_tbl ends with a zero hclk entry and has
+ * at least one valid entry - confirm against the npu*_regs.c tables.
+ */
+ while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
+ ndev->max_dpm_level++;
+ ndev->max_dpm_level--;
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
+ if (ret)
+ return ret;
+
+ ndev->pw_mode = POWER_MODE_DEFAULT;
+ ndev->dft_dpm_level = ndev->max_dpm_level;
+
+ return 0;
+}
+
+/*
+ * Switch the device to power mode @target.
+ *
+ * Each mode maps to a DPM level and a clock gating setting:
+ * POWER_MODE_TURBO   - max DPM level, clock gating disabled
+ * POWER_MODE_HIGH    - max DPM level, clock gating enabled
+ * POWER_MODE_DEFAULT - default DPM level (dft_dpm_level), clock gating enabled
+ *
+ * Warns if xdna->dev_lock is not held by the caller.
+ *
+ * Return: 0 on success or when already in @target, -EINVAL when turbo is
+ * requested while hwctx_num is non-zero, -EOPNOTSUPP for any other mode,
+ * otherwise the error from set_dpm() or from setting clock gating.
+ */
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ u32 clk_gating, dpm_level;
+ int ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (ndev->pw_mode == target)
+ return 0;
+
+ switch (target) {
+ case POWER_MODE_TURBO:
+ if (ndev->hwctx_num) {
+ XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
+ return -EINVAL;
+ }
+
+ clk_gating = AIE2_CLK_GATING_DISABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_HIGH:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_DEFAULT:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->dft_dpm_level;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, clk_gating);
+ if (ret)
+ return ret;
+
+ /* Only record the new mode once both settings were applied. */
+ ndev->pw_mode = target;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index 91893d438da7..73388443c676 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -19,8 +19,11 @@
#define AIE2_SMU_POWER_OFF 0x4
#define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5
#define AIE2_SMU_SET_HCLK_FREQ 0x6
+#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7
+#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8
-static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
+ u32 reg_arg, u32 *out)
{
u32 resp;
int ret;
@@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
return ret;
}
+ if (out)
+ *out = readl(SMU_REG(ndev, SMU_OUT_REG));
+
if (resp != SMU_RESULT_OK) {
XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
return -EINVAL;
@@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
return 0;
}
-static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
+/*
+ * Program the MP-NPU clock and H clock for @dpm_level using the
+ * frequencies in ndev->priv->dpm_clk_tbl, then cache the frequencies
+ * read back from the SMU and the new DPM level in @ndev.
+ *
+ * Return: 0 on success, otherwise the error from aie2_smu_exec(). On
+ * failure the cached value for the clock that failed is left untouched,
+ * because aie2_smu_exec() may not have written the output frequency.
+ */
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
+ u32 freq;
int ret;
- if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
- XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
- return -EINVAL;
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
+ return ret;
}
+ ndev->npuclk_freq = freq;
- ndev->mp_npu_clock.freq_mhz = freq_mhz;
- ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
- if (!ret)
- XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", freq_mhz);
-
- return ret;
-}
-
-static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
-{
- int ret;
-
- if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
- XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
- return -EINVAL;
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
+ return ret;
}
+ ndev->hclk_freq = freq;
+ ndev->dpm_level = dpm_level;
- ndev->h_clock.freq_mhz = freq_mhz;
- ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
- if (!ret)
- XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", freq_mhz);
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
- return ret;
+ return 0;
}
-int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
int ret;
- ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
if (ret) {
- XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
+ dpm_level, ret);
return ret;
}
- ret = aie2_smu_set_mpnpu_clock_freq(ndev, SMU_MPNPUCLK_FREQ_MAX(ndev));
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
if (ret) {
- XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
+ dpm_level, ret);
return ret;
}
- snprintf(ndev->mp_npu_clock.name, sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock");
- ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
+ ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
+ ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
+ ndev->dpm_level = dpm_level;
+
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
+
+ return 0;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
if (ret) {
- XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
return ret;
}
- snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H Clock");
return 0;
}
@@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
{
int ret;
- ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
+ ndev->priv->hw_ops.set_dpm(ndev, 0);
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
if (ret)
XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
}
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
index a537c66589a4..1939625d6027 100644
--- a/drivers/accel/amdxdna/aie2_solver.c
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -25,6 +25,7 @@ struct solver_node {
struct partition_node *pt_node;
void *cb_arg;
+ u32 dpm_level;
u32 cols_len;
u32 start_cols[] __counted_by(cols_len);
};
@@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
return 0;
}
+/*
+ * Tell whether @rqos carries enough QoS information to pick a DPM level:
+ * a non-zero gops together with a non-zero fps or latency.
+ */
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+ /*
+ * gops comes from the xmodel and is expected to be set; fps and
+ * latency are the parameters an application may configure.
+ */
+ return rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0);
+}
+
+/*
+ * Choose the DPM level for a new request and push it to the device through
+ * the set_dft_dpm_level action.
+ *
+ * Without usable QoS parameters the maximum level is chosen. Otherwise the
+ * level found for this request is raised to the highest dpm_level among
+ * the nodes already in the resource group.
+ *
+ * The chosen level is stored in @dpm_level before the action is called, so
+ * it is written even when the action fails.
+ *
+ * Return: the result of xrs->cfg.actions->set_dft_dpm_level().
+ */
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+ struct solver_rgroup *rgp = &xrs->rgp;
+ struct cdo_parts *cdop = &req->cdo;
+ struct aie_qos *rqos = &req->rqos;
+ u32 freq, max_dpm_level, level;
+ struct solver_node *node;
+
+ max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
+ /* If no QoS parameters are passed, set it to the max DPM level */
+ if (!is_valid_qos_dpm_params(rqos)) {
+ level = max_dpm_level;
+ goto set_dpm;
+ }
+
+ /*
+ * Stop at the first level for which qos_meet() returns 0; if none does,
+ * level ends up at max_dpm_level.
+ * NOTE(review): presumably qos_meet() returns 0 when the GOPs
+ * requirement is met - confirm against its definition.
+ */
+ for (level = 0; level < max_dpm_level; level++) {
+ freq = xrs->cfg.clk_list.cu_clk_list[level];
+ if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
+ break;
+ }
+
+ /* set the dpm level which fits all the sessions */
+ list_for_each_entry(node, &rgp->node_list, list) {
+ if (node->dpm_level > level)
+ level = node->dpm_level;
+ }
+
+set_dpm:
+ *dpm_level = level;
+ return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
+}
+
static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
{
struct solver_node *node;
@@ -159,12 +205,9 @@ static int get_free_partition(struct solver_state *xrs,
pt_node->ncols = ncols;
/*
- * Before fully support latency in QoS, if a request
- * specifies a non-zero latency value, it will not share
- * the partition with other requests.
+ * Always set exclusive to false for now.
*/
- if (req->rqos.latency)
- pt_node->exclusive = true;
+ pt_node->exclusive = false;
list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
xrs->rgp.npartition_node++;
@@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
struct xrs_action_load load_act;
struct solver_node *snode;
struct solver_state *xrs;
+ u32 dpm_level;
int ret;
xrs = (struct solver_state *)hdl;
@@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
if (ret)
goto free_node;
+ ret = set_dpm_level(xrs, req, &dpm_level);
+ if (ret)
+ goto free_node;
+
+ snode->dpm_level = dpm_level;
snode->cb_arg = cb_arg;
drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h
index 9b1847bb46a6..a2e3c52229e9 100644
--- a/drivers/accel/amdxdna/aie2_solver.h
+++ b/drivers/accel/amdxdna/aie2_solver.h
@@ -99,6 +99,7 @@ struct clk_list_info {
struct xrs_action_ops {
int (*load)(void *cb_arg, struct xrs_action_load *action);
int (*unload)(void *cb_arg);
+ int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
};
/*
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 5478b631b73f..d11b1c83d9c3 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -11,6 +11,7 @@
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
+#include <linux/xarray.h>
#include <trace/events/amdxdna.h>
#include "amdxdna_ctx.h"
@@ -63,11 +64,11 @@ void amdxdna_hwctx_suspend(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_hwctx *hwctx;
- int next = 0;
+ unsigned long hwctx_id;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
mutex_lock(&client->hwctx_lock);
- idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next)
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
xdna->dev_info->ops->hwctx_suspend(hwctx);
mutex_unlock(&client->hwctx_lock);
}
@@ -76,11 +77,11 @@ void amdxdna_hwctx_resume(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_hwctx *hwctx;
- int next = 0;
+ unsigned long hwctx_id;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
mutex_lock(&client->hwctx_lock);
- idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next)
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
xdna->dev_info->ops->hwctx_resume(hwctx);
mutex_unlock(&client->hwctx_lock);
}
@@ -149,13 +150,13 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
{
struct amdxdna_hwctx *hwctx;
- int next = 0;
+ unsigned long hwctx_id;
mutex_lock(&client->hwctx_lock);
- idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+ amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
XDNA_DBG(client->xdna, "PID %d close HW context %d",
client->pid, hwctx->id);
- idr_remove(&client->hwctx_idr, hwctx->id);
+ xa_erase(&client->hwctx_xa, hwctx->id);
mutex_unlock(&client->hwctx_lock);
amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
mutex_lock(&client->hwctx_lock);
@@ -194,15 +195,13 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
hwctx->num_tiles = args->num_tiles;
hwctx->mem_size = args->mem_size;
hwctx->max_opc = args->max_opc;
- mutex_lock(&client->hwctx_lock);
- ret = idr_alloc_cyclic(&client->hwctx_idr, hwctx, 0, MAX_HWCTX_ID, GFP_KERNEL);
+ ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+ XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+ &client->next_hwctxid, GFP_KERNEL);
if (ret < 0) {
- mutex_unlock(&client->hwctx_lock);
XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
goto free_hwctx;
}
- hwctx->id = ret;
- mutex_unlock(&client->hwctx_lock);
hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
if (!hwctx->name) {
@@ -228,9 +227,7 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
free_name:
kfree(hwctx->name);
rm_id:
- mutex_lock(&client->hwctx_lock);
- idr_remove(&client->hwctx_idr, hwctx->id);
- mutex_unlock(&client->hwctx_lock);
+ xa_erase(&client->hwctx_xa, hwctx->id);
free_hwctx:
kfree(hwctx);
exit:
@@ -246,27 +243,24 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
struct amdxdna_hwctx *hwctx;
int ret = 0, idx;
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
if (!drm_dev_enter(dev, &idx))
return -ENODEV;
- /*
- * Use hwctx_lock to achieve exclusion with other hwctx writers,
- * SRCU to synchronize with exec/wait command ioctls.
- *
- * The pushed jobs are handled by DRM scheduler during destroy.
- */
- mutex_lock(&client->hwctx_lock);
- hwctx = idr_find(&client->hwctx_idr, args->handle);
+ hwctx = xa_erase(&client->hwctx_xa, args->handle);
if (!hwctx) {
- mutex_unlock(&client->hwctx_lock);
ret = -EINVAL;
XDNA_DBG(xdna, "PID %d HW context %d not exist",
client->pid, args->handle);
goto out;
}
- idr_remove(&client->hwctx_idr, hwctx->id);
- mutex_unlock(&client->hwctx_lock);
+ /*
+ * The pushed jobs are handled by the DRM scheduler during destroy.
+ * SRCU is used to synchronize with exec command ioctls.
+ */
amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
@@ -286,6 +280,9 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
void *buf;
u64 val;
+ if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
+ return -EINVAL;
+
if (!xdna->dev_info->ops->hwctx_config)
return -EOPNOTSUPP;
@@ -324,7 +321,7 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
mutex_lock(&xdna->dev_lock);
idx = srcu_read_lock(&client->hwctx_srcu);
- hwctx = idr_find(&client->hwctx_idr, args->handle);
+ hwctx = xa_load(&client->hwctx_xa, args->handle);
if (!hwctx) {
XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
ret = -EINVAL;
@@ -436,7 +433,7 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
}
idx = srcu_read_lock(&client->hwctx_srcu);
- hwctx = idr_find(&client->hwctx_idr, hwctx_hdl);
+ hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
if (!hwctx) {
XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
client->pid, hwctx_hdl);
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 4dfeca306d98..606433d73236 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -552,7 +552,7 @@ int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm
struct drm_gem_object *gobj;
int ret = 0;
- if (args->ext || args->ext_flags)
+ if (args->ext || args->ext_flags || args->pad)
return -EINVAL;
gobj = drm_gem_object_lookup(filp, args->handle);
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index 415d99abaaa3..1afc8079e3d1 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -6,7 +6,9 @@
#include <drm/drm_device.h>
#include <drm/drm_managed.h>
#include <linux/bitfield.h>
+#include <linux/interrupt.h>
#include <linux/iopoll.h>
+#include <linux/xarray.h>
#define CREATE_TRACE_POINTS
#include <trace/events/amdxdna.h>
@@ -54,8 +56,8 @@ struct mailbox_channel {
struct xdna_mailbox_chann_res res[CHAN_RES_NUM];
int msix_irq;
u32 iohub_int_addr;
- struct idr chan_idr;
- spinlock_t chan_idr_lock; /* protect chan_idr */
+ struct xarray chan_xa;
+ u32 next_msgid;
u32 x2i_tail;
/* Received msg related fields */
@@ -164,19 +166,17 @@ static inline int mailbox_validate_msgid(int msg_id)
static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbox_msg *mb_msg)
{
- unsigned long flags;
- int msg_id;
+ u32 msg_id;
+ int ret;
- spin_lock_irqsave(&mb_chann->chan_idr_lock, flags);
- msg_id = idr_alloc_cyclic(&mb_chann->chan_idr, mb_msg, 0,
- MAX_MSG_ID_ENTRIES, GFP_NOWAIT);
- spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
- if (msg_id < 0)
- return msg_id;
+ ret = xa_alloc_cyclic_irq(&mb_chann->chan_xa, &msg_id, mb_msg,
+ XA_LIMIT(0, MAX_MSG_ID_ENTRIES - 1),
+ &mb_chann->next_msgid, GFP_NOWAIT);
+ if (ret < 0)
+ return ret;
/*
- * The IDR becomes less efficient when dealing with larger IDs.
- * Thus, add MAGIC_VAL to the higher bits.
+ * Add MAGIC_VAL to the higher bits.
*/
msg_id |= MAGIC_VAL;
return msg_id;
@@ -184,25 +184,17 @@ static int mailbox_acquire_msgid(struct mailbox_channel *mb_chann, struct mailbo
static void mailbox_release_msgid(struct mailbox_channel *mb_chann, int msg_id)
{
- unsigned long flags;
-
msg_id &= ~MAGIC_VAL_MASK;
- spin_lock_irqsave(&mb_chann->chan_idr_lock, flags);
- idr_remove(&mb_chann->chan_idr, msg_id);
- spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
+ xa_erase_irq(&mb_chann->chan_xa, msg_id);
}
-static int mailbox_release_msg(int id, void *p, void *data)
+static void mailbox_release_msg(struct mailbox_channel *mb_chann,
+ struct mailbox_msg *mb_msg)
{
- struct mailbox_channel *mb_chann = data;
- struct mailbox_msg *mb_msg = p;
-
MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
mb_msg->notify_cb(mb_msg->handle, NULL, 0);
kfree(mb_msg);
-
- return 0;
}
static int
@@ -254,7 +246,6 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
void *data)
{
struct mailbox_msg *mb_msg;
- unsigned long flags;
int msg_id;
int ret;
@@ -265,15 +256,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
}
msg_id &= ~MAGIC_VAL_MASK;
- spin_lock_irqsave(&mb_chann->chan_idr_lock, flags);
- mb_msg = idr_find(&mb_chann->chan_idr, msg_id);
+ mb_msg = xa_erase_irq(&mb_chann->chan_xa, msg_id);
if (!mb_msg) {
MB_ERR(mb_chann, "Cannot find msg 0x%x", msg_id);
- spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
return -EINVAL;
}
- idr_remove(&mb_chann->chan_idr, msg_id);
- spin_unlock_irqrestore(&mb_chann->chan_idr_lock, flags);
MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
header->opcode, header->total_size, header->id);
@@ -497,8 +484,7 @@ xdna_mailbox_create_channel(struct mailbox *mb,
memcpy(&mb_chann->res[CHAN_RES_X2I], x2i, sizeof(*x2i));
memcpy(&mb_chann->res[CHAN_RES_I2X], i2x, sizeof(*i2x));
- spin_lock_init(&mb_chann->chan_idr_lock);
- idr_init(&mb_chann->chan_idr);
+ xa_init_flags(&mb_chann->chan_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
mb_chann->x2i_tail = mailbox_get_tailptr(mb_chann, CHAN_RES_X2I);
mb_chann->i2x_head = mailbox_get_headptr(mb_chann, CHAN_RES_I2X);
@@ -530,16 +516,18 @@ free_and_out:
int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
{
- if (!mb_chann)
- return 0;
+ struct mailbox_msg *mb_msg;
+ unsigned long msg_id;
MB_DBG(mb_chann, "IRQ disabled and RX work cancelled");
free_irq(mb_chann->msix_irq, mb_chann);
destroy_workqueue(mb_chann->work_q);
/* We can clean up and release resources */
- idr_for_each(&mb_chann->chan_idr, mailbox_release_msg, mb_chann);
- idr_destroy(&mb_chann->chan_idr);
+ xa_for_each(&mb_chann->chan_xa, msg_id, mb_msg)
+ mailbox_release_msg(mb_chann, mb_msg);
+
+ xa_destroy(&mb_chann->chan_xa);
MB_DBG(mb_chann, "Mailbox channel destroyed, irq: %d", mb_chann->msix_irq);
kfree(mb_chann);
@@ -548,9 +536,6 @@ int xdna_mailbox_destroy_channel(struct mailbox_channel *mb_chann)
void xdna_mailbox_stop_channel(struct mailbox_channel *mb_chann)
{
- if (!mb_chann)
- return;
-
/* Disable an irq and wait. This might sleep. */
disable_irq(mb_chann->msix_irq);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 02533732d4ca..194e44fc243d 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -39,6 +39,7 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
{ 0x17f0, 0x0, &dev_npu2_info },
{ 0x17f0, 0x10, &dev_npu4_info },
{ 0x17f0, 0x11, &dev_npu5_info },
+ { 0x17f0, 0x20, &dev_npu6_info },
{0}
};
@@ -77,7 +78,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
}
mutex_init(&client->hwctx_lock);
init_srcu_struct(&client->hwctx_srcu);
- idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1);
+ xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
mutex_init(&client->mm_lock);
mutex_lock(&xdna->dev_lock);
@@ -108,7 +109,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
XDNA_DBG(xdna, "closing pid %d", client->pid);
- idr_destroy(&client->hwctx_idr);
+ xa_destroy(&client->hwctx_xa);
cleanup_srcu_struct(&client->hwctx_srcu);
mutex_destroy(&client->hwctx_lock);
mutex_destroy(&client->mm_lock);
@@ -160,6 +161,24 @@ static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct
return ret;
}
+static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_set_state *args = data;
+ int ret;
+
+ if (!xdna->dev_info->ops->set_aie_state)
+ return -EOPNOTSUPP;
+
+ XDNA_DBG(xdna, "Request parameter %u", args->param);
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->set_aie_state(client, args);
+ mutex_unlock(&xdna->dev_lock);
+
+ return ret;
+}
+
static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
/* Context */
DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -173,6 +192,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
/* AIE hardware */
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
};
static const struct file_operations amdxdna_fops = {
@@ -390,8 +410,8 @@ static int amdxdna_rpmops_resume(struct device *dev)
}
static const struct dev_pm_ops amdxdna_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
- SET_RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
+ RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL)
};
static struct pci_driver amdxdna_pci_driver = {
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index c50d65a050ad..37848a8d8031 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -6,12 +6,29 @@
#ifndef _AMDXDNA_PCI_DRV_H_
#define _AMDXDNA_PCI_DRV_H_
+#include <linux/xarray.h>
+
#define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args)
#define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
#define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
#define XDNA_DBG(xdna, fmt, args...) drm_dbg(&(xdna)->ddev, fmt, ##args)
#define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args)
+#define XDNA_MBZ_DBG(xdna, ptr, sz) \
+ ({ \
+ int __i; \
+ int __ret = 0; \
+ u8 *__ptr = (u8 *)(ptr); \
+ for (__i = 0; __i < (sz); __i++) { \
+ if (__ptr[__i]) { \
+ XDNA_DBG(xdna, "MBZ check failed"); \
+ __ret = -EINVAL; \
+ break; \
+ } \
+ } \
+ __ret; \
+ })
+
#define to_xdna_dev(drm_dev) \
((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
@@ -20,6 +37,7 @@ extern const struct drm_driver amdxdna_drm_drv;
struct amdxdna_client;
struct amdxdna_dev;
struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
struct amdxdna_gem_obj;
struct amdxdna_hwctx;
struct amdxdna_sched_job;
@@ -40,6 +58,7 @@ struct amdxdna_dev_ops {
void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+ int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
};
/*
@@ -100,7 +119,8 @@ struct amdxdna_client {
struct mutex hwctx_lock; /* protect hwctx */
/* do NOT wait this srcu when hwctx_lock is held */
struct srcu_struct hwctx_srcu;
- struct idr hwctx_idr;
+ struct xarray hwctx_xa;
+ u32 next_hwctxid;
struct amdxdna_dev *xdna;
struct drm_file *filp;
@@ -111,11 +131,15 @@ struct amdxdna_client {
int pasid;
};
+#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
+ xa_for_each(&(client)->hwctx_xa, hwctx_id, entry)
+
/* Add device info below */
extern const struct amdxdna_dev_info dev_npu1_info;
extern const struct amdxdna_dev_info dev_npu2_info;
extern const struct amdxdna_dev_info dev_npu4_info;
extern const struct amdxdna_dev_info dev_npu5_info;
+extern const struct amdxdna_dev_info dev_npu6_info;
int amdxdna_sysfs_init(struct amdxdna_dev *xdna);
void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index f00c50461b09..e408af57e378 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -44,18 +44,30 @@
#define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE
#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
-#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
-#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu1_default_rt_cfg[] = {
+ { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 },
+};
-#define NPU1_MPNPUCLK_FREQ_MAX 600
-#define NPU1_HCLK_FREQ_MAX 1024
+const struct dpm_clk_freq npu1_dpm_clk_table[] = {
+ {400, 800},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {720, 1309},
+ {720, 1309},
+ {847, 1600},
+ { 0 }
+};
const struct amdxdna_dev_priv npu1_dev_priv = {
.fw_path = "amdnpu/1502_00/npu.sbin",
.protocol_major = 0x5,
- .protocol_minor = 0x1,
- .rt_config = {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP},
+ .protocol_minor = 0x7,
+ .rt_config = npu1_default_rt_cfg,
+ .dpm_clk_tbl = npu1_dpm_clk_table,
.col_align = COL_ALIGN_NONE,
.mbox_dev_addr = NPU1_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7),
},
- .smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU1_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu1_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu1_info = {
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index 00cb381031d2..286bd0d475e2 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -61,18 +61,12 @@
#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE
#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
-#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU2_MPNPUCLK_FREQ_MAX 1267
-#define NPU2_HCLK_FREQ_MAX 1800
-
const struct amdxdna_dev_priv npu2_dev_priv = {
.fw_path = "amdnpu/17f0_00/npu.sbin",
.protocol_major = 0x6,
- .protocol_minor = 0x1,
- .rt_config = {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP},
+ .protocol_minor = 0x6,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU2_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60),
},
- .smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU2_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu2_info = {
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index b6dae9667cca..00c52833ce89 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -61,18 +61,33 @@
#define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE
#define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
-#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu4_default_rt_cfg[] = {
+ { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 },
+};
-#define NPU4_MPNPUCLK_FREQ_MAX 1267
-#define NPU4_HCLK_FREQ_MAX 1800
+const struct dpm_clk_freq npu4_dpm_clk_table[] = {
+ {396, 792},
+ {600, 1056},
+ {792, 1152},
+ {975, 1267},
+ {975, 1267},
+ {1056, 1408},
+ {1152, 1584},
+ {1267, 1800},
+ { 0 }
+};
const struct amdxdna_dev_priv npu4_dev_priv = {
.fw_path = "amdnpu/17f0_10/npu.sbin",
.protocol_major = 0x6,
- .protocol_minor = 0x1,
- .rt_config = {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP},
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU4_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60),
},
- .smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU4_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu4_info = {
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index bed1baf8e160..118849272f27 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -61,18 +61,12 @@
#define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE
#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
-#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU5_MPNPUCLK_FREQ_MAX 1267
-#define NPU5_HCLK_FREQ_MAX 1800
-
const struct amdxdna_dev_priv npu5_dev_priv = {
.fw_path = "amdnpu/17f0_11/npu.sbin",
.protocol_major = 0x6,
- .protocol_minor = 0x1,
- .rt_config = {NPU5_RT_CFG_TYPE_PDI_LOAD, NPU5_RT_CFG_VAL_PDI_LOAD_APP},
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU5_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60),
},
- .smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU5_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu5_info = {
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
new file mode 100644
index 000000000000..f46c760cefc7
--- /dev/null
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/sizes.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_mailbox.h"
+#include "amdxdna_pci_drv.h"
+
+/* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
+#define MPNPU_PUB_SEC_INTR 0x3010060
+#define MPNPU_PUB_PWRMGMT_INTR 0x3010064
+#define MPNPU_PUB_SCRATCH0 0x301006C
+#define MPNPU_PUB_SCRATCH1 0x3010070
+#define MPNPU_PUB_SCRATCH2 0x3010074
+#define MPNPU_PUB_SCRATCH3 0x3010078
+#define MPNPU_PUB_SCRATCH4 0x301007C
+#define MPNPU_PUB_SCRATCH5 0x3010080
+#define MPNPU_PUB_SCRATCH6 0x3010084
+#define MPNPU_PUB_SCRATCH7 0x3010088
+#define MPNPU_PUB_SCRATCH8 0x301008C
+#define MPNPU_PUB_SCRATCH9 0x3010090
+#define MPNPU_PUB_SCRATCH10 0x3010094
+#define MPNPU_PUB_SCRATCH11 0x3010098
+#define MPNPU_PUB_SCRATCH12 0x301009C
+#define MPNPU_PUB_SCRATCH13 0x30100A0
+#define MPNPU_PUB_SCRATCH14 0x30100A4
+#define MPNPU_PUB_SCRATCH15 0x30100A8
+#define MP0_C2PMSG_73 0x3810A24
+#define MP0_C2PMSG_123 0x3810AEC
+
+#define MP1_C2PMSG_0 0x3B10900
+#define MP1_C2PMSG_60 0x3B109F0
+#define MP1_C2PMSG_61 0x3B109F4
+
+#define MPNPU_SRAM_X2I_MAILBOX_0 0x3600000
+#define MPNPU_SRAM_X2I_MAILBOX_15 0x361E000
+#define MPNPU_SRAM_X2I_MAILBOX_31 0x363E000
+#define MPNPU_SRAM_I2X_MAILBOX_31 0x363F000
+
+#define MMNPU_APERTURE0_BASE 0x3000000
+#define MMNPU_APERTURE1_BASE 0x3600000
+#define MMNPU_APERTURE3_BASE 0x3810000
+#define MMNPU_APERTURE4_BASE 0x3B10000
+
+/* PCIe BAR Index for NPU6 */
+#define NPU6_REG_BAR_INDEX 0
+#define NPU6_MBOX_BAR_INDEX 0
+#define NPU6_PSP_BAR_INDEX 4
+#define NPU6_SMU_BAR_INDEX 5
+#define NPU6_SRAM_BAR_INDEX 2
+/* Associated BARs and Apertures */
+#define NPU6_REG_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_MBOX_BAR_BASE MMNPU_APERTURE0_BASE
+#define NPU6_PSP_BAR_BASE MMNPU_APERTURE3_BASE
+#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE
+#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
+
+const struct amdxdna_dev_priv npu6_dev_priv = {
+ .fw_path = "amdnpu/17f0_10/npu.sbin",
+ .protocol_major = 0x6,
+ .protocol_minor = 12,
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
+ .col_align = COL_ALIGN_NATURE,
+ .mbox_dev_addr = NPU6_MBOX_BAR_BASE,
+ .mbox_size = 0, /* Use BAR size */
+ .sram_dev_addr = NPU6_SRAM_BAR_BASE,
+ .sram_offs = {
+ DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
+ DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU6_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
+ },
+ .psp_regs_off = {
+ DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU6_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
+ DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU6_REG, MPNPU_PUB_SCRATCH4),
+ DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU6_REG, MPNPU_PUB_SCRATCH9),
+ DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73),
+ DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123),
+ DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
+ },
+ .smu_regs_off = {
+ DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0),
+ DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU6_SMU, MP1_C2PMSG_60),
+ DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU6_SMU, MMNPU_APERTURE4_BASE),
+ DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
+ DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60),
+ },
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+
+};
+
+const struct amdxdna_dev_info dev_npu6_info = {
+ .reg_bar = NPU6_REG_BAR_INDEX,
+ .mbox_bar = NPU6_MBOX_BAR_INDEX,
+ .sram_bar = NPU6_SRAM_BAR_INDEX,
+ .psp_bar = NPU6_PSP_BAR_INDEX,
+ .smu_bar = NPU6_SMU_BAR_INDEX,
+ .first_col = 0,
+ .dev_mem_buf_shift = 15, /* 32 KiB aligned */
+ .dev_mem_base = AIE2_DEVM_BASE,
+ .dev_mem_size = AIE2_DEVM_SIZE,
+ .vbnv = "RyzenAI-npu6",
+ .device_type = AMDXDNA_DEV_TYPE_KMQ,
+ .dev_priv = &npu6_dev_priv,
+ .ops = &aie2_ops,
+};