Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  246
1 file changed, 210 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4b6814949aad..7f6f1a842b0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -58,7 +58,7 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q);
+ struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);
static inline
@@ -144,7 +144,13 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
-static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+/*
+ * Allocate a doorbell ID to this queue.
+ * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
+ */
+static int allocate_doorbell(struct qcm_process_device *qpd,
+ struct queue *q,
+ uint32_t const *restore_id)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -152,6 +158,10 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
/* On pre-SOC15 chips we need to use the queue ID to
* preserve the user mode ABI.
*/
+
+ if (restore_id && *restore_id != q->properties.queue_id)
+ return -EINVAL;
+
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
@@ -160,25 +170,37 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
* The doorbell index distance between RLC (2*i) and (2*i+1)
* for a SDMA engine is 512.
*/
- uint32_t *idx_offset =
- dev->shared_resources.sdma_doorbell_idx;
- q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
- + (q->properties.sdma_queue_id & 1)
- * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
- + (q->properties.sdma_queue_id >> 1);
+ uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
+ uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
+ + (q->properties.sdma_queue_id & 1)
+ * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+ + (q->properties.sdma_queue_id >> 1);
+
+ if (restore_id && *restore_id != valid_id)
+ return -EINVAL;
+ q->doorbell_id = valid_id;
} else {
- /* For CP queues on SOC15 reserve a free doorbell ID */
- unsigned int found;
-
- found = find_first_zero_bit(qpd->doorbell_bitmap,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
- if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_debug("No doorbells available");
- return -EBUSY;
+ /* For CP queues on SOC15 */
+ if (restore_id) {
+ /* make sure that ID is free */
+ if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
+ return -EINVAL;
+
+ q->doorbell_id = *restore_id;
+ } else {
+ /* or reserve a free doorbell ID */
+ unsigned int found;
+
+ found = find_first_zero_bit(qpd->doorbell_bitmap,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ pr_debug("No doorbells available");
+ return -EBUSY;
+ }
+ set_bit(found, qpd->doorbell_bitmap);
+ q->doorbell_id = found;
}
- set_bit(found, qpd->doorbell_bitmap);
- q->doorbell_id = found;
}
q->properties.doorbell_off =
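
For reference, here is a minimal standalone sketch of the SDMA doorbell index arithmetic validated in the hunk above. The idx_offset table is hypothetical and the 512 mirror distance follows the comment in this hunk; this is only an illustration of the formula, not the driver's code path.

/* Illustration only: SDMA doorbell index arithmetic as in allocate_doorbell().
 * Assumes KFD_QUEUE_DOORBELL_MIRROR_OFFSET == 512 per the comment above;
 * the idx_offset table below is a made-up example.
 */
#include <stdint.h>
#include <stdio.h>

#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512

static uint32_t sdma_doorbell_id(const uint32_t *idx_offset,
                                 uint32_t sdma_engine_id,
                                 uint32_t sdma_queue_id)
{
        /* Even queue IDs use the engine's base index, odd queue IDs the
         * mirrored range 512 doorbell slots away; pairs share a slot index.
         */
        return idx_offset[sdma_engine_id]
             + (sdma_queue_id & 1) * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
             + (sdma_queue_id >> 1);
}

int main(void)
{
        const uint32_t idx_offset[2] = { 0x10, 0x20 }; /* hypothetical table */

        /* queue 0 -> 0x10, queue 1 -> 0x10 + 512, queue 2 -> 0x11, ... */
        printf("%u %u %u\n",
               sdma_doorbell_id(idx_offset, 0, 0),
               sdma_doorbell_id(idx_offset, 0, 1),
               sdma_doorbell_id(idx_offset, 0, 2));
        return 0;
}
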
@@ -299,7 +321,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
struct mqd_manager *mqd_mgr;
int retval;
@@ -339,13 +363,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
q->pipe, q->queue);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
if (retval)
goto deallocate_vmid;
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_hqd;
@@ -358,8 +382,15 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
+
if (q->properties.is_active) {
if (!dqm->sched_running) {
WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
@@ -449,6 +480,65 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}
+#define SQ_IND_CMD_CMD_KILL 0x00000003
+#define SQ_IND_CMD_MODE_BROADCAST 0x00000001
+
+static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+{
+ int status = 0;
+ unsigned int vmid;
+ uint16_t queried_pasid;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+ int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
+ int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
+
+ reg_sq_cmd.u32All = 0;
+ reg_gfx_index.u32All = 0;
+
+ pr_debug("Killing all process wavefronts\n");
+
+ /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+ * ATC_VMID15_PASID_MAPPING
+ * to check which VMID the current process is mapped to.
+ */
+
+ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+ status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+ (dev->adev, vmid, &queried_pasid);
+
+ if (status && queried_pasid == p->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
+ vmid, p->pasid);
+ break;
+ }
+ }
+
+ if (vmid > last_vmid_to_scan) {
+ pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
+ return -EFAULT;
+ }
+
+ /* taking the VMID for that process on the safe way using PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+ reg_sq_cmd.bits.vm_id = vmid;
+
+ dev->kfd2kgd->wave_control_execute(dev->adev,
+ reg_gfx_index.u32All,
+ reg_sq_cmd.u32All);
+
+ return 0;
+}
+
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
* to avoid asynchronized access
*/
@@ -1034,7 +1124,7 @@ static void pre_reset(struct device_queue_manager *dqm)
}
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- struct queue *q)
+ struct queue *q, const uint32_t *restore_sdma_id)
{
int bit;
@@ -1044,9 +1134,21 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
return -ENOMEM;
}
- bit = __ffs64(dqm->sdma_bitmap);
- dqm->sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
+ pr_err("SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ /* Find first available sdma_id */
+ bit = __ffs64(dqm->sdma_bitmap);
+ dqm->sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ }
+
q->properties.sdma_engine_id = q->sdma_id %
kfd_get_num_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
@@ -1056,9 +1158,19 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
}
- bit = __ffs64(dqm->xgmi_sdma_bitmap);
- dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
+ if (restore_sdma_id) {
+ /* Re-use existing sdma_id */
+ if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
+ pr_err("SDMA queue already in use\n");
+ return -EBUSY;
+ }
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
+ q->sdma_id = *restore_sdma_id;
+ } else {
+ bit = __ffs64(dqm->xgmi_sdma_bitmap);
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ }
/* sdma_engine_id is sdma id including
* both PCIe-optimized SDMAs and XGMI-
* optimized SDMAs. The calculation below
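
Both SDMA hunks above apply the same restore-or-allocate pattern to a 64-bit free bitmap, where a set bit means the queue ID is free. Below is a hedged standalone sketch of that pattern, using __builtin_ctzll in place of the kernel's __ffs64(); claim_id and main() are illustrative only.

/* Illustration only: restore an ID if requested and still free,
 * otherwise allocate the lowest free ID from a 64-bit bitmap.
 */
#include <stdint.h>
#include <errno.h>
#include <stdio.h>

static int claim_id(uint64_t *bitmap, const uint32_t *restore_id,
                    uint32_t *out_id)
{
        if (restore_id) {
                /* Re-use the requested ID only if its bit is still set (free). */
                if (!(*bitmap & (1ULL << *restore_id)))
                        return -EBUSY;
                *bitmap &= ~(1ULL << *restore_id);
                *out_id = *restore_id;
        } else {
                /* Otherwise take the lowest free ID. */
                if (!*bitmap)
                        return -ENOMEM;
                *out_id = __builtin_ctzll(*bitmap); /* stand-in for __ffs64() */
                *bitmap &= ~(1ULL << *out_id);
        }
        return 0;
}

int main(void)
{
        uint64_t bitmap = ~0ULL;
        uint32_t id, restore = 5;

        claim_id(&bitmap, &restore, &id); /* restores ID 5 */
        claim_id(&bitmap, NULL, &id);     /* allocates ID 0 */
        printf("restored 5, allocated %u\n", id);
        return 0;
}
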
@@ -1288,7 +1400,9 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
}
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
- struct qcm_process_device *qpd)
+ struct qcm_process_device *qpd,
+ const struct kfd_criu_queue_priv_data *qd,
+ const void *restore_mqd, const void *restore_ctl_stack)
{
int retval;
struct mqd_manager *mqd_mgr;
@@ -1303,13 +1417,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm_lock(dqm);
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
dqm_unlock(dqm);
if (retval)
goto out;
}
- retval = allocate_doorbell(qpd, q);
+ retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
if (retval)
goto out_deallocate_sdma_queue;
@@ -1334,8 +1448,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
* updates the is_evicted flag but is a no-op otherwise.
*/
q->properties.is_evicted = !!qpd->evicted;
- mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
- &q->gart_mqd_addr, &q->properties);
+
+ if (qd)
+ mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+ &q->properties, restore_mqd, restore_ctl_stack,
+ qd->ctl_stack_size);
+ else
+ mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+ &q->gart_mqd_addr, &q->properties);
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
@@ -1738,6 +1858,56 @@ static int get_wave_state(struct device_queue_manager *dqm,
ctl_stack_used_size, save_area_used_size);
}
+static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
+ const struct queue *q,
+ u32 *mqd_size,
+ u32 *ctl_stack_size)
+{
+ struct mqd_manager *mqd_mgr;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ *mqd_size = mqd_mgr->mqd_size;
+ *ctl_stack_size = 0;
+
+ if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
+ mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
+
+ dqm_unlock(dqm);
+}
+
+static int checkpoint_mqd(struct device_queue_manager *dqm,
+ const struct queue *q,
+ void *mqd,
+ void *ctl_stack)
+{
+ struct mqd_manager *mqd_mgr;
+ int r = 0;
+ enum KFD_MQD_TYPE mqd_type =
+ get_mqd_type_from_queue_type(q->properties.type);
+
+ dqm_lock(dqm);
+
+ if (q->properties.is_active || !q->device->cwsr_enabled) {
+ r = -EINVAL;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr = dqm->mqd_mgrs[mqd_type];
+ if (!mqd_mgr->checkpoint_mqd) {
+ r = -EOPNOTSUPP;
+ goto dqm_unlock;
+ }
+
+ mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
+
+dqm_unlock:
+ dqm_unlock(dqm);
+ return r;
+}
+
static int process_termination_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
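
A hedged sketch of how a checkpoint path might drive the two new ops registered below: get_queue_checkpoint_info sizes the buffers, checkpoint_mqd fills them for an inactive queue. checkpoint_one_queue is a hypothetical helper; the buffer handling and error paths of the real CRIU plumbing are not shown here.

/* Illustration only: hypothetical caller of the new checkpoint ops.
 * Assumes the driver's usual headers (linux/slab.h etc.) are in scope.
 */
static int checkpoint_one_queue(struct device_queue_manager *dqm,
                                const struct queue *q)
{
        u32 mqd_size, ctl_stack_size;
        void *mqd, *ctl_stack = NULL;
        int r = 0;

        /* Ask the MQD manager how large the snapshot buffers must be;
         * ctl_stack_size stays zero for non-compute queues.
         */
        dqm->ops.get_queue_checkpoint_info(dqm, q, &mqd_size, &ctl_stack_size);

        mqd = kzalloc(mqd_size, GFP_KERNEL);
        if (ctl_stack_size)
                ctl_stack = kzalloc(ctl_stack_size, GFP_KERNEL);
        if (!mqd || (ctl_stack_size && !ctl_stack)) {
                r = -ENOMEM;
                goto out;
        }

        /* Snapshot the MQD and control stack; fails with -EINVAL if the
         * queue is still active or CWSR is disabled, per checkpoint_mqd().
         */
        r = dqm->ops.checkpoint_mqd(dqm, q, mqd, ctl_stack);

        /* ... copy mqd/ctl_stack into the checkpoint image here ... */
out:
        kfree(ctl_stack);
        kfree(mqd);
        return r;
}
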
@@ -1915,6 +2085,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.restore_process_queues = restore_process_queues_cpsch;
dqm->ops.get_wave_state = get_wave_state;
dqm->ops.reset_queues = reset_queues_cpsch;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */
@@ -1934,6 +2106,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.restore_process_queues =
restore_process_queues_nocpsch;
dqm->ops.get_wave_state = get_wave_state;
+ dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+ dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
default:
pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
@@ -2005,7 +2179,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
kfree(dqm);
}
-int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
+int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
struct kfd_process_device *pdd;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);