diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 246 |
1 files changed, 210 insertions, 36 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4b6814949aad..7f6f1a842b0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -58,7 +58,7 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm, struct queue *q); static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); static int allocate_sdma_queue(struct device_queue_manager *dqm, - struct queue *q); + struct queue *q, const uint32_t *restore_sdma_id); static void kfd_process_hw_exception(struct work_struct *work); static inline @@ -144,7 +144,13 @@ static void decrement_queue_count(struct device_queue_manager *dqm, dqm->active_cp_queue_count--; } -static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) +/* + * Allocate a doorbell ID to this queue. + * If doorbell_id is passed in, make sure requested ID is valid then allocate it. + */ +static int allocate_doorbell(struct qcm_process_device *qpd, + struct queue *q, + uint32_t const *restore_id) { struct kfd_dev *dev = qpd->dqm->dev; @@ -152,6 +158,10 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) /* On pre-SOC15 chips we need to use the queue ID to * preserve the user mode ABI. */ + + if (restore_id && *restore_id != q->properties.queue_id) + return -EINVAL; + q->doorbell_id = q->properties.queue_id; } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { @@ -160,25 +170,37 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) * The doobell index distance between RLC (2*i) and (2*i+1) * for a SDMA engine is 512. */ - uint32_t *idx_offset = - dev->shared_resources.sdma_doorbell_idx; - q->doorbell_id = idx_offset[q->properties.sdma_engine_id] - + (q->properties.sdma_queue_id & 1) - * KFD_QUEUE_DOORBELL_MIRROR_OFFSET - + (q->properties.sdma_queue_id >> 1); + uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx; + uint32_t valid_id = idx_offset[q->properties.sdma_engine_id] + + (q->properties.sdma_queue_id & 1) + * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + + (q->properties.sdma_queue_id >> 1); + + if (restore_id && *restore_id != valid_id) + return -EINVAL; + q->doorbell_id = valid_id; } else { - /* For CP queues on SOC15 reserve a free doorbell ID */ - unsigned int found; - - found = find_first_zero_bit(qpd->doorbell_bitmap, - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); - if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { - pr_debug("No doorbells available"); - return -EBUSY; + /* For CP queues on SOC15 */ + if (restore_id) { + /* make sure that ID is free */ + if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) + return -EINVAL; + + q->doorbell_id = *restore_id; + } else { + /* or reserve a free doorbell ID */ + unsigned int found; + + found = find_first_zero_bit(qpd->doorbell_bitmap, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { + pr_debug("No doorbells available"); + return -EBUSY; + } + set_bit(found, qpd->doorbell_bitmap); + q->doorbell_id = found; } - set_bit(found, qpd->doorbell_bitmap); - q->doorbell_id = found; } q->properties.doorbell_off = @@ -299,7 +321,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm, static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd) + struct qcm_process_device *qpd, + const struct kfd_criu_queue_priv_data *qd, + const void *restore_mqd, const void *restore_ctl_stack) { struct mqd_manager *mqd_mgr; int retval; @@ -339,13 +363,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, q->pipe, q->queue); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - retval = allocate_sdma_queue(dqm, q); + retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); if (retval) goto deallocate_vmid; dqm->asic_ops.init_sdma_vm(dqm, q, qpd); } - retval = allocate_doorbell(qpd, q); + retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); if (retval) goto out_deallocate_hqd; @@ -358,8 +382,15 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, retval = -ENOMEM; goto out_deallocate_doorbell; } - mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, - &q->gart_mqd_addr, &q->properties); + + if (qd) + mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, + &q->properties, restore_mqd, restore_ctl_stack, + qd->ctl_stack_size); + else + mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (q->properties.is_active) { if (!dqm->sched_running) { WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); @@ -449,6 +480,65 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm, dqm->allocated_queues[q->pipe] |= (1 << q->queue); } +#define SQ_IND_CMD_CMD_KILL 0x00000003 +#define SQ_IND_CMD_MODE_BROADCAST 0x00000001 + +static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) +{ + int status = 0; + unsigned int vmid; + uint16_t queried_pasid; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; + int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; + + reg_sq_cmd.u32All = 0; + reg_gfx_index.u32All = 0; + + pr_debug("Killing all process wavefronts\n"); + + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. + * ATC_VMID15_PASID_MAPPING + * to check which VMID the current process is mapped to. + */ + + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info + (dev->adev, vmid, &queried_pasid); + + if (status && queried_pasid == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", + vmid, p->pasid); + break; + } + } + + if (vmid > last_vmid_to_scan) { + pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); + return -EFAULT; + } + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) + return -EFAULT; + + reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; + reg_sq_cmd.bits.vm_id = vmid; + + dev->kfd2kgd->wave_control_execute(dev->adev, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); + + return 0; +} + /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked * to avoid asynchronized access */ @@ -1034,7 +1124,7 @@ static void pre_reset(struct device_queue_manager *dqm) } static int allocate_sdma_queue(struct device_queue_manager *dqm, - struct queue *q) + struct queue *q, const uint32_t *restore_sdma_id) { int bit; @@ -1044,9 +1134,21 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, return -ENOMEM; } - bit = __ffs64(dqm->sdma_bitmap); - dqm->sdma_bitmap &= ~(1ULL << bit); - q->sdma_id = bit; + if (restore_sdma_id) { + /* Re-use existing sdma_id */ + if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) { + pr_err("SDMA queue already in use\n"); + return -EBUSY; + } + dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id); + q->sdma_id = *restore_sdma_id; + } else { + /* Find first available sdma_id */ + bit = __ffs64(dqm->sdma_bitmap); + dqm->sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + } + q->properties.sdma_engine_id = q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / @@ -1056,9 +1158,19 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, pr_err("No more XGMI SDMA queue to allocate\n"); return -ENOMEM; } - bit = __ffs64(dqm->xgmi_sdma_bitmap); - dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); - q->sdma_id = bit; + if (restore_sdma_id) { + /* Re-use existing sdma_id */ + if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) { + pr_err("SDMA queue already in use\n"); + return -EBUSY; + } + dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id); + q->sdma_id = *restore_sdma_id; + } else { + bit = __ffs64(dqm->xgmi_sdma_bitmap); + dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + } /* sdma_engine_id is sdma id including * both PCIe-optimized SDMAs and XGMI- * optimized SDMAs. The calculation below @@ -1288,7 +1400,9 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd) + struct qcm_process_device *qpd, + const struct kfd_criu_queue_priv_data *qd, + const void *restore_mqd, const void *restore_ctl_stack) { int retval; struct mqd_manager *mqd_mgr; @@ -1303,13 +1417,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA || q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { dqm_lock(dqm); - retval = allocate_sdma_queue(dqm, q); + retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); dqm_unlock(dqm); if (retval) goto out; } - retval = allocate_doorbell(qpd, q); + retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); if (retval) goto out_deallocate_sdma_queue; @@ -1334,8 +1448,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, * updates the is_evicted flag but is a no-op otherwise. */ q->properties.is_evicted = !!qpd->evicted; - mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, - &q->gart_mqd_addr, &q->properties); + + if (qd) + mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, + &q->properties, restore_mqd, restore_ctl_stack, + qd->ctl_stack_size); + else + mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); list_add(&q->list, &qpd->queues_list); qpd->queue_count++; @@ -1738,6 +1858,56 @@ static int get_wave_state(struct device_queue_manager *dqm, ctl_stack_used_size, save_area_used_size); } +static void get_queue_checkpoint_info(struct device_queue_manager *dqm, + const struct queue *q, + u32 *mqd_size, + u32 *ctl_stack_size) +{ + struct mqd_manager *mqd_mgr; + enum KFD_MQD_TYPE mqd_type = + get_mqd_type_from_queue_type(q->properties.type); + + dqm_lock(dqm); + mqd_mgr = dqm->mqd_mgrs[mqd_type]; + *mqd_size = mqd_mgr->mqd_size; + *ctl_stack_size = 0; + + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) + mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); + + dqm_unlock(dqm); +} + +static int checkpoint_mqd(struct device_queue_manager *dqm, + const struct queue *q, + void *mqd, + void *ctl_stack) +{ + struct mqd_manager *mqd_mgr; + int r = 0; + enum KFD_MQD_TYPE mqd_type = + get_mqd_type_from_queue_type(q->properties.type); + + dqm_lock(dqm); + + if (q->properties.is_active || !q->device->cwsr_enabled) { + r = -EINVAL; + goto dqm_unlock; + } + + mqd_mgr = dqm->mqd_mgrs[mqd_type]; + if (!mqd_mgr->checkpoint_mqd) { + r = -EOPNOTSUPP; + goto dqm_unlock; + } + + mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); + +dqm_unlock: + dqm_unlock(dqm); + return r; +} + static int process_termination_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -1915,6 +2085,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.restore_process_queues = restore_process_queues_cpsch; dqm->ops.get_wave_state = get_wave_state; dqm->ops.reset_queues = reset_queues_cpsch; + dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; + dqm->ops.checkpoint_mqd = checkpoint_mqd; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1934,6 +2106,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.restore_process_queues = restore_process_queues_nocpsch; dqm->ops.get_wave_state = get_wave_state; + dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; + dqm->ops.checkpoint_mqd = checkpoint_mqd; break; default: pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); @@ -2005,7 +2179,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } -int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid) +int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); |