Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1374
1 file changed, 1188 insertions(+), 186 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ee3e04e10dae..0fe1161a2182 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -31,16 +31,23 @@
 #include <uapi/linux/kfd_ioctl.h>
 #include <linux/time.h>
 #include <linux/mm.h>
-#include <linux/mman.h>
+#include <uapi/asm-generic/mman-common.h>
 #include <asm/processor.h>
+
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
+#include "cik_regs.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
 static int kfd_mmap(struct file *, struct vm_area_struct *);
+static uint32_t kfd_convert_user_mem_alloction_flags(
+		struct kfd_dev *dev,
+		uint32_t userspace_flags);
+static bool kfd_is_large_bar(struct kfd_dev *dev);
+static int kfd_evict(struct file *filep, struct kfd_process *p, void *data);
 
 static const char kfd_dev_name[] = "kfd";
 
 static const struct file_operations kfd_fops = {
@@ -117,7 +124,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
 		return -EPERM;
 	}
 
-	process = kfd_create_process(current);
+	process = kfd_create_process(filep);
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
@@ -206,6 +213,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	q_properties->ctx_save_restore_area_address =
 			args->ctx_save_restore_address;
 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
+	q_properties->ctl_stack_size = args->ctl_stack_size;
 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -270,7 +278,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 		return -EINVAL;
 	}
 
-	mutex_lock(&p->mutex);
+	down_write(&p->lock);
 
 	pdd = kfd_bind_process_to_device(dev, p);
 	if (IS_ERR(pdd)) {
@@ -282,8 +290,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 			p->pasid,
 			dev->id);
 
-	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
-				0, q_properties.type, &queue_id);
+	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
 	if (err != 0)
 		goto err_create_queue;
 
@@ -291,10 +298,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 
 	/* Return gpu_id as doorbell offset for mmap usage */
-	args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
+	args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL | args->gpu_id);
 	args->doorbell_offset <<= PAGE_SHIFT;
 
-	mutex_unlock(&p->mutex);
+	up_write(&p->lock);
 
 	pr_debug("kfd: queue id %d was created successfully\n",
 			args->queue_id);
@@ -311,7 +318,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 
 err_create_queue:
 err_bind_process:
-	mutex_unlock(&p->mutex);
+	up_write(&p->lock);
 	return err;
 }
 
@@ -325,11 +332,11 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 			args->queue_id,
 			p->pasid);
 
-	mutex_lock(&p->mutex);
+	down_write(&p->lock);
 
 	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
 
-	mutex_unlock(&p->mutex);
+	up_write(&p->lock);
 	return retval;
 }
 
@@ -371,11 +378,33 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 	pr_debug("kfd: updating queue id %d for PASID %d\n",
 			args->queue_id, p->pasid);
 
-	mutex_lock(&p->mutex);
+	down_write(&p->lock);
 
 	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
 
-	mutex_unlock(&p->mutex);
+	up_write(&p->lock);
+
+	return retval;
+}
+
+static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	int retval;
+	struct kfd_ioctl_set_cu_mask_args *args = data;
+	struct queue_properties properties;
+	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
+
+	if (get_user(properties.cu_mask, cu_mask_ptr))
+		return -EFAULT;
+	if (properties.cu_mask == 0)
+		return 0;
+
+	down_write(&p->lock);
+
+	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
+
+	up_write(&p->lock);
 
 	return retval;
 }
@@ -403,7 +432,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
 	if (dev == NULL)
 		return -EINVAL;
 
-	mutex_lock(&p->mutex);
+	down_write(&p->lock);
 
 	pdd = kfd_bind_process_to_device(dev, p);
 	if (IS_ERR(pdd)) {
@@ -427,46 +456,80 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
 		err = -EINVAL;
 
 out:
-	mutex_unlock(&p->mutex);
+	up_write(&p->lock);
 
 	return err;
 }
 
-static int kfd_ioctl_dbg_register(struct file *filep,
-				struct kfd_process *p, void *data)
+static int kfd_ioctl_set_trap_handler(struct file *filep,
+				struct kfd_process *p, void *data)
 {
-	struct kfd_ioctl_dbg_register_args *args = data;
+	struct kfd_ioctl_set_trap_handler_args *args = data;
 	struct kfd_dev *dev;
-	struct kfd_dbgmgr *dbgmgr_ptr;
+	int err = 0;
 	struct kfd_process_device *pdd;
-	bool create_ok;
-	long status = 0;
 
 	dev = kfd_device_by_id(args->gpu_id);
 	if (dev == NULL)
 		return -EINVAL;
 
-	if (dev->device_info->asic_family == CHIP_CARRIZO) {
-		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
-		return -EINVAL;
+	down_write(&p->lock);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto out;
+	}
+	if (!dev->cwsr_enabled || !pdd->qpd.cwsr_kaddr) {
+		pr_err("kfd: CWSR is not enabled, can't set trap handler.\n");
+		err = -EINVAL;
+		goto out;
 	}
 
-	mutex_lock(kfd_get_dbgmgr_mutex());
-	mutex_lock(&p->mutex);
+	if (dev->dqm->ops.set_trap_handler(dev->dqm,
+					&pdd->qpd,
+					args->tba_addr,
+					args->tma_addr))
+		err = -EINVAL;
 
-	/*
-	 * make sure that we have pdd, if this the first queue created for
-	 * this process
-	 */
+out:
+	up_write(&p->lock);
+
+	return err;
+}
+
+static int
+kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data)
+{
+	long status = -EFAULT;
+	struct kfd_ioctl_dbg_register_args *args = data;
+	struct kfd_dev *dev;
+	struct kfd_dbgmgr *dbgmgr_ptr;
+	struct kfd_process_device *pdd;
+	bool create_ok = false;
+
+	pr_debug("kfd:dbg: %s\n", __func__);
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev) {
+		dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__);
+		return status;
+	}
+
+	down_write(&p->lock);
+	mutex_lock(get_dbgmgr_mutex());
+
+	/* make sure that we have pdd, if this the first queue created for this process */
 	pdd = kfd_bind_process_to_device(dev, p);
-	if (IS_ERR(pdd)) {
-		mutex_unlock(&p->mutex);
-		mutex_unlock(kfd_get_dbgmgr_mutex());
+	if (IS_ERR(pdd) < 0) {
+		mutex_unlock(get_dbgmgr_mutex());
+		up_write(&p->lock);
 		return PTR_ERR(pdd);
 	}
 
 	if (dev->dbgmgr == NULL) {
 		/* In case of a legal call, we have no dbgmgr yet */
+
 		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
 		if (create_ok) {
 			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
@@ -475,34 +538,32 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 			else
 				dev->dbgmgr = dbgmgr_ptr;
 		}
-	} else {
-		pr_debug("debugger already registered\n");
-		status = -EINVAL;
 	}
 
-	mutex_unlock(&p->mutex);
-	mutex_unlock(kfd_get_dbgmgr_mutex());
+	mutex_unlock(get_dbgmgr_mutex());
+	up_write(&p->lock);
 
 	return status;
 }
 
-static int kfd_ioctl_dbg_unrgesiter(struct file *filep,
-				struct kfd_process *p, void *data)
+/*
+ * Unregister dbg IOCTL
+ */
+
+static int
+kfd_ioctl_dbg_unrgesiter(struct file *filep, struct kfd_process *p, void *data)
 {
+	long status = -EFAULT;
 	struct kfd_ioctl_dbg_unregister_args *args = data;
 	struct kfd_dev *dev;
-	long status;
 
 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
-		return -EINVAL;
-
-	if (dev->device_info->asic_family == CHIP_CARRIZO) {
-		pr_debug("kfd_ioctl_dbg_unrgesiter not supported on CZ\n");
-		return -EINVAL;
+	if (!dev) {
+		dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__);
+		return status;
 	}
 
-	mutex_lock(kfd_get_dbgmgr_mutex());
+	mutex_lock(get_dbgmgr_mutex());
 
 	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
 	if (status == 0) {
@@ -510,7 +571,7 @@ static int kfd_ioctl_dbg_unrgesiter(struct file *filep,
 		dev->dbgmgr = NULL;
 	}
 
-	mutex_unlock(kfd_get_dbgmgr_mutex());
+	mutex_unlock(get_dbgmgr_mutex());
 
 	return status;
 }
@@ -519,125 +580,144 @@ static int kfd_ioctl_dbg_unrgesiter(struct file *filep,
  * Parse and generate variable size data structure for address watch.
  * Total size of the buffer and # watch points is limited in order
  * to prevent kernel abuse. (no bearing to the much smaller HW limitation
- * which is enforced by dbgdev module)
+ * which is enforced by dbgdev module.
  * please also note that the watch address itself are not "copied from user",
  * since it be set into the HW in user mode values.
  *
 */
-static int kfd_ioctl_dbg_address_watch(struct file *filep,
-					struct kfd_process *p, void *data)
+
+static int
+kfd_ioctl_dbg_address_watch(struct file *filep,
+			struct kfd_process *p,
+			void *data)
 {
+	long status = -EFAULT;
 	struct kfd_ioctl_dbg_address_watch_args *args = data;
 	struct kfd_dev *dev;
 	struct dbg_address_watch_info aw_info;
-	unsigned char *args_buff;
-	long status;
-	void __user *cmd_from_user;
-	uint64_t watch_mask_value = 0;
+	unsigned char *args_buff = NULL;
 	unsigned int args_idx = 0;
+	uint64_t watch_mask_value = 0;
 
 	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
 
-	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
-		return -EINVAL;
+	do {
+		dev = kfd_device_by_id(args->gpu_id);
+		if (!dev) {
+			dev_info(NULL,
+				"Error! kfd: In func %s >> get device by id failed\n",
+				__func__);
			break;
+		}
 
-	if (dev->device_info->asic_family == CHIP_CARRIZO) {
-		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
-		return -EINVAL;
-	}
+		if (args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) {
+			status = -EINVAL;
+			break;
+		}
 
-	cmd_from_user = (void __user *) args->content_ptr;
+		if (args->buf_size_in_bytes <= sizeof(*args)) {
+			status = -EINVAL;
+			break;
+		}
 
-	/* Validate arguments */
+		/* this is the actual buffer to work with */
 
-	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
-		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
-		(cmd_from_user == NULL))
-		return -EINVAL;
+		args_buff = kzalloc(args->buf_size_in_bytes -
+				sizeof(*args), GFP_KERNEL);
+		if (args_buff == NULL) {
+			status = -ENOMEM;
+			break;
+		}
 
-	/* this is the actual buffer to work with */
-	args_buff = memdup_user(cmd_from_user,
-				args->buf_size_in_bytes - sizeof(*args));
-	if (IS_ERR(args_buff))
-		return PTR_ERR(args_buff);
+		/* this is the actual buffer to work with */
+		args_buff = memdup_user(cmd_from_user,
+				args->buf_size_in_bytes - sizeof(*args));
+		if (IS_ERR(args_buff))
+			return PTR_ERR(args_buff);
 
-	aw_info.process = p;
+		aw_info.process = p;
 
-	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
-	args_idx += sizeof(aw_info.num_watch_points);
+		aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
+		args_idx += sizeof(aw_info.num_watch_points);
 
-	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
-	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
+		aw_info.watch_mode = (HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
+		args_idx += sizeof(HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
 
-	/*
-	 * set watch address base pointer to point on the array base
-	 * within args_buff
-	 */
-	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
+		/* set watch address base pointer to point on the array base within args_buff */
 
-	/* skip over the addresses buffer */
-	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
+		aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
 
-	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
-		kfree(args_buff);
-		return -EINVAL;
-	}
+		/*skip over the addresses buffer */
+		args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
 
-	watch_mask_value = (uint64_t) args_buff[args_idx];
+		if (args_idx >= args->buf_size_in_bytes) {
+			status = -EINVAL;
+			break;
+		}
 
-	if (watch_mask_value > 0) {
-		/*
-		 * There is an array of masks.
-		 * set watch mask base pointer to point on the array base
-		 * within args_buff
-		 */
-		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
+		watch_mask_value = (uint64_t) args_buff[args_idx];
 
-		/* skip over the masks buffer */
-		args_idx += sizeof(aw_info.watch_mask) *
-				aw_info.num_watch_points;
-	} else {
-		/* just the NULL mask, set to NULL and skip over it */
-		aw_info.watch_mask = NULL;
-		args_idx += sizeof(aw_info.watch_mask);
-	}
+		if (watch_mask_value > 0) {
+			/* there is an array of masks */
 
-	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
-		kfree(args_buff);
-		return -EINVAL;
-	}
+			/* set watch mask base pointer to point on the array base within args_buff */
+			aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
 
-	/* Currently HSA Event is not supported for DBG */
-	aw_info.watch_event = NULL;
+			/*skip over the masks buffer */
+			args_idx += sizeof(aw_info.watch_mask) * aw_info.num_watch_points;
+		}
 
-	mutex_lock(kfd_get_dbgmgr_mutex());
+		else
+			/* just the NULL mask, set to NULL and skip over it */
+		{
+			aw_info.watch_mask = NULL;
+			args_idx += sizeof(aw_info.watch_mask);
+		}
+
+		if (args_idx > args->buf_size_in_bytes) {
+			status = -EINVAL;
+			break;
+		}
 
-	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
+		aw_info.watch_event = NULL;	/* Currently HSA Event is not supported for DBG */
+		status = 0;
 
-	mutex_unlock(kfd_get_dbgmgr_mutex());
+	} while (0);
+
+	if (status == 0) {
+		mutex_lock(get_dbgmgr_mutex());
+
+		status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
+
+		mutex_unlock(get_dbgmgr_mutex());
+
+	}
 
 	kfree(args_buff);
 
 	return status;
 }
 
-/* Parse and generate fixed size data structure for wave control */
-static int kfd_ioctl_dbg_wave_control(struct file *filep,
-					struct kfd_process *p, void *data)
+/*
+ * Parse and generate fixed size data structure for wave control.
+ * Buffer is generated in a "packed" form, for avoiding structure packing/pending dependencies.
+ */
+
+static int
+kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data)
 {
+	long status = -EFAULT;
 	struct kfd_ioctl_dbg_wave_control_args *args = data;
 	struct kfd_dev *dev;
 	struct dbg_wave_control_info wac_info;
-	unsigned char *args_buff;
-	uint32_t computed_buff_size;
-	long status;
-	void __user *cmd_from_user;
+	unsigned char *args_buff = NULL;
 	unsigned int args_idx = 0;
+	uint32_t computed_buff_size;
 
 	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
 
 	/* we use compact form, independent of the packing attribute value */
+
 	computed_buff_size = sizeof(*args) +
 				sizeof(wac_info.mode) +
 				sizeof(wac_info.operand) +
@@ -645,26 +725,25 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,
 				sizeof(wac_info.dbgWave_msg.MemoryVA) +
 				sizeof(wac_info.trapId);
 
-	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
-		return -EINVAL;
 
-	if (dev->device_info->asic_family == CHIP_CARRIZO) {
-		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
-		return -EINVAL;
-	}
+	dev_info(NULL, "kfd: In func %s - start\n", __func__);
 
-	/* input size must match the computed "compact" size */
-	if (args->buf_size_in_bytes != computed_buff_size) {
-		pr_debug("size mismatch, computed : actual %u : %u\n",
-				args->buf_size_in_bytes, computed_buff_size);
-		return -EINVAL;
-	}
+	do {
+		dev = kfd_device_by_id(args->gpu_id);
+		if (!dev) {
+			dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__);
+			break;
+		}
 
-	cmd_from_user = (void __user *) args->content_ptr;
+		/* input size must match the computed "compact" size */
 
-	if (cmd_from_user == NULL)
-		return -EINVAL;
+		if (args->buf_size_in_bytes != computed_buff_size) {
+			dev_info(NULL,
+				"Error! kfd: In func %s >> size mismatch, computed : actual %u : %u\n",
+				__func__, args->buf_size_in_bytes, computed_buff_size);
+			status = -EINVAL;
+			break;
+		}
 
 	/* copy the entire buffer from user */
@@ -673,34 +752,51 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,
 	if (IS_ERR(args_buff))
 		return PTR_ERR(args_buff);
 
-	/* move ptr to the start of the "pay-load" area */
-	wac_info.process = p;
+		if (copy_from_user(args_buff,
+				(void __user *) args->content_ptr,
+				args->buf_size_in_bytes - sizeof(*args))) {
+			dev_info(NULL,
+				"Error! kfd: In func %s >> copy_from_user failed\n",
+				__func__);
+			break;
+		}
+
+		/* move ptr to the start of the "pay-load" area */
+
-	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
-	args_idx += sizeof(wac_info.operand);
+		wac_info.process = p;
 
-	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
-	args_idx += sizeof(wac_info.mode);
+		wac_info.operand = (HSA_DBG_WAVEOP) *((HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
+		args_idx += sizeof(wac_info.operand);
 
-	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
-	args_idx += sizeof(wac_info.trapId);
+		wac_info.mode = (HSA_DBG_WAVEMODE) *((HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
+		args_idx += sizeof(wac_info.mode);
 
-	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
-			*((uint32_t *)(&args_buff[args_idx]));
-	wac_info.dbgWave_msg.MemoryVA = NULL;
+		wac_info.trapId = (uint32_t) *((uint32_t *)(&args_buff[args_idx]));
+		args_idx += sizeof(wac_info.trapId);
 
-	mutex_lock(kfd_get_dbgmgr_mutex());
+		wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = *((uint32_t *)(&args_buff[args_idx]));
+		wac_info.dbgWave_msg.MemoryVA = NULL;
 
-	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
-			wac_info.process, wac_info.operand,
-			wac_info.mode, wac_info.trapId,
-			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
-	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
+		status = 0;
+
+	} while (0);
+	if (status == 0) {
+		mutex_lock(get_dbgmgr_mutex());
+
+		dev_info(NULL,
+			"kfd: In func %s >> calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
+			__func__, wac_info.process, wac_info.operand, wac_info.mode, wac_info.trapId,
+			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 
-	pr_debug("Returned status of dbg manager is %ld\n", status);
+		status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
 
-	mutex_unlock(kfd_get_dbgmgr_mutex());
+		dev_info(NULL, "kfd: In func %s >> returned status of dbg manager is %ld\n", __func__, status);
+
+		mutex_unlock(get_dbgmgr_mutex());
+
+	}
 
 	kfree(args_buff);
 
@@ -715,12 +811,13 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
 	struct timespec64 time;
 
 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
-		return -EINVAL;
-
-	/* Reading GPU clock counter from KGD */
-	args->gpu_clock_counter =
-		dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
+	if (dev)
+		/* Reading GPU clock counter from KGD */
+		args->gpu_clock_counter =
+			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
+	else
+		/* Node without GPU resource */
+		args->gpu_clock_counter = 0;
 
 	/* No access to rdtsc. Using raw monotonic time */
 	getrawmonotonic64(&time);
@@ -747,7 +844,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 
 	args->num_of_nodes = 0;
 
-	mutex_lock(&p->mutex);
+	down_write(&p->lock);
 
 	/*if the process-device list isn't empty*/
 	if (kfd_has_process_device_data(p)) {
@@ -786,52 +883,180 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 			(args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
 	}
 
-	mutex_unlock(&p->mutex);
+	up_write(&p->lock);
 
 	return 0;
 }
 
-static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
-				void *data)
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	struct kfd_process_device *pdd;
+	uint32_t nodes = 0;
+	int ret;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can alloacate
+		 * sufficient memory */
+		down_write(&p->lock);
+
+		if (!kfd_has_process_device_data(p)) {
+			up_write(&p->lock);
+			return 0;
+		}
+
+		/* Run over all pdd of the process */
+		pdd = kfd_get_first_process_device_data(p);
+		do {
+			args->num_of_nodes++;
+		} while ((pdd =
+			kfd_get_next_process_device_data(p, pdd)) != NULL);
+
+		up_write(&p->lock);
+		return 0;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+			args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	down_write(&p->lock);
+
+	if (!kfd_has_process_device_data(p)) {
+		up_write(&p->lock);
+		args->num_of_nodes = 0;
+		kfree(pa);
+		return 0;
+	}
+
+	/* Run over all pdd of the process */
+	pdd = kfd_get_first_process_device_data(p);
+	do {
+		pa[nodes].gpu_id = pdd->dev->id;
+		pa[nodes].lds_base = pdd->lds_base;
+		pa[nodes].lds_limit = pdd->lds_limit;
+		pa[nodes].gpuvm_base = pdd->gpuvm_base;
+		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+		pa[nodes].scratch_base = pdd->scratch_base;
+		pa[nodes].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+		nodes++;
+	} while (
+		(pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
+		(nodes < args->num_of_nodes));
+	up_write(&p->lock);
+
+	args->num_of_nodes = nodes;
+	ret = copy_to_user(
+		(void __user *)args->kfd_process_device_apertures_ptr,
+		pa,
+		(nodes * sizeof(struct kfd_process_device_apertures)));
+	kfree(pa);
+	return ret ? -EFAULT : 0;
+}
+
+static int
+kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, void *data)
 {
 	struct kfd_ioctl_create_event_args *args = data;
-	int err;
+	struct kfd_dev *kfd;
+	struct kfd_process_device *pdd;
+	int err = -EINVAL;
+	void *mem, *kern_addr = NULL;
 
-	err = kfd_event_create(filp, p, args->event_type,
-				args->auto_reset != 0, args->node_id,
-				&args->event_id, &args->event_trigger_data,
-				&args->event_page_offset,
-				&args->event_slot_index);
+	pr_debug("amdkfd: Event page offset 0x%llx\n", args->event_page_offset);
+
+	if (args->event_page_offset) {
+		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
+		if (!kfd) {
+			pr_err("amdkfd: can't find kfd device\n");
+			return -EFAULT;
+		}
+		if (KFD_IS_DGPU(kfd->device_info->asic_family)) {
+			down_write(&p->lock);
+			pdd = kfd_bind_process_to_device(kfd, p);
+			if (IS_ERR(pdd) < 0) {
+				err = PTR_ERR(pdd);
+				up_write(&p->lock);
+				return -EFAULT;
+			}
+			mem = kfd_process_device_translate_handle(pdd,
+				GET_IDR_HANDLE(args->event_page_offset));
+			if (!mem) {
+				pr_err("amdkfd: can't find BO offset is 0x%llx\n",
+						args->event_page_offset);
+				up_write(&p->lock);
+				return -EFAULT;
+			}
+			up_write(&p->lock);
+
+			/* Map dGPU gtt BO to kernel */
+			kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
+					mem, &kern_addr);
+		}
+	}
+
+	err = kfd_event_create(filp, p,
+			args->event_type,
+			args->auto_reset != 0,
+			args->node_id,
+			&args->event_id,
+			&args->event_trigger_data,
+			&args->event_page_offset,
+			&args->event_slot_index,
+			kern_addr);
 
 	return err;
 }
 
-static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
-				void *data)
+static int
+kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, void *data)
 {
 	struct kfd_ioctl_destroy_event_args *args = data;
 
 	return kfd_event_destroy(p, args->event_id);
 }
 
-static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
-				void *data)
+static int
+kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, void *data)
 {
 	struct kfd_ioctl_set_event_args *args = data;
 
 	return kfd_set_event(p, args->event_id);
 }
 
-static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
-				void *data)
+static int
+kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, void *data)
 {
 	struct kfd_ioctl_reset_event_args *args = data;
 
 	return kfd_reset_event(p, args->event_id);
 }
 
-static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
-				void *data)
+static int
+kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, void *data)
 {
 	struct kfd_ioctl_wait_events_args *args = data;
 	enum kfd_event_wait_result wait_result;
@@ -846,6 +1071,711 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
 	return err;
 }
 
+static int kfd_ioctl_alloc_scratch_memory(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args =
+			(struct kfd_ioctl_alloc_memory_of_gpu_args *)data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	long err;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	down_write(&p->lock);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd) < 0) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_fail;
+	}
+
+	pdd->sh_hidden_private_base_vmid = args->va_addr;
+	pdd->qpd.sh_hidden_private_base = args->va_addr;
+
+	up_write(&p->lock);
+
+	if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) {
+		err = dev->kfd2kgd->alloc_memory_of_scratch(
+			dev->kgd, args->va_addr, pdd->qpd.vmid);
+		if (err != 0)
+			goto alloc_memory_of_scratch_failed;
+	}
+
+	return 0;
+
+bind_process_to_device_fail:
+	up_write(&p->lock);
+alloc_memory_of_scratch_failed:
+	return -EFAULT;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int idr_handle;
+	long err;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	down_write(&p->lock);
+	pdd = kfd_bind_process_to_device(dev, p);
+	up_write(&p->lock);
+	if (IS_ERR(pdd) < 0)
+		return PTR_ERR(pdd);
+
+	err = dev->kfd2kgd->alloc_memory_of_gpu(
+		dev->kgd, args->va_addr, args->size,
+		pdd->vm, (struct kgd_mem **) &mem, NULL, NULL, pdd, 0);
+
+	if (err != 0)
+		return err;
+
+	down_write(&p->lock);
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem,
+			args->va_addr, args->size);
+	up_write(&p->lock);
+	if (idr_handle < 0) {
+		dev->kfd2kgd->free_memory_of_gpu(dev->kgd,
+						 (struct kgd_mem *) mem);
+		return -EFAULT;
+	}
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	return 0;
+}
+
+bool kfd_is_large_bar(struct kfd_dev *dev)
+{
+	struct kfd_local_mem_info mem_info;
+
+	if (debug_largebar) {
+		pr_debug("amdkfd: simulate large-bar allocation on non large-bar machine\n");
+		return true;
+	}
+
+	if (!KFD_IS_DGPU(dev->device_info->asic_family))
+		return false;
+
+	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+	if (mem_info.local_mem_size_private == 0 &&
+			mem_info.local_mem_size_public > 0)
+		return true;
+	return false;
+}
+
+static uint32_t kfd_convert_user_mem_alloction_flags(
+		struct kfd_dev *dev,
+		uint32_t userspace_flags)
+{
+	uint32_t kernel_allocation_flags;
+
+	kernel_allocation_flags = 0;
+
+	/* Allocate VRAM bo */
+	if ((userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE) ||
+		(userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_APU_DEVICE)) {
+		kernel_allocation_flags = ALLOC_MEM_FLAGS_VRAM;
+		if ((userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE) &&
+			kfd_is_large_bar(dev))
+			kernel_allocation_flags |= ALLOC_MEM_FLAGS_PUBLIC;
+		goto out;
+	}
+	/*
+	 * Since currently user space library doesn't uses scratch
+	 * allocation flag I route it to VRAM
+	 */
+	if ((userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_DGPU_SCRATCH) ||
+		(userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_APU_SCRATCH)) {
+		kernel_allocation_flags = ALLOC_MEM_FLAGS_VRAM;
+		goto out;
+	}
+	/*
+	 * The current usage for *_HOST allocation flags are for GTT memory
+	 * Need to verify if we're node zero or we want to allocate bo on
+	 * public domain for P2P buffers.
+	 */
+	if (userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_DGPU_HOST) {
+		kernel_allocation_flags = ALLOC_MEM_FLAGS_GTT;
+		goto out;
+	}
+	/* Allocate userptr BO */
+	if (userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+		kernel_allocation_flags = ALLOC_MEM_FLAGS_USERPTR;
+		goto out;
+	}
+
+out:
+	if (userspace_flags & KFD_IOC_ALLOC_MEM_FLAGS_DGPU_AQL_QUEUE_MEM)
+		kernel_allocation_flags |= ALLOC_MEM_FLAGS_AQL_QUEUE_MEM;
+	/* Current HW doesn't support non paged memory */
+	kernel_allocation_flags |= ALLOC_MEM_FLAGS_NONPAGED;
+	/*
+	 * Set by default execute access as this buffer might be allocated
+	 * for CP's ring buffer
+	 */
+	kernel_allocation_flags |= ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
+	kernel_allocation_flags |= ALLOC_MEM_FLAGS_NO_SUBSTITUTE;
+
+	pr_debug("amdkfd: user allocation flags 0x%x kernel allocation flags: 0x%x\n",
+			userspace_flags, kernel_allocation_flags);
+
+	return kernel_allocation_flags;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu_new(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_new_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int idr_handle;
+	long err;
+	uint64_t offset;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	down_write(&p->lock);
+	pdd = kfd_bind_process_to_device(dev, p);
+	up_write(&p->lock);
+	if (IS_ERR(pdd) < 0)
+		return PTR_ERR(pdd);
+
+	offset = args->mmap_offset;
+	err = dev->kfd2kgd->alloc_memory_of_gpu(
+		dev->kgd, args->va_addr, args->size,
+		pdd->vm, (struct kgd_mem **) &mem, &offset,
+		NULL, pdd,
+		kfd_convert_user_mem_alloction_flags(dev, args->flags));
+
+	if (err != 0)
+		return err;
+
+	down_write(&p->lock);
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem,
+			args->va_addr, args->size);
+	up_write(&p->lock);
+	if (idr_handle < 0) {
+		dev->kfd2kgd->free_memory_of_gpu(dev->kgd,
+						 (struct kgd_mem *) mem);
+		return -EFAULT;
+	}
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+	if ((args->flags & KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE) != 0 &&
+		!kfd_is_large_bar(dev)) {
+		args->mmap_offset = 0;
+	} else {
+		args->mmap_offset = KFD_MMAP_TYPE_MAP_BO;
+		args->mmap_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
+		args->mmap_offset <<= PAGE_SHIFT;
+		args->mmap_offset |= offset;
+	}
+
+	return 0;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_bo *buf_obj;
+	struct kfd_dev *dev;
+	int ret;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (dev == NULL)
+		return -EINVAL;
+
+	down_write(&p->lock);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	buf_obj = kfd_process_device_find_bo(pdd,
+					GET_IDR_HANDLE(args->handle));
+	if (buf_obj == NULL) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+	run_rdma_free_callback(buf_obj);
+
+	up_write(&p->lock);
+
+	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, buf_obj->mem);
+
+	/* If freeing the buffer failed, leave the handle in place for
+	 * clean-up during process tear-down. */
+	if (ret == 0) {
+		down_write(&p->lock);
+		kfd_process_device_remove_obj_handle(
+			pdd, GET_IDR_HANDLE(args->handle));
+		up_write(&p->lock);
+	}
+
+	return ret;
+
+err_unlock:
+	up_write(&p->lock);
+	return ret;
+}
+
+int kfd_map_memory_to_gpu(struct kfd_dev *dev, void *mem,
+		struct kfd_process *p, struct kfd_process_device *pdd)
+{
+	int err;
+
+	BUG_ON(!dev);
+	BUG_ON(!pdd);
+
+	err = dev->kfd2kgd->map_memory_to_gpu(
+		dev->kgd, (struct kgd_mem *) mem, pdd->vm);
+
+	if (err != 0)
+		return err;
+
+	radeon_flush_tlb(dev, p->pasid);
+
+	err = dev->dqm->ops.set_page_directory_base(dev->dqm, &pdd->qpd);
+	if (err != 0) {
+		dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd,
+				(struct kgd_mem *) mem, pdd->vm);
+		return err;
+	}
+
+	return 0;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_new_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	int i, num_dev;
+	uint32_t *devices_arr = NULL;
+	int bo_size;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (args->device_ids_array_size > 0 &&
+			(args->device_ids_array_size < sizeof(uint32_t))) {
+		pr_err("amdkfd: err node IDs array size %u\n",
+				args->device_ids_array_size);
+		return -EFAULT;
+	}
+
+	if (args->device_ids_array_size > 0) {
+		devices_arr = kmalloc(args->device_ids_array_size, GFP_KERNEL);
+		if (!devices_arr)
+			return -ENOMEM;
+
+		err = copy_from_user(devices_arr,
+				(void __user *)args->device_ids_array,
+				args->device_ids_array_size);
+		if (err != 0) {
+			err = -EFAULT;
+			goto copy_from_user_failed;
+		}
+	}
+
+	down_write(&p->lock);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd) < 0) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	up_write(&p->lock);
+
+	if (mem == NULL) {
+		err = PTR_ERR(mem);
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	if (args->device_ids_array_size > 0) {
+		num_dev = args->device_ids_array_size / sizeof(uint32_t);
+		for (i = 0 ; i < num_dev; i++) {
+			peer = kfd_device_by_id(devices_arr[i]);
+			if (!peer) {
+				pr_err("amdkfd: didn't found kfd-dev for 0x%x\n",
+						devices_arr[i]);
+				err = -EFAULT;
+				goto get_mem_obj_from_handle_failed;
+			}
+			down_write(&p->lock);
+			peer_pdd = kfd_bind_process_to_device(peer, p);
+			up_write(&p->lock);
+			if (!peer_pdd) {
+				err = -EFAULT;
+				goto get_mem_obj_from_handle_failed;
+			}
+			err = kfd_map_memory_to_gpu(peer, mem, p, peer_pdd);
+			if (err != 0)
+				pr_err("amdkfd: failed to map\n");
+		}
+	} else {
+		err = kfd_map_memory_to_gpu(dev, mem, p, pdd);
+		if (err != 0)
+			pr_err("amdkfd: failed to map\n");
+	}
+
+	bo_size = dev->kfd2kgd->return_bo_size(dev->kgd, mem);
+	down_write(&p->lock);
+	pdd->mapped_size += bo_size;
+	up_write(&p->lock);
+
+	if (args->device_ids_array_size > 0 && devices_arr)
+		kfree(devices_arr);
+
+	return err;
+
+bind_process_to_device_failed:
+	up_write(&p->lock);
+get_mem_obj_from_handle_failed:
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
+static int kfd_ioctl_map_memory_to_gpu_wrapper(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+	struct kfd_ioctl_map_memory_to_gpu_new_args new_args;
+
+	new_args.handle = args->handle;
+	new_args.device_ids_array = NULL;
+	new_args.device_ids_array_size = 0;
+
+	return kfd_ioctl_map_memory_to_gpu(filep, p, &new_args);
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_new_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	uint32_t *devices_arr = NULL, num_dev, i;
+	int bo_size;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (args->device_ids_array_size > 0 &&
+			(args->device_ids_array_size < sizeof(uint32_t))) {
+		pr_err("amdkfd: err node IDs array size %u\n",
+				args->device_ids_array_size);
+		return -EFAULT;
+	}
+
+	if (args->device_ids_array_size > 0) {
+		devices_arr = kmalloc(args->device_ids_array_size, GFP_KERNEL);
+		if (!devices_arr)
+			return -ENOMEM;
+
+		err = copy_from_user(devices_arr,
+				(void __user *)args->device_ids_array,
+				args->device_ids_array_size);
+		if (err != 0) {
+			err = -EFAULT;
+			goto copy_from_user_failed;
+		}
+	}
+
+	down_write(&p->lock);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	up_write(&p->lock);
+
+	if (mem == NULL) {
+		err = PTR_ERR(mem);
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	if (args->device_ids_array_size > 0) {
+		num_dev = args->device_ids_array_size / sizeof(uint32_t);
+		for (i = 0 ; i < num_dev; i++) {
+			peer = kfd_device_by_id(devices_arr[i]);
+			if (!peer) {
+				err = -EFAULT;
+				goto get_mem_obj_from_handle_failed;
+			}
+			down_write(&p->lock);
+			peer_pdd = kfd_get_process_device_data(peer, p);
+			up_write(&p->lock);
+			if (!peer_pdd) {
+				err = -EFAULT;
+				goto get_mem_obj_from_handle_failed;
+			}
+			peer->kfd2kgd->unmap_memory_to_gpu(peer->kgd,
+					mem, peer_pdd->vm);
+			radeon_flush_tlb(peer, p->pasid);
+		}
+	} else {
+		dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
+		radeon_flush_tlb(dev, p->pasid);
+	}
+
+	bo_size = dev->kfd2kgd->return_bo_size(dev->kgd, mem);
+	down_write(&p->lock);
+	pdd->mapped_size -= bo_size;
+	up_write(&p->lock);
+
+	return 0;
+
+bind_process_to_device_failed:
+	up_write(&p->lock);
+get_mem_obj_from_handle_failed:
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu_wrapper(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+	struct kfd_ioctl_unmap_memory_from_gpu_new_args new_args;
+
+	new_args.handle = args->handle;
+	new_args.device_ids_array = NULL;
+	new_args.device_ids_array_size = 0;
+
+	return kfd_ioctl_unmap_memory_from_gpu(filep, p, &new_args);
+}
+
+static int kfd_ioctl_open_graphic_handle(struct file *filep,
+					struct kfd_process *p,
+					void *data)
+{
+	struct kfd_ioctl_open_graphic_handle_args *args = data;
+	struct kfd_dev *dev;
+	struct kfd_process_device *pdd;
+	void *mem;
+	int idr_handle;
+	long err;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->device_info->asic_family != CHIP_KAVERI) {
+		pr_debug("kfd_ioctl_open_graphic_handle only supported on KV\n");
+		return -EINVAL;
+	}
+
+	down_write(&p->lock);
+	pdd = kfd_bind_process_to_device(dev, p);
+	up_write(&p->lock);
+	if (IS_ERR(pdd) < 0)
+		return PTR_ERR(pdd);
+
+	err = dev->kfd2kgd->open_graphic_handle(dev->kgd,
+			args->va_addr,
+			(struct kgd_vm *) pdd->vm,
+			args->graphic_device_fd,
+			args->graphic_handle,
+			(struct kgd_mem **) &mem);
+
+	if (err != 0)
+		return err;
+
+	down_write(&p->lock);
+	/*TODO: When open_graphic_handle is implemented, we need to create
+	 * the corresponding interval tree. We need to know the size of
+	 * the buffer through open_graphic_handle(). We use 1 for now.*/
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem,
+			args->va_addr, 1);
+	up_write(&p->lock);
+	if (idr_handle < 0) {
+		/* FIXME: destroy_process_gpumem doesn't seem to be
+		 * implemented anywhere */
+		dev->kfd2kgd->destroy_process_gpumem(dev->kgd, mem);
+		return -EFAULT;
+	}
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	return 0;
+}
+
+static int kfd_ioctl_set_process_dgpu_aperture(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_process_dgpu_aperture_args *args = data;
+	struct kfd_dev *dev;
+	struct kfd_process_device *pdd;
+	long err;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	down_write(&p->lock);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd) < 0) {
+		err = PTR_ERR(pdd);
+		goto exit;
+	}
+
+	err = kfd_set_process_dgpu_aperture(pdd, args->dgpu_base,
+			args->dgpu_limit);
+
+exit:
+	up_write(&p->lock);
+	return err;
+}
+
+static int kfd_ioctl_get_dmabuf_info(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_dmabuf_info_args *args = data;
+	struct kfd_dev *dev = NULL;
+	struct kgd_dev *dma_buf_kgd;
+	void *metadata_buffer = NULL;
+	uint32_t flags;
+	unsigned i;
+	int r;
+
+	/* Find a KFD GPU device that supports the get_dmabuf_info query */
+	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
+		if (dev && dev->kfd2kgd->get_dmabuf_info)
+			break;
+	if (!dev)
+		return -EINVAL;
+
+	if (args->metadata_ptr) {
+		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
+		if (!metadata_buffer)
+			return -ENOMEM;
+	}
+
+	/* Get dmabuf info from KGD */
+	r = dev->kfd2kgd->get_dmabuf_info(dev->kgd, args->dmabuf_fd,
+					  &dma_buf_kgd, &args->size,
+					  metadata_buffer, args->metadata_size,
+					  &args->metadata_size, &flags);
+	if (r)
+		goto exit;
+
+	/* Reverse-lookup gpu_id from kgd pointer */
+	dev = kfd_device_by_kgd(dma_buf_kgd);
+	if (!dev) {
+		r = -EINVAL;
+		goto exit;
+	}
+	args->gpu_id = kfd_get_gpu_id(dev);
+
+	/* Translate flags */
+	if (flags & ALLOC_MEM_FLAGS_VRAM) {
+		args->flags = KFD_IS_DGPU(dev->device_info->asic_family) ?
+			KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE :
+			KFD_IOC_ALLOC_MEM_FLAGS_APU_DEVICE;
+	} else
+		args->flags = KFD_IOC_ALLOC_MEM_FLAGS_DGPU_HOST;
+
+	/* Copy metadata buffer to user mode */
+	if (metadata_buffer) {
+		r = copy_to_user((void __user *)args->metadata_ptr,
+				 metadata_buffer, args->metadata_size);
+		if (r != 0)
+			r = -EFAULT;
+	}
+
+exit:
+	kfree(metadata_buffer);
+
+	return r;
+}
+
+static int kfd_ioctl_import_dmabuf(struct file *filep,
+				   struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_import_dmabuf_args *args = data;
+	struct kfd_dev *dev;
+	struct kfd_process_device *pdd;
+	void *mem;
+	uint64_t size;
+	int idr_handle;
+	int r;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev || !dev->kfd2kgd->import_dmabuf)
+		return -EINVAL;
+
+	down_write(&p->lock);
+	pdd = kfd_bind_process_to_device(dev, p);
+	up_write(&p->lock);
+	if (IS_ERR(pdd) < 0)
+		return PTR_ERR(pdd);
+
+	r = dev->kfd2kgd->import_dmabuf(dev->kgd, args->dmabuf_fd,
+					args->va_addr, pdd->vm,
+					(struct kgd_mem **)&mem, &size);
+	if (r)
+		return r;
+
+	down_write(&p->lock);
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem,
+			args->va_addr, size);
+	up_write(&p->lock);
+	if (idr_handle < 0) {
+		dev->kfd2kgd->free_memory_of_gpu(dev->kgd,
+						 (struct kgd_mem *)mem);
+		return -EFAULT;
+	}
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	return 0;
+}
 
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
@@ -899,10 +1829,65 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
 			kfd_ioctl_dbg_wave_control, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+			kfd_ioctl_alloc_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+			kfd_ioctl_free_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+			kfd_ioctl_map_memory_to_gpu_wrapper, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+			kfd_ioctl_unmap_memory_from_gpu_wrapper, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_OPEN_GRAPHIC_HANDLE,
+			kfd_ioctl_open_graphic_handle, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH,
+			kfd_ioctl_alloc_scratch_memory, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
+			kfd_ioctl_set_cu_mask, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE,
+			kfd_ioctl_set_process_dgpu_aperture, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
+			kfd_ioctl_set_trap_handler, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_NEW,
+			kfd_ioctl_alloc_memory_of_gpu_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU_NEW,
+			kfd_ioctl_map_memory_to_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU_NEW,
+			kfd_ioctl_unmap_memory_from_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+			kfd_ioctl_get_process_apertures_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EVICT_MEMORY,
+			kfd_evict, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
+			kfd_ioctl_get_dmabuf_info, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
+			kfd_ioctl_import_dmabuf, 0)
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
 
+static int kfd_evict(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_eviction_args *args = data;
+
+	return evict_size(p, args->size, args->type);
+
+}
+
 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 {
 	struct kfd_process *process;
@@ -994,20 +1979,37 @@ err_i1:
 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct kfd_process *process;
+	struct kfd_dev *kfd;
+	unsigned long vm_pgoff;
+	int retval;
 
 	process = kfd_get_process(current);
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
-	if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
-			KFD_MMAP_DOORBELL_MASK) {
-		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
+	vm_pgoff = vma->vm_pgoff;
+	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vma->vm_pgoff);
+
+	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+	case KFD_MMAP_TYPE_DOORBELL:
 		return kfd_doorbell_mmap(process, vma);
-	} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
-			KFD_MMAP_EVENTS_MASK) {
-		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
+
+	case KFD_MMAP_TYPE_EVENTS:
 		return kfd_event_mmap(process, vma);
+
+	case KFD_MMAP_TYPE_MAP_BO:
+		kfd = kfd_device_by_id(KFD_MMAP_GPU_ID_GET(vm_pgoff));
+		if (!kfd)
+			return -EFAULT;
+		retval = kfd->kfd2kgd->mmap_bo(kfd->kgd, vma);
+		return retval;
+
+	case KFD_MMAP_TYPE_RESERVED_MEM:
+		return kfd_reserved_mem_mmap(process, vma);
+
 	}
 
 	return -EFAULT;
 }
+
+
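For context on how the new mmap offset encoding is meant to be consumed from user space: kfd_ioctl_create_queue() above packs KFD_MMAP_TYPE_DOORBELL and the gpu_id into doorbell_offset and shifts them above PAGE_SHIFT, and kfd_mmap() later strips the tag back out with KFD_MMAP_OFFSET_VALUE_GET() and dispatches on KFD_MMAP_TYPE_MASK. The sketch below shows the user-space half of that handshake; it is a hypothetical illustration, not part of the patch. The AMDKFD_IOC_CREATE_QUEUE ioctl and struct kfd_ioctl_create_queue_args come from the real uapi header, while map_doorbell() and all queue-argument values are illustrative assumptions.

	/* Hypothetical userspace sketch: create a queue, then mmap the
	 * doorbell page via the offset returned in args->doorbell_offset.
	 * Queue setup and error handling are elided for brevity.
	 */
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include <linux/kfd_ioctl.h>

	static uint32_t *map_doorbell(int kfd_fd,
				      struct kfd_ioctl_create_queue_args *args)
	{
		/* AMDKFD_IOC_CREATE_QUEUE fills in queue_id and
		 * doorbell_offset (type tag and gpu_id already encoded
		 * above PAGE_SHIFT by the kernel side shown in the diff). */
		if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, args) < 0)
			return NULL;

		/* kfd_mmap() decodes the tag and routes this to
		 * kfd_doorbell_mmap() for the matching GPU. */
		void *db = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
				MAP_SHARED, kfd_fd, args->doorbell_offset);
		return db == MAP_FAILED ? NULL : (uint32_t *)db;
	}

The same offset scheme carries the other mmap types added by this patch (KFD_MMAP_TYPE_MAP_BO from kfd_ioctl_alloc_memory_of_gpu_new(), and KFD_MMAP_TYPE_RESERVED_MEM), so a single kfd file descriptor can back several distinct mappings.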