From f7c826ad380b59baff190044c024b62091128145 Mon Sep 17 00:00:00 2001 From: Alexey Skidanov Date: Mon, 13 Oct 2014 16:35:12 +0300 Subject: drm/amdkfd: Add number of watch points to topology This patch adds the number of watch points to the node capabilities in the topology module Signed-off-by: Alexey Skidanov Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 155 ++++++++++++++++-------------- 3 files changed, 86 insertions(+), 71 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 43884ebd4303..436c31ca7710 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -32,6 +32,7 @@ static const struct kfd_device_info kaveri_device_info = { .max_pasid_bits = 16, .ih_ring_entry_size = 4 * sizeof(uint32_t), + .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f9fb81e3bb09..ba2bba8b5731 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -107,6 +107,7 @@ enum cache_policy { struct kfd_device_info { unsigned int max_pasid_bits; size_t ih_ring_entry_size; + uint8_t num_of_watch_points; uint16_t mqd_size_aligned; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index b11792d7e70e..4886dde7d1fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_crat.h" @@ -630,10 +631,10 @@ static struct kobj_type cache_type = { static ssize_t node_show(struct kobject *kobj, struct attribute *attr, char *buffer) { - ssize_t ret; struct kfd_topology_device *dev; char 
public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; uint32_t i; + uint32_t log_max_watch_addr; /* Making sure that the buffer is an empty string */ buffer[0] = 0; @@ -641,8 +642,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, if (strcmp(attr->name, "gpu_id") == 0) { dev = container_of(attr, struct kfd_topology_device, attr_gpuid); - ret = sysfs_show_32bit_val(buffer, dev->gpu_id); - } else if (strcmp(attr->name, "name") == 0) { + return sysfs_show_32bit_val(buffer, dev->gpu_id); + } + + if (strcmp(attr->name, "name") == 0) { dev = container_of(attr, struct kfd_topology_device, attr_name); for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) { @@ -652,80 +655,90 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, break; } public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0; - ret = sysfs_show_str_val(buffer, public_name); - } else { - dev = container_of(attr, struct kfd_topology_device, - attr_props); - sysfs_show_32bit_prop(buffer, "cpu_cores_count", - dev->node_props.cpu_cores_count); - sysfs_show_32bit_prop(buffer, "simd_count", - dev->node_props.simd_count); - - if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_warn("kfd: mem_banks_count truncated from %d to %d\n", - dev->node_props.mem_banks_count, - dev->mem_bank_count); - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->mem_bank_count); - } else { - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->node_props.mem_banks_count); - } + return sysfs_show_str_val(buffer, public_name); + } - sysfs_show_32bit_prop(buffer, "caches_count", - dev->node_props.caches_count); - sysfs_show_32bit_prop(buffer, "io_links_count", - dev->node_props.io_links_count); - sysfs_show_32bit_prop(buffer, "cpu_core_id_base", - dev->node_props.cpu_core_id_base); - sysfs_show_32bit_prop(buffer, "simd_id_base", - dev->node_props.simd_id_base); - sysfs_show_32bit_prop(buffer, "capability", - dev->node_props.capability); - sysfs_show_32bit_prop(buffer, 
"max_waves_per_simd", - dev->node_props.max_waves_per_simd); - sysfs_show_32bit_prop(buffer, "lds_size_in_kb", - dev->node_props.lds_size_in_kb); - sysfs_show_32bit_prop(buffer, "gds_size_in_kb", - dev->node_props.gds_size_in_kb); - sysfs_show_32bit_prop(buffer, "wave_front_size", - dev->node_props.wave_front_size); - sysfs_show_32bit_prop(buffer, "array_count", - dev->node_props.array_count); - sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine", - dev->node_props.simd_arrays_per_engine); - sysfs_show_32bit_prop(buffer, "cu_per_simd_array", - dev->node_props.cu_per_simd_array); - sysfs_show_32bit_prop(buffer, "simd_per_cu", - dev->node_props.simd_per_cu); - sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", - dev->node_props.max_slots_scratch_cu); - sysfs_show_32bit_prop(buffer, "vendor_id", - dev->node_props.vendor_id); - sysfs_show_32bit_prop(buffer, "device_id", - dev->node_props.device_id); - sysfs_show_32bit_prop(buffer, "location_id", - dev->node_props.location_id); - - if (dev->gpu) { - sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", - kfd2kgd->get_max_engine_clock_in_mhz( - dev->gpu->kgd)); - sysfs_show_64bit_prop(buffer, "local_mem_size", - kfd2kgd->get_vmem_size(dev->gpu->kgd)); - - sysfs_show_32bit_prop(buffer, "fw_version", - kfd2kgd->get_fw_version( - dev->gpu->kgd, - KGD_ENGINE_MEC1)); + dev = container_of(attr, struct kfd_topology_device, + attr_props); + sysfs_show_32bit_prop(buffer, "cpu_cores_count", + dev->node_props.cpu_cores_count); + sysfs_show_32bit_prop(buffer, "simd_count", + dev->node_props.simd_count); + + if (dev->mem_bank_count < dev->node_props.mem_banks_count) { + pr_warn("kfd: mem_banks_count truncated from %d to %d\n", + dev->node_props.mem_banks_count, + dev->mem_bank_count); + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->mem_bank_count); + } else { + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->node_props.mem_banks_count); + } + sysfs_show_32bit_prop(buffer, "caches_count", + 
dev->node_props.caches_count); + sysfs_show_32bit_prop(buffer, "io_links_count", + dev->node_props.io_links_count); + sysfs_show_32bit_prop(buffer, "cpu_core_id_base", + dev->node_props.cpu_core_id_base); + sysfs_show_32bit_prop(buffer, "simd_id_base", + dev->node_props.simd_id_base); + sysfs_show_32bit_prop(buffer, "capability", + dev->node_props.capability); + sysfs_show_32bit_prop(buffer, "max_waves_per_simd", + dev->node_props.max_waves_per_simd); + sysfs_show_32bit_prop(buffer, "lds_size_in_kb", + dev->node_props.lds_size_in_kb); + sysfs_show_32bit_prop(buffer, "gds_size_in_kb", + dev->node_props.gds_size_in_kb); + sysfs_show_32bit_prop(buffer, "wave_front_size", + dev->node_props.wave_front_size); + sysfs_show_32bit_prop(buffer, "array_count", + dev->node_props.array_count); + sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine", + dev->node_props.simd_arrays_per_engine); + sysfs_show_32bit_prop(buffer, "cu_per_simd_array", + dev->node_props.cu_per_simd_array); + sysfs_show_32bit_prop(buffer, "simd_per_cu", + dev->node_props.simd_per_cu); + sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", + dev->node_props.max_slots_scratch_cu); + sysfs_show_32bit_prop(buffer, "vendor_id", + dev->node_props.vendor_id); + sysfs_show_32bit_prop(buffer, "device_id", + dev->node_props.device_id); + sysfs_show_32bit_prop(buffer, "location_id", + dev->node_props.location_id); + + if (dev->gpu) { + log_max_watch_addr = + __ilog2_u32(dev->gpu->device_info->num_of_watch_points); + + if (log_max_watch_addr) { + dev->node_props.capability |= + HSA_CAP_WATCH_POINTS_SUPPORTED; + + dev->node_props.capability |= + ((log_max_watch_addr << + HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) & + HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); } - ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", - cpufreq_quick_get_max(0)/1000); + sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", + kfd2kgd->get_max_engine_clock_in_mhz( + dev->gpu->kgd)); + sysfs_show_64bit_prop(buffer, "local_mem_size", + 
kfd2kgd->get_vmem_size(dev->gpu->kgd)); + + sysfs_show_32bit_prop(buffer, "fw_version", + kfd2kgd->get_fw_version( + dev->gpu->kgd, + KGD_ENGINE_MEC1)); } - return ret; + return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", + cpufreq_quick_get_max(0)/1000); } static const struct sysfs_ops node_ops = { -- cgit v1.2.3 From 093c7d8cfd2a26ff05e3bdff63d371147df1951c Mon Sep 17 00:00:00 2001 From: Alexey Skidanov Date: Tue, 18 Nov 2014 14:00:04 +0200 Subject: drm/amdkfd: Process-device data creation and lookup split This patch splits the current kfd_get_process_device_data() into two functions, one that specifically creates a pdd and another one which just does a lookup. This is done to enhance the readability and maintainability of the code. Signed-off-by: Alexey Skidanov Signed-off-by: Oded Gabbay --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 4 --- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 7 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 40 +++++++++++++--------- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 14 +++++--- 6 files changed, 41 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 924e90c072e5..fb94f1a2b911 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -75,7 +75,6 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) nybble = (pdd->lds_base >> 60) & 0x0E; return nybble; - } static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index b5791a5c7c06..1a9b355dd114 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -137,10 +137,6 @@ int 
kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) if (dev == NULL) return -EINVAL; - /* Find if pdd exists for combination of process and gpu id */ - if (!kfd_get_process_device_data(dev, process, 0)) - return -EINVAL; - /* Calculate physical address of doorbell */ address = kfd_get_process_doorbells(dev, process); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index e64aa99e5e41..35b987574633 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -303,10 +303,11 @@ int kfd_init_apertures(struct kfd_process *process) while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) { - pdd = kfd_get_process_device_data(dev, process, 1); - if (!pdd) + pdd = kfd_create_process_device_data(dev, process); + if (pdd == NULL) { + pr_err("Failed to create process device data\n"); return -1; - + } /* * For 64 bit process aperture will be statically reserved in * the x86_64 non canonical process address space diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ba2bba8b5731..a2e053cff720 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -473,8 +473,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, - struct kfd_process *p, - int create_pdd); + struct kfd_process *p); +struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + struct kfd_process *p); /* Process device data iterator */ struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3c76ef05cbcf..a369c149d172 
100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -311,24 +311,29 @@ err_alloc_process: } struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, - struct kfd_process *p, - int create_pdd) + struct kfd_process *p) { struct kfd_process_device *pdd = NULL; list_for_each_entry(pdd, &p->per_device_data, per_device_list) if (pdd->dev == dev) - return pdd; - - if (create_pdd) { - pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); - if (pdd != NULL) { - pdd->dev = dev; - INIT_LIST_HEAD(&pdd->qpd.queues_list); - INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); - pdd->qpd.dqm = dev->dqm; - list_add(&pdd->per_device_list, &p->per_device_data); - } + break; + + return pdd; +} + +struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + struct kfd_process *p) +{ + struct kfd_process_device *pdd = NULL; + + pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); + if (pdd != NULL) { + pdd->dev = dev; + INIT_LIST_HEAD(&pdd->qpd.queues_list); + INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); + pdd->qpd.dqm = dev->dqm; + list_add(&pdd->per_device_list, &p->per_device_data); } return pdd; @@ -344,11 +349,14 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p) { - struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p, 1); + struct kfd_process_device *pdd; int err; - if (pdd == NULL) + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); return ERR_PTR(-ENOMEM); + } if (pdd->bound) return pdd; @@ -384,7 +392,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pqm_uninit(&p->pqm); - pdd = kfd_get_process_device_data(dev, p, 0); + pdd = kfd_get_process_device_data(dev, p); /* * Just mark pdd as unbound, because we still need it to call diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 47526780d736..d12f9d32275b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -167,8 +167,11 @@ int pqm_create_queue(struct process_queue_manager *pqm, q = NULL; kq = NULL; - pdd = kfd_get_process_device_data(dev, pqm->process, 1); - BUG_ON(!pdd); + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return -1; + } retval = find_available_queue_slot(pqm, qid); if (retval != 0) @@ -273,8 +276,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dev = pqn->q->device; BUG_ON(!dev); - pdd = kfd_get_process_device_data(dev, pqm->process, 1); - BUG_ON(!pdd); + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return -1; + } if (pqn->kq) { /* destroy kernel queue (DIQ) */ -- cgit v1.2.3 From 85ea7d07e1ec62480c19bce6019a2816e153c74a Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sat, 3 Jan 2015 22:12:29 +0200 Subject: drm/amd: Add SDMA functions to kfd-->kgd interface This patch adds three new functions to the kfd2kgd interface: - hqd_sdma_load() - Loads SDMA mqd to a H/W SDMA hqd slot. Used only in no HWS mode. - hqd_sdma_is_occupied() - Checks if an SDMA hqd slot is occupied. Used only in no HWS mode. - hqd_sdma_destroy() - Destructs and preempts the SDMA queue assigned to that SDMA hqd slot. Used only in no HWS mode. These functions are needed to support SDMA queues scheduling when using no HWS mode (used for debug or bring-up). v2: Removed init_sdma_engines() from interface. Initialization is done in radeon. 
Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 47b551970a14..094631f61339 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -144,10 +144,18 @@ struct kgd2kfd_calls { * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * sceduling mode. * + * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. + * used only for no HWS mode. + * * @hqd_is_occupies: Checks if a hqd slot is occupied. * * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. * + * @hqd_sdma_is_occupied: Checks if an SDMA hqd slot is occupied. + * + * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that + * SDMA hqd slot. 
+ * * @get_fw_version: Returns FW versions from the header * * This structure contains function pointers to services that the kgd driver @@ -183,18 +191,26 @@ struct kfd2kgd_calls { int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); + int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); + bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); + + bool (*hqd_sdma_is_occupied)(struct kgd_dev *kgd, void *mqd); + + int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd, + unsigned int timeout); + uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); }; bool kgd2kfd_init(unsigned interface_version, - const struct kfd2kgd_calls *f2g, - const struct kgd2kfd_calls **g2f); + const struct kfd2kgd_calls *f2g, + const struct kgd2kfd_calls **g2f); -#endif /* KGD_KFD_INTERFACE_H_INCLUDED */ +#endif /* KGD_KFD_INTERFACE_H_INCLUDED */ -- cgit v1.2.3 From a84a9903b5885ce1f0b258f8a6568f17d746ab44 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sat, 3 Jan 2015 22:12:30 +0200 Subject: drm/radeon: Implement SDMA interface functions This patch implements the new SDMA interface functions. It also adds defines and structures related to SDMA registers. v2: Removed init_sdma_engines() from interface. Initialization is done in radeon. v3: - Removed unused defines. - Added SDMA_ prefix to defines that didn't have them. 
Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/cik_reg.h | 169 +++++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_kfd.c | 115 +++++++++++++++++++++++- 2 files changed, 281 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h index 79c45e8a536b..bbb8f2e43637 100644 --- a/drivers/gpu/drm/radeon/cik_reg.h +++ b/drivers/gpu/drm/radeon/cik_reg.h @@ -147,10 +147,42 @@ #define CIK_LB_DESKTOP_HEIGHT 0x6b0c +#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 + #define CP_HQD_IQ_RPTR 0xC970u #define AQL_ENABLE (1U << 0) - -#define IDLE (1 << 2) +#define SDMA0_RLC0_RB_CNTL 0xD400u +#define SDMA_RB_VMID(x) (x << 24) +#define SDMA0_RLC0_RB_BASE 0xD404u +#define SDMA0_RLC0_RB_BASE_HI 0xD408u +#define SDMA0_RLC0_RB_RPTR 0xD40Cu +#define SDMA0_RLC0_RB_WPTR 0xD410u +#define SDMA0_RLC0_RB_WPTR_POLL_CNTL 0xD414u +#define SDMA0_RLC0_RB_WPTR_POLL_ADDR_HI 0xD418u +#define SDMA0_RLC0_RB_WPTR_POLL_ADDR_LO 0xD41Cu +#define SDMA0_RLC0_RB_RPTR_ADDR_HI 0xD420u +#define SDMA0_RLC0_RB_RPTR_ADDR_LO 0xD424u +#define SDMA0_RLC0_IB_CNTL 0xD428u +#define SDMA0_RLC0_IB_RPTR 0xD42Cu +#define SDMA0_RLC0_IB_OFFSET 0xD430u +#define SDMA0_RLC0_IB_BASE_LO 0xD434u +#define SDMA0_RLC0_IB_BASE_HI 0xD438u +#define SDMA0_RLC0_IB_SIZE 0xD43Cu +#define SDMA0_RLC0_SKIP_CNTL 0xD440u +#define SDMA0_RLC0_CONTEXT_STATUS 0xD444u +#define SDMA_RLC_IDLE (1 << 2) +#define SDMA0_RLC0_DOORBELL 0xD448u +#define SDMA_OFFSET(x) (x << 0) +#define SDMA_DB_ENABLE (1 << 28) +#define SDMA0_RLC0_VIRTUAL_ADDR 0xD49Cu +#define SDMA_ATC (1 << 0) +#define SDMA_VA_PTR32 (1 << 4) +#define SDMA_VA_SHARED_BASE(x) (x << 8) +#define SDMA0_RLC0_APE1_CNTL 0xD4A0u +#define SDMA0_RLC0_DOORBELL_LOG 0xD4A4u +#define SDMA0_RLC0_WATERMARK 0xD4A8u +#define SDMA0_CNTL 0xD010 +#define SDMA1_CNTL 0xD810 struct cik_mqd { uint32_t header; @@ -283,4 +315,137 @@ struct cik_mqd { uint32_t queue_doorbell_id15; }; +struct 
cik_sdma_rlc_registers { + uint32_t sdma_rlc_rb_cntl; + uint32_t sdma_rlc_rb_base; + uint32_t sdma_rlc_rb_base_hi; + uint32_t sdma_rlc_rb_rptr; + uint32_t sdma_rlc_rb_wptr; + uint32_t sdma_rlc_rb_wptr_poll_cntl; + uint32_t sdma_rlc_rb_wptr_poll_addr_hi; + uint32_t sdma_rlc_rb_wptr_poll_addr_lo; + uint32_t sdma_rlc_rb_rptr_addr_hi; + uint32_t sdma_rlc_rb_rptr_addr_lo; + uint32_t sdma_rlc_ib_cntl; + uint32_t sdma_rlc_ib_rptr; + uint32_t sdma_rlc_ib_offset; + uint32_t sdma_rlc_ib_base_lo; + uint32_t sdma_rlc_ib_base_hi; + uint32_t sdma_rlc_ib_size; + uint32_t sdma_rlc_skip_cntl; + uint32_t sdma_rlc_context_status; + uint32_t sdma_rlc_doorbell; + uint32_t sdma_rlc_virtual_addr; + uint32_t sdma_rlc_ape1_cntl; + uint32_t sdma_rlc_doorbell_log; + uint32_t reserved_22; + uint32_t reserved_23; + uint32_t reserved_24; + uint32_t reserved_25; + uint32_t reserved_26; + uint32_t reserved_27; + uint32_t reserved_28; + uint32_t reserved_29; + uint32_t reserved_30; + uint32_t reserved_31; + uint32_t reserved_32; + uint32_t reserved_33; + uint32_t reserved_34; + uint32_t reserved_35; + uint32_t reserved_36; + uint32_t reserved_37; + uint32_t reserved_38; + uint32_t reserved_39; + uint32_t reserved_40; + uint32_t reserved_41; + uint32_t reserved_42; + uint32_t reserved_43; + uint32_t reserved_44; + uint32_t reserved_45; + uint32_t reserved_46; + uint32_t reserved_47; + uint32_t reserved_48; + uint32_t reserved_49; + uint32_t reserved_50; + uint32_t reserved_51; + uint32_t reserved_52; + uint32_t reserved_53; + uint32_t reserved_54; + uint32_t reserved_55; + uint32_t reserved_56; + uint32_t reserved_57; + uint32_t reserved_58; + uint32_t reserved_59; + uint32_t reserved_60; + uint32_t reserved_61; + uint32_t reserved_62; + uint32_t reserved_63; + uint32_t reserved_64; + uint32_t reserved_65; + uint32_t reserved_66; + uint32_t reserved_67; + uint32_t reserved_68; + uint32_t reserved_69; + uint32_t reserved_70; + uint32_t reserved_71; + uint32_t reserved_72; + uint32_t reserved_73; + 
uint32_t reserved_74; + uint32_t reserved_75; + uint32_t reserved_76; + uint32_t reserved_77; + uint32_t reserved_78; + uint32_t reserved_79; + uint32_t reserved_80; + uint32_t reserved_81; + uint32_t reserved_82; + uint32_t reserved_83; + uint32_t reserved_84; + uint32_t reserved_85; + uint32_t reserved_86; + uint32_t reserved_87; + uint32_t reserved_88; + uint32_t reserved_89; + uint32_t reserved_90; + uint32_t reserved_91; + uint32_t reserved_92; + uint32_t reserved_93; + uint32_t reserved_94; + uint32_t reserved_95; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t reserved_98; + uint32_t reserved_99; + uint32_t reserved_100; + uint32_t reserved_101; + uint32_t reserved_102; + uint32_t reserved_103; + uint32_t reserved_104; + uint32_t reserved_105; + uint32_t reserved_106; + uint32_t reserved_107; + uint32_t reserved_108; + uint32_t reserved_109; + uint32_t reserved_110; + uint32_t reserved_111; + uint32_t reserved_112; + uint32_t reserved_113; + uint32_t reserved_114; + uint32_t reserved_115; + uint32_t reserved_116; + uint32_t reserved_117; + uint32_t reserved_118; + uint32_t reserved_119; + uint32_t reserved_120; + uint32_t reserved_121; + uint32_t reserved_122; + uint32_t reserved_123; + uint32_t reserved_124; + uint32_t reserved_125; + uint32_t reserved_126; + uint32_t reserved_127; + uint32_t sdma_engine_id; + uint32_t sdma_queue_id; +}; + #endif diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 242fd8b1b221..0291681a6ff5 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -71,13 +71,16 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); - +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static int 
kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout); static const struct kfd2kgd_calls kfd2kgd = { .init_sa_manager = init_sa_manager, @@ -92,8 +95,11 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_memory = kgd_init_memory, .init_pipeline = kgd_init_pipeline, .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, .hqd_is_occupies = kgd_hqd_is_occupies, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, .get_fw_version = get_fw_version }; @@ -435,11 +441,28 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, return 0; } +static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +{ + uint32_t retval; + + retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; + + pr_debug("kfd: sdma base address: 0x%x\n", retval); + + return retval; +} + static inline struct cik_mqd *get_mqd(void *mqd) { return (struct cik_mqd *)mqd; } +static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + return (struct cik_sdma_rlc_registers *)mqd; +} + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr) { @@ -517,6 +540,45 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +{ + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + write_register(kgd, + sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR, + m->sdma_rlc_virtual_addr); + + write_register(kgd, + sdma_base_addr + SDMA0_RLC0_RB_BASE, + m->sdma_rlc_rb_base); + + write_register(kgd, + 
sdma_base_addr + SDMA0_RLC0_RB_BASE_HI, + m->sdma_rlc_rb_base_hi); + + write_register(kgd, + sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdma_rlc_rb_rptr_addr_lo); + + write_register(kgd, + sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdma_rlc_rb_rptr_addr_hi); + + write_register(kgd, + sdma_base_addr + SDMA0_RLC0_DOORBELL, + m->sdma_rlc_doorbell); + + write_register(kgd, + sdma_base_addr + SDMA0_RLC0_RB_CNTL, + m->sdma_rlc_rb_cntl); + + return 0; +} + static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { @@ -538,6 +600,24 @@ static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, return retval; } +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + sdma_rlc_rb_cntl = read_register(kgd, + sdma_base_addr + SDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE) + return true; + + return false; +} + static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) @@ -566,6 +646,39 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, return 0; } +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int timeout) +{ + struct cik_sdma_rlc_registers *m; + uint32_t sdma_base_addr; + uint32_t temp; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + + temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA_RB_ENABLE; + write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = read_register(kgd, sdma_base_addr + + SDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA_RLC_IDLE) + break; + if (timeout == 0) + return -ETIME; + msleep(20); + timeout -= 20; + } + + write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 
0); + write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0); + write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0); + write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0); + + return 0; +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct radeon_device *rdev = (struct radeon_device *) kgd; -- cgit v1.2.3 From 77669eb87a904ee983d6c31563be20981837705d Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sat, 3 Jan 2015 22:12:31 +0200 Subject: drm/amdkfd: Add SDMA mqd support This patch adds support for SDMA mqd operations: - init_mqd_sdma - uninit_mqd_sdma - load_mqd_sdma - update_mqd_sdma - destroy_mqd_sdma - is_occupied_sdma It also adds SDMA queue information to some private structures of amdkfd. v3: Use the new names of some of the defines. Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 122 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 ++ 2 files changed, 130 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index adc31474e786..1c1fd3c765f7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -26,6 +26,7 @@ #include "kfd_priv.h" #include "kfd_mqd_manager.h" #include "cik_regs.h" +#include "../../radeon/cikd.h" #include "../../radeon/cik_reg.h" inline void busy_wait(unsigned long ms) @@ -111,6 +112,37 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, return retval; } +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mm || !mqd || !mqd_mem_obj); + + retval = kfd2kgd->allocate_mem(mm->dev->kgd, + sizeof(struct cik_sdma_rlc_registers), + 256, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem 
**) mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + static void uninit_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { @@ -118,11 +150,24 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj); } +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + BUG_ON(!mm || !mqd); + kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj); +} + static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr) { return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr); +} +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr) +{ + return kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); } static int update_mqd(struct mqd_manager *mm, void *mqd, @@ -170,6 +215,41 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, return 0; } +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mm || !mqd || !q); + + m = get_sdma_mqd(mqd); + m->sdma_rlc_rb_cntl = + SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) | + SDMA_RB_VMID(q->vmid) | + SDMA_RPTR_WRITEBACK_ENABLE | + SDMA_RPTR_WRITEBACK_TIMER(6); + + m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE; + 
m->sdma_rlc_virtual_addr = q->sdma_vm_addr; + + m->sdma_engine_id = q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE; + q->is_active = true; + } + + return 0; +} + static int destroy_mqd(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout, uint32_t pipe_id, @@ -179,6 +259,18 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, pipe_id, queue_id); } +/* + * preempt type here is ignored because there is only one way + * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + static bool is_occupied(struct mqd_manager *mm, void *mqd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) @@ -189,6 +281,13 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, } +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + /* * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation. 
* The HIQ queue in Kaveri is using the same MQD structure as all the user mode @@ -301,6 +400,21 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, return 0; } +/* + * SDMA MQD Implementation + */ + +struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + struct cik_sdma_rlc_registers *m; + + BUG_ON(!mqd); + + m = (struct cik_sdma_rlc_registers *)mqd; + + return m; +} + struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev) { @@ -335,6 +449,14 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; break; + case KFD_MQD_TYPE_CIK_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; + break; default: kfree(mqd); return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a2e053cff720..87735d8df8e2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -286,6 +286,10 @@ struct queue_properties { bool is_active; /* Not relevant for user mode queues in cp scheduling */ unsigned int vmid; + /* Relevant only for sdma queues*/ + uint32_t sdma_engine_id; + uint32_t sdma_queue_id; + uint32_t sdma_vm_addr; }; /** @@ -328,6 +332,8 @@ struct queue { uint32_t pipe; uint32_t queue; + unsigned int sdma_id; + struct kfd_process *process; struct kfd_dev *device; }; @@ -530,6 +536,8 @@ int kfd_init_apertures(struct kfd_process *process); /* Queue Context Management */ inline uint32_t lower_32(uint64_t x); inline uint32_t upper_32(uint64_t x); +struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd); +inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m); int init_queue(struct queue **q, struct queue_properties properties); void uninit_queue(struct queue *q); -- cgit v1.2.3 From 
bcea308175748339b872cc50972e0a31c1999c64 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sat, 3 Jan 2015 22:12:32 +0200 Subject: drm/amdkfd: Add SDMA user-mode queues support to QCM This patch adds support for SDMA user-mode queues to the QCM - the Queue management system that manages queues-per-device and queues-per-process. v2: Remove calls to interface function that initializes sdma engines. v3: Use the new names of some of the defines. Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 159 +++++++++++++++++++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- 3 files changed, 154 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index fb94f1a2b911..7ead0802883d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -46,9 +46,24 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); + static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); +static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); + +static void deallocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int sdma_queue_id); + +static inline +enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) +{ + if (type == KFD_QUEUE_TYPE_SDMA) + return KFD_MQD_TYPE_CIK_SDMA; + return KFD_MQD_TYPE_CIK_CP; +} static inline unsigned int get_pipes_num(struct device_queue_manager *dqm) { @@ -189,7 +204,10 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm, *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; - retval = create_compute_queue_nocpsch(dqm, q, qpd); + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + retval = create_compute_queue_nocpsch(dqm, q, qpd); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + retval = create_sdma_queue_nocpsch(dqm, q, qpd); if (retval != 0) { if (list_empty(&qpd->queues_list)) { @@ -202,7 +220,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, list_add(&q->list, &qpd->queues_list); dqm->queue_count++; - + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; mutex_unlock(&dqm->lock); return 0; } @@ -279,8 +298,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q) { int retval; - struct mqd_manager *mqd; - + struct mqd_manager *mqd, *mqd_sdma; BUG_ON(!dqm || !q || !q->mqd || !qpd); retval = 0; @@ -294,6 +312,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, goto out; } + mqd_sdma = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA); + if (mqd_sdma == NULL) { + mutex_unlock(&dqm->lock); + return -ENOMEM; + } + retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, @@ -302,7 +326,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, if (retval != 0) goto out; - deallocate_hqd(dqm, q); + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) + deallocate_hqd(dqm, q); + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -323,7 +352,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) BUG_ON(!dqm || !q || !q->mqd); mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + mqd = dqm->get_mqd_manager(dqm, q->properties.type); if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -526,7 +555,6 @@ static int 
init_pipelines(struct device_queue_manager *dqm, return 0; } - static int init_scheduler(struct device_queue_manager *dqm) { int retval; @@ -556,6 +584,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->sdma_queue_count = 0; dqm->allocated_queues = kcalloc(get_pipes_num(dqm), sizeof(unsigned int), GFP_KERNEL); if (!dqm->allocated_queues) { @@ -567,6 +596,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1; dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; + dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; init_scheduler(dqm); return 0; @@ -598,6 +628,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm) return 0; } +static int allocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int *sdma_queue_id) +{ + int bit; + + if (dqm->sdma_bitmap == 0) + return -ENOMEM; + + bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap, + CIK_SDMA_QUEUES); + + clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap); + *sdma_queue_id = bit; + + return 0; +} + +static void deallocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int sdma_queue_id) +{ + if (sdma_queue_id < 0 || sdma_queue_id >= CIK_SDMA_QUEUES) + return; + set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap); +} + +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + uint32_t value = SDMA_ATC; + + if (q->process->is_32bit_user_mode) + value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); + else + value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64( + qpd_to_pdd(qpd))); + q->properties.sdma_vm_addr = value; +} + +static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + struct mqd_manager *mqd; + int retval; + + mqd = dqm->get_mqd_manager(dqm, 
KFD_MQD_TYPE_CIK_SDMA); + if (!mqd) + return -ENOMEM; + + retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (retval != 0) + return retval; + + q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; + + pr_debug("kfd: sdma id is: %d\n", q->sdma_id); + pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); + pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); + + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval != 0) { + deallocate_sdma_queue(dqm, q->sdma_id); + return retval; + } + + init_sdma_vm(dqm, q, qpd); + return 0; +} + /* * Device Queue Manager implementation for cp scheduler */ @@ -639,6 +740,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; + dqm->sdma_queue_count = 0; dqm->active_runlist = false; retval = init_pipelines(dqm, get_pipes_num(dqm), 0); if (retval != 0) @@ -682,7 +784,6 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->fence_addr = dqm->fence_mem->cpu_ptr; dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; - list_for_each_entry(node, &dqm->queues, list) if (node->qpd->pqm->process && dqm->dev) kfd_bind_process_to_device(dqm->dev, @@ -753,6 +854,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, mutex_unlock(&dqm->lock); } +static void select_sdma_engine_id(struct queue *q) +{ + static int sdma_id; + + q->sdma_id = sdma_id; + sdma_id = (sdma_id + 1) % 2; +} + static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid) { @@ -768,7 +877,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mutex_lock(&dqm->lock); - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + 
select_sdma_engine_id(q); + + mqd = dqm->get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (mqd == NULL) { mutex_unlock(&dqm->lock); return -ENOMEM; @@ -785,6 +899,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = execute_queues_cpsch(dqm, false); } + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; + out: mutex_unlock(&dqm->lock); return retval; @@ -808,6 +925,14 @@ static int fence_wait_timeout(unsigned int *fence_addr, return 0; } +static int destroy_sdma_queues(struct device_queue_manager *dqm, + unsigned int sdma_engine) +{ + return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, + sdma_engine); +} + static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) { int retval; @@ -820,6 +945,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) mutex_lock(&dqm->lock); if (dqm->active_runlist == false) goto out; + + pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n", + dqm->sdma_queue_count); + + if (dqm->sdma_queue_count > 0) { + destroy_sdma_queues(dqm, 0); + destroy_sdma_queues(dqm, 1); + } + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); if (retval != 0) @@ -891,13 +1025,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, /* remove queue from list to prevent rescheduling after preemption */ mutex_lock(&dqm->lock); - - mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + mqd = dqm->get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; goto failed; } + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count--; + list_del(&q->list); dqm->queue_count--; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 
c3f189e8ae35..554c06ee8892 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -36,6 +36,9 @@ #define KFD_VMID_START_OFFSET (8) #define VMID_PER_DEVICE CIK_VMID_NUM #define KFD_DQM_FIRST_PIPE (0) +#define CIK_SDMA_QUEUES (4) +#define CIK_SDMA_QUEUES_PER_ENGINE (2) +#define CIK_SDMA_ENGINE_NUM (2) struct device_process_node { struct qcm_process_device *qpd; @@ -130,8 +133,10 @@ struct device_queue_manager { struct list_head queues; unsigned int processes_count; unsigned int queue_count; + unsigned int sdma_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; + unsigned int sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index d12f9d32275b..948b1ca8e7a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; - q_properties->type = KFD_QUEUE_TYPE_COMPUTE; retval = init_queue(q, *q_properties); if (retval != 0) @@ -189,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } switch (type) { + case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && -- cgit v1.2.3 From 3385f9dd64d44f6adefb6f4680658d1e43bac9c9 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sat, 3 Jan 2015 22:12:33 +0200 Subject: drm/amdkfd: Identify SDMA queue in create queue ioctl This patch adds a check to the create queue ioctl path, which identifies SDMA queue type that is sent by userspace. 
Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7d4974b83af7..3dfce4336eed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -192,6 +192,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) + q_properties->type = KFD_QUEUE_TYPE_SDMA; else return -ENOTSUPP; -- cgit v1.2.3 From 85dfaef34179a6449ebce34a1a9f1c032c3e1b88 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sat, 3 Jan 2015 22:12:34 +0200 Subject: drm/amdkfd: Pass queue type to pqm_create_queue() This patch passes the correct queue type to pqm_create_queue() instead of a fixed KFD_QUEUE_TYPE_COMPUTE type. 
Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 3dfce4336eed..4c0b1e42e405 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -260,8 +260,8 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0, - KFD_QUEUE_TYPE_COMPUTE, &queue_id); + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, + 0, q_properties.type, &queue_id); if (err != 0) goto err_create_queue; -- cgit v1.2.3 From d7a60d8ea5cd4560e0496d2683643d2e4930e609 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sat, 3 Jan 2015 22:12:35 +0200 Subject: drm/radeon: Enable sdma preemption This patch adds to radeon the enablement of sdma preemption. This is needed to support HWS of SDMA user-mode queues. Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/cik_sdma.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index dde5c7e29eb2..1f4ded181662 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -282,6 +282,33 @@ static void cik_sdma_rlc_stop(struct radeon_device *rdev) /* XXX todo */ } +/** + * cik_sdma_ctx_switch_enable - enable/disable sdma engine preemption + * + * @rdev: radeon_device pointer + * @enable: enable/disable preemption. + * + * Halt or unhalt the async dma engines (CIK). 
+ */ +void cik_sdma_ctx_switch_enable(struct radeon_device *rdev, bool enable) +{ + uint32_t reg_offset, value; + int i; + + for (i = 0; i < 2; i++) { + if (i == 0) + reg_offset = SDMA0_REGISTER_OFFSET; + else + reg_offset = SDMA1_REGISTER_OFFSET; + value = RREG32(SDMA0_CNTL + reg_offset); + if (enable) + value |= AUTO_CTXSW_ENABLE; + else + value &= ~AUTO_CTXSW_ENABLE; + WREG32(SDMA0_CNTL + reg_offset, value); + } +} + /** * cik_sdma_enable - stop the async dma engines * @@ -312,6 +339,8 @@ void cik_sdma_enable(struct radeon_device *rdev, bool enable) me_cntl |= SDMA_HALT; WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl); } + + cik_sdma_ctx_switch_enable(rdev, enable); } /** -- cgit v1.2.3 From e27ade73fd38055bd6b374ff86fcd02c0f22b3f3 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 20:45:45 +0200 Subject: drm/amd: Add new kfd-->kgd interface for gart usage This patch adds two new functions to the kfd-->kgd interface: init_gtt_mem_allocation, which allocates a buffer on the gart aperture that is large enough for amdkfd's needs, such as mqds, hpds, kernel queue, fence and runlists. This function is only called once per GPU device. The size of the allocated buffer is based on the maximum number of HSA processes and maximum number of queues per HSA process (two amdkfd kernel module parameters). free_gtt_mem, which frees a buffer that was allocated on the gart aperture.
Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 094631f61339..3a1219005ad5 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -122,6 +122,11 @@ struct kgd2kfd_calls { * * @free_mem: Frees a buffer that was allocated by amdkfd's sa manager * + * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture. + * The buffer can be used for mqds, hpds, kernel queue, fence and runlists + * + * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture + * * @get_vmem_size: Retrieves (physical) size of VRAM * * @get_gpu_clock_counter: Retrieves GPU clock counter @@ -168,8 +173,12 @@ struct kfd2kgd_calls { void (*fini_sa_manager)(struct kgd_dev *kgd); int (*allocate_mem)(struct kgd_dev *kgd, size_t size, size_t alignment, enum kgd_memory_pool pool, struct kgd_mem **mem); + int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr); void (*free_mem)(struct kgd_dev *kgd, struct kgd_mem *mem); + void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); uint64_t (*get_vmem_size)(struct kgd_dev *kgd); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); -- cgit v1.2.3 From ceae881bfa4906db895f2e30872e737a49246830 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 20:52:55 +0200 Subject: drm/radeon: Impl. new gtt allocate/free functions This patch adds the implementation of the gtt interface functions. The allocate function will allocate a single bo, pin and map it to kernel memory. It will return the gpu address and cpu ptr as arguments. v2: The bulk of the allocations in the GART is for MQDs. 
MQDs represent active user-mode queues, which are on the current runlist. It is important to remember that an active queue is not necessarily a scheduled/running queue, especially if there is over-subscription of queues or more than a single HSA process. Because the scheduling of the user-mode queues is done by the CP firmware, amdkfd doesn't have any indication of whether a queue is scheduled or not. If the CP tries to schedule a queue and its MQD is not present, this will probably hang the CP permanently, as it will load garbage from the GART (the address of the MQD is given to the CP inside the runlist packet). In addition, there are a couple of small allocations which should also always be pinned - runlist packets (2 packets) and HPDs. Runlist packets can be quite large, depending on the number of processes and queues. This new allocate function represents the short/mid-term solution of limiting the total memory consumption to around 4MB by default. The long-term solution is to create a mechanism through which radeon/ttm can ask amdkfd to clear GART/VRAM memory due to memory pressure. Then, amdkfd will preempt the running queues and wait until the memory pressure is over. After that, amdkfd will reschedule the queues.
Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_kfd.c | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 0291681a6ff5..2d604ed16b7d 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -37,6 +37,8 @@ struct kgd_mem { struct radeon_sa_bo *sa_bo; uint64_t gpu_addr; void *ptr; + struct radeon_bo *bo; + void *cpu_ptr; }; static int init_sa_manager(struct kgd_dev *kgd, unsigned int size); @@ -47,6 +49,12 @@ static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment, static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem); +static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr); + +static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); + static uint64_t get_vmem_size(struct kgd_dev *kgd); static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); @@ -87,6 +95,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .fini_sa_manager = fini_sa_manager, .allocate_mem = allocate_mem, .free_mem = free_mem, + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, .get_vmem_size = get_vmem_size, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, @@ -272,6 +282,81 @@ static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem) kfree(mem); } +static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + struct kgd_mem **mem = (struct kgd_mem **) mem_obj; + int r; + + BUG_ON(kgd == NULL); + BUG_ON(gpu_addr == NULL); + BUG_ON(cpu_ptr == NULL); + + *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if ((*mem) == NULL) + return -ENOMEM; + + r = 
radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo); + if (r) { + dev_err(rdev->dev, + "failed to allocate BO for amdkfd (%d)\n", r); + return r; + } + + /* map the buffer */ + r = radeon_bo_reserve((*mem)->bo, true); + if (r) { + dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); + goto allocate_mem_reserve_bo_failed; + } + + r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT, + &(*mem)->gpu_addr); + if (r) { + dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); + goto allocate_mem_pin_bo_failed; + } + *gpu_addr = (*mem)->gpu_addr; + + r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + if (r) { + dev_err(rdev->dev, + "(%d) failed to map bo to kernel for amdkfd\n", r); + goto allocate_mem_kmap_bo_failed; + } + *cpu_ptr = (*mem)->cpu_ptr; + + radeon_bo_unreserve((*mem)->bo); + + return 0; + +allocate_mem_kmap_bo_failed: + radeon_bo_unpin((*mem)->bo); +allocate_mem_pin_bo_failed: + radeon_bo_unreserve((*mem)->bo); +allocate_mem_reserve_bo_failed: + radeon_bo_unref(&(*mem)->bo); + + return r; +} + +static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) +{ + struct kgd_mem *mem = (struct kgd_mem *) mem_obj; + + BUG_ON(mem == NULL); + + radeon_bo_reserve(mem->bo, true); + radeon_bo_kunmap(mem->bo); + radeon_bo_unpin(mem->bo); + radeon_bo_unreserve(mem->bo); + radeon_bo_unref(&(mem->bo)); + kfree(mem); +} + static uint64_t get_vmem_size(struct kgd_dev *kgd) { struct radeon_device *rdev = (struct radeon_device *)kgd; -- cgit v1.2.3 From 36b5c08f099a7b4d72a08784ab3efff592de2463 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 09:53:10 +0200 Subject: drm/amdkfd: Add gtt sa related data to kfd_dev struct This patch adds new fields to kfd_dev struct that are necessary for the new kfd gtt sa module Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 21 +++++++++++++++------ 1 file changed, 15 
insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 87735d8df8e2..2be9405bea1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -111,6 +111,13 @@ struct kfd_device_info { uint16_t mqd_size_aligned; }; +struct kfd_mem_obj { + uint32_t range_start; + uint32_t range_end; + uint64_t gpu_addr; + uint32_t *cpu_ptr; +}; + struct kfd_dev { struct kgd_dev *kgd; @@ -136,6 +143,14 @@ struct kfd_dev { struct kgd2kfd_shared_resources shared_resources; + void *gtt_mem; + uint64_t gtt_start_gpu_addr; + void *gtt_start_cpu_ptr; + void *gtt_sa_bitmap; + struct mutex gtt_sa_lock; + unsigned int gtt_sa_chunk_size; + unsigned int gtt_sa_num_of_chunks; + void *interrupt_ring; size_t interrupt_ring_size; atomic_t interrupt_ring_rptr; @@ -163,12 +178,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd); extern const struct kfd2kgd_calls *kfd2kgd; -struct kfd_mem_obj { - void *bo; - uint64_t gpu_addr; - uint32_t *cpu_ptr; -}; - enum kfd_mempool { KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, -- cgit v1.2.3 From 6e81090b2ec4db256b08fab232e0d247aadf1bc5 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 27 Oct 2014 14:36:07 +0200 Subject: drm/amdkfd: Add kfd gtt sub-allocator functions This patch adds new kfd gtt sub-allocator functions that service the amdkfd driver when it wants to use gtt memory. The sub-allocator uses a bitmap to handle the memory area that was transferred to it during init. It divides the memory area into chunks, according to chunk size parameter. The allocation function will allocate contiguous chunks from that memory area, according to the requested size. If the requested size is smaller than the chunk size, a single chunk will be allocated. 
v2: Do some more verifications on parameters that are passed into kfd_gtt_sa_init() Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 186 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 ++ 2 files changed, 193 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 436c31ca7710..33c30dc21d67 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -67,6 +67,10 @@ static const struct kfd_deviceid supported_devices[] = { { 0x131D, &kaveri_device_info }, /* Kaveri */ }; +static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, + unsigned int chunk_size); +static void kfd_gtt_sa_fini(struct kfd_dev *kfd); + static const struct kfd_device_info *lookup_device_info(unsigned short did) { size_t i; @@ -307,3 +311,185 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock(&kfd->interrupt_lock); } } + +static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, + unsigned int chunk_size) +{ + unsigned int num_of_bits; + + BUG_ON(!kfd); + BUG_ON(!kfd->gtt_mem); + BUG_ON(buf_size < chunk_size); + BUG_ON(buf_size == 0); + BUG_ON(chunk_size == 0); + + kfd->gtt_sa_chunk_size = chunk_size; + kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; + + num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE; + BUG_ON(num_of_bits == 0); + + kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL); + + if (!kfd->gtt_sa_bitmap) + return -ENOMEM; + + pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", + kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); + + mutex_init(&kfd->gtt_sa_lock); + + return 0; + +} + +static void kfd_gtt_sa_fini(struct kfd_dev *kfd) +{ + mutex_destroy(&kfd->gtt_sa_lock); + kfree(kfd->gtt_sa_bitmap); +} + +static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, + 
unsigned int bit_num, + unsigned int chunk_size) +{ + return start_addr + bit_num * chunk_size; +} + +static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr, + unsigned int bit_num, + unsigned int chunk_size) +{ + return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size); +} + +int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + struct kfd_mem_obj **mem_obj) +{ + unsigned int found, start_search, cur_size; + + BUG_ON(!kfd); + + if (size == 0) + return -EINVAL; + + if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) + return -ENOMEM; + + *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if ((*mem_obj) == NULL) + return -ENOMEM; + + pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size); + + start_search = 0; + + mutex_lock(&kfd->gtt_sa_lock); + +kfd_gtt_restart_search: + /* Find the first chunk that is free */ + found = find_next_zero_bit(kfd->gtt_sa_bitmap, + kfd->gtt_sa_num_of_chunks, + start_search); + + pr_debug("kfd: found = %d\n", found); + + /* If there wasn't any free chunk, bail out */ + if (found == kfd->gtt_sa_num_of_chunks) + goto kfd_gtt_no_free_chunk; + + /* Update fields of mem_obj */ + (*mem_obj)->range_start = found; + (*mem_obj)->range_end = found; + (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( + kfd->gtt_start_gpu_addr, + found, + kfd->gtt_sa_chunk_size); + (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( + kfd->gtt_start_cpu_ptr, + found, + kfd->gtt_sa_chunk_size); + + pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n", + (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); + + /* If we need only one chunk, mark it as allocated and get out */ + if (size <= kfd->gtt_sa_chunk_size) { + pr_debug("kfd: single bit\n"); + set_bit(found, kfd->gtt_sa_bitmap); + goto kfd_gtt_out; + } + + /* Otherwise, try to see if we have enough contiguous chunks */ + cur_size = size - kfd->gtt_sa_chunk_size; + do { + (*mem_obj)->range_end = + find_next_zero_bit(kfd->gtt_sa_bitmap, + 
kfd->gtt_sa_num_of_chunks, ++found); + /* + * If next free chunk is not contiguous then we need to + * restart our search from the last free chunk we found (which + * wasn't contiguous to the previous ones) + */ + if ((*mem_obj)->range_end != found) { + start_search = found; + goto kfd_gtt_restart_search; + } + + /* + * If we reached end of buffer, bail out with error + */ + if (found == kfd->gtt_sa_num_of_chunks) + goto kfd_gtt_no_free_chunk; + + /* Check if we don't need another chunk */ + if (cur_size <= kfd->gtt_sa_chunk_size) + cur_size = 0; + else + cur_size -= kfd->gtt_sa_chunk_size; + + } while (cur_size > 0); + + pr_debug("kfd: range_start = %d, range_end = %d\n", + (*mem_obj)->range_start, (*mem_obj)->range_end); + + /* Mark the chunks as allocated */ + for (found = (*mem_obj)->range_start; + found <= (*mem_obj)->range_end; + found++) + set_bit(found, kfd->gtt_sa_bitmap); + +kfd_gtt_out: + mutex_unlock(&kfd->gtt_sa_lock); + return 0; + +kfd_gtt_no_free_chunk: + pr_debug("kfd: allocation failed with mem_obj = %p\n", *mem_obj); + mutex_unlock(&kfd->gtt_sa_lock); + kfree(*mem_obj); /* the object, not the caller's pointer slot */ + return -ENOMEM; +} + +int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) +{ + unsigned int bit; + + BUG_ON(!kfd); + BUG_ON(!mem_obj); + + pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n", + mem_obj, mem_obj->range_start, mem_obj->range_end); + + mutex_lock(&kfd->gtt_sa_lock); + + /* Mark the chunks as free */ + for (bit = mem_obj->range_start; + bit <= mem_obj->range_end; + bit++) + clear_bit(bit, kfd->gtt_sa_bitmap); + + mutex_unlock(&kfd->gtt_sa_lock); + + kfree(mem_obj); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2be9405bea1d..a79c21781d3b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -518,6 +518,13 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, struct kfd_process *process, unsigned int queue_id); +/* GTT
Sub-Allocator */ + +int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, + struct kfd_mem_obj **mem_obj); + +int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj); + extern struct device *kfd_device; /* Topology */ -- cgit v1.2.3 From e18e794e6ba02e94edb386e6fcd4217773cb0ac8 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 10:12:22 +0200 Subject: drm/amdkfd: Fixed calculation of gart buffer size This patch makes the gart's buffer size calculation more accurate. This buffer is needed per GPU. It takes into account maximum number of MQDs, runlist packets, kernel queues and reserves 512KB for other misc allocations. The total size is just shy of 4MB, for 32 processes and 128 queues per process, which are the defaults for amdkfd kernel module parameters. Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 33c30dc21d67..fdc7dff2bfa0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -26,6 +26,7 @@ #include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers.h" #define MQD_SIZE_ALIGNED 768 @@ -178,16 +179,31 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, max_num_of_queues_per_process * kfd->device_info->mqd_size_aligned; - /* add another 512KB for all other allocations on gart */ + /* + * calculate max size of runlist packet. 
+ * There can be only 2 packets at once + */ + size += (max_num_of_processes * sizeof(struct pm4_map_process) + + max_num_of_processes * max_num_of_queues_per_process * + sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2; + + /* Add size of HIQ & DIQ */ + size += KFD_KERNEL_QUEUE_SIZE * 2; + + /* add another 512KB for all other allocations on gart (HPD, fences) */ size += 512 * 1024; if (kfd2kgd->init_sa_manager(kfd->kgd, size)) { dev_err(kfd_device, - "Error initializing sa manager for device (%x:%x)\n", - kfd->pdev->vendor, kfd->pdev->device); + "Could not allocate %d bytes for device (%x:%x)\n", + size, kfd->pdev->vendor, kfd->pdev->device); goto out; } + dev_info(kfd_device, + "Allocated %d bytes on gart for device(%x:%x)\n", + size, kfd->pdev->vendor, kfd->pdev->device); + kfd_doorbell_init(kfd); if (kfd_topology_add_device(kfd) != 0) { -- cgit v1.2.3 From 73a1da0bb3b32a552817c57dcaebef09bd2f3677 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 09:53:37 +0200 Subject: drm/amdkfd: Allocate gart memory using new interface This patch changes the calls to allocate the gart memory for amdkfd from the old interface (radeon_sa) to the new one (kfd_gtt_sa) The new gart sub-allocator is initialized with chunk size equal to 512 bytes. This is because the KV MQD is 512 Bytes and most of the sub-allocations are MQDs. 
Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index fdc7dff2bfa0..994a9c1bdd04 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -193,7 +193,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, /* add another 512KB for all other allocations on gart (HPD, fences) */ size += 512 * 1024; - if (kfd2kgd->init_sa_manager(kfd->kgd, size)) { + if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem, + &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) { dev_err(kfd_device, "Could not allocate %d bytes for device (%x:%x)\n", size, kfd->pdev->vendor, kfd->pdev->device); @@ -204,6 +205,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, "Allocated %d bytes on gart for device(%x:%x)\n", size, kfd->pdev->vendor, kfd->pdev->device); + /* Initialize GTT sa with 512 byte chunk size */ + if (kfd_gtt_sa_init(kfd, size, 512) != 0) { + dev_err(kfd_device, + "Error initializing gtt sub-allocator\n"); + goto kfd_gtt_sa_init_error; + } + kfd_doorbell_init(kfd); if (kfd_topology_add_device(kfd) != 0) { @@ -262,7 +270,9 @@ device_iommu_pasid_error: kfd_interrupt_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: - kfd2kgd->fini_sa_manager(kfd->kgd); + kfd_gtt_sa_fini(kfd); +kfd_gtt_sa_init_error: + kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); dev_err(kfd_device, "device (%x:%x) NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -277,6 +287,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) amd_iommu_free_device(kfd->pdev); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); + kfd_gtt_sa_fini(kfd); + kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); } kfree(kfd); -- cgit v1.2.3 From a86aa3ca5a2f16772653782c078f62a7d76dd57e 
Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 22:00:31 +0200 Subject: drm/amdkfd: Using new gtt sa in amdkfd This patch changes the calls throughout the amdkfd driver from the old kfd-->kgd interface to the new kfd gtt sa inside amdkfd. v2: change the new call in sdma code that appeared because of the sdma feature. Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 23 ++++-------- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 41 ++++++++-------------- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 24 +++++-------- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 ++---- 4 files changed, 33 insertions(+), 65 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7ead0802883d..6806e64c5ffd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -519,11 +519,8 @@ static int init_pipelines(struct device_queue_manager *dqm, * because it contains no data when there are no active queues.
*/ - err = kfd2kgd->allocate_mem(dqm->dev->kgd, - CIK_HPD_EOP_BYTES * pipes_num, - PAGE_SIZE, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &dqm->pipeline_mem); + err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num, + &dqm->pipeline_mem); if (err) { pr_err("kfd: error allocate vidmem num pipes: %d\n", @@ -538,8 +535,7 @@ static int init_pipelines(struct device_queue_manager *dqm, mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); if (mqd == NULL) { - kfd2kgd->free_mem(dqm->dev->kgd, - (struct kgd_mem *) dqm->pipeline_mem); + kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); return -ENOMEM; } @@ -614,8 +610,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) kfree(dqm->mqds[i]); mutex_destroy(&dqm->lock); - kfd2kgd->free_mem(dqm->dev->kgd, - (struct kgd_mem *) dqm->pipeline_mem); + kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } static int start_nocpsch(struct device_queue_manager *dqm) @@ -773,11 +768,8 @@ static int start_cpsch(struct device_queue_manager *dqm) pr_debug("kfd: allocating fence memory\n"); /* allocate fence memory on the gart */ - retval = kfd2kgd->allocate_mem(dqm->dev->kgd, - sizeof(*dqm->fence_addr), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &dqm->fence_mem); + retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), + &dqm->fence_mem); if (retval != 0) goto fail_allocate_vidmem; @@ -812,8 +804,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) pdd = qpd_to_pdd(node->qpd); pdd->bound = false; } - kfd2kgd->free_mem(dqm->dev->kgd, - (struct kgd_mem *) dqm->fence_mem); + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 935071410724..0fd8bb7c863e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -72,11 +72,7 @@ static 
bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, if (prop.doorbell_ptr == NULL) goto err_get_kernel_doorbell; - retval = kfd2kgd->allocate_mem(dev->kgd, - queue_size, - PAGE_SIZE, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->pq); + retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); if (retval != 0) goto err_pq_allocate_vidmem; @@ -84,11 +80,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr; - retval = kfd2kgd->allocate_mem(dev->kgd, - sizeof(*kq->rptr_kernel), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->rptr_mem); + retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), + &kq->rptr_mem); if (retval != 0) goto err_rptr_allocate_vidmem; @@ -96,11 +89,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd2kgd->allocate_mem(dev->kgd, - sizeof(*kq->wptr_kernel), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->wptr_mem); + retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), + &kq->wptr_mem); if (retval != 0) goto err_wptr_allocate_vidmem; @@ -145,11 +135,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, } else { /* allocate fence for DIQ */ - retval = kfd2kgd->allocate_mem(dev->kgd, - sizeof(uint32_t), - 32, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &kq->fence_mem_obj); + retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t), + &kq->fence_mem_obj); if (retval != 0) goto err_alloc_fence; @@ -165,11 +152,11 @@ err_alloc_fence: err_init_mqd: uninit_queue(kq->queue); err_init_queue: - kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->wptr_mem); + kfd_gtt_sa_free(dev, kq->wptr_mem); err_wptr_allocate_vidmem: - kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->rptr_mem); + kfd_gtt_sa_free(dev, kq->rptr_mem); err_rptr_allocate_vidmem: - 
kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->pq); + kfd_gtt_sa_free(dev, kq->pq); err_pq_allocate_vidmem: pr_err("kfd: error init pq\n"); kfd_release_kernel_doorbell(dev, prop.doorbell_ptr); @@ -190,10 +177,12 @@ static void uninitialize(struct kernel_queue *kq) QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, kq->queue->pipe, kq->queue->queue); + else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) + kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); - kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->rptr_mem); - kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->wptr_mem); - kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->pq); + kfd_gtt_sa_free(kq->dev, kq->rptr_mem); + kfd_gtt_sa_free(kq->dev, kq->wptr_mem); + kfd_gtt_sa_free(kq->dev, kq->pq); kfd_release_kernel_doorbell(kq->dev, kq->queue->properties.doorbell_ptr); uninit_queue(kq->queue); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 1c1fd3c765f7..678c33f0a1b8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -52,11 +52,8 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, pr_debug("kfd: In func %s\n", __func__); - retval = kfd2kgd->allocate_mem(mm->dev->kgd, - sizeof(struct cik_mqd), - 256, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) mqd_mem_obj); + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); if (retval != 0) return -ENOMEM; @@ -121,11 +118,9 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, BUG_ON(!mm || !mqd || !mqd_mem_obj); - retval = kfd2kgd->allocate_mem(mm->dev->kgd, + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_sdma_rlc_registers), - 256, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) mqd_mem_obj); + mqd_mem_obj); if (retval != 0) return -ENOMEM; @@ -147,14 +142,14 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { BUG_ON(!mm || !mqd); 
- kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { BUG_ON(!mm || !mqd); - kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj); + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, @@ -306,11 +301,8 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, pr_debug("kfd: In func %s\n", __func__); - retval = kfd2kgd->allocate_mem(mm->dev->kgd, - sizeof(struct cik_mqd), - 256, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) mqd_mem_obj); + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), + mqd_mem_obj); if (retval != 0) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 5ce9233d2004..3cda952ac2f8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -97,11 +97,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); - retval = kfd2kgd->allocate_mem(pm->dqm->dev->kgd, - *rl_buffer_size, - PAGE_SIZE, - KFD_MEMPOOL_SYSTEM_WRITECOMBINE, - (struct kgd_mem **) &pm->ib_buffer_obj); + retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, + &pm->ib_buffer_obj); if (retval != 0) { pr_err("kfd: failed to allocate runlist IB\n"); @@ -557,8 +554,7 @@ void pm_release_ib(struct packet_manager *pm) mutex_lock(&pm->lock); if (pm->allocated) { - kfd2kgd->free_mem(pm->dqm->dev->kgd, - (struct kgd_mem *) pm->ib_buffer_obj); + kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); pm->allocated = false; } mutex_unlock(&pm->lock); -- cgit v1.2.3 From 632aa2cb081fbf6ea16d22ab332489106f10d727 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 09:43:43 +0200 Subject: drm/radeon: Remove old radeon_sa usage from 
kfd-->kgd interface Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_kfd.c | 99 +------------------------------------ 1 file changed, 1 insertion(+), 98 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 2d604ed16b7d..cae11eefecf0 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -34,20 +34,11 @@ #define CIK_PIPE_PER_MEC (4) struct kgd_mem { - struct radeon_sa_bo *sa_bo; - uint64_t gpu_addr; - void *ptr; struct radeon_bo *bo; + uint64_t gpu_addr; void *cpu_ptr; }; -static int init_sa_manager(struct kgd_dev *kgd, unsigned int size); -static void fini_sa_manager(struct kgd_dev *kgd); - -static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment, - enum kgd_memory_pool pool, struct kgd_mem **mem); - -static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem); static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -91,10 +82,6 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int timeout); static const struct kfd2kgd_calls kfd2kgd = { - .init_sa_manager = init_sa_manager, - .fini_sa_manager = fini_sa_manager, - .allocate_mem = allocate_mem, - .free_mem = free_mem, .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, .get_vmem_size = get_vmem_size, @@ -198,90 +185,6 @@ int radeon_kfd_resume(struct radeon_device *rdev) return r; } -static u32 pool_to_domain(enum kgd_memory_pool p) -{ - switch (p) { - case KGD_POOL_FRAMEBUFFER: return RADEON_GEM_DOMAIN_VRAM; - default: return RADEON_GEM_DOMAIN_GTT; - } -} - -static int init_sa_manager(struct kgd_dev *kgd, unsigned int size) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - int r; - - BUG_ON(kgd == NULL); - - r = radeon_sa_bo_manager_init(rdev, &rdev->kfd_bo, - size, - RADEON_GPU_PAGE_SIZE, - RADEON_GEM_DOMAIN_GTT, - 
RADEON_GEM_GTT_WC); - - if (r) - return r; - - r = radeon_sa_bo_manager_start(rdev, &rdev->kfd_bo); - if (r) - radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo); - - return r; -} - -static void fini_sa_manager(struct kgd_dev *kgd) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - BUG_ON(kgd == NULL); - - radeon_sa_bo_manager_suspend(rdev, &rdev->kfd_bo); - radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo); -} - -static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment, - enum kgd_memory_pool pool, struct kgd_mem **mem) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - u32 domain; - int r; - - BUG_ON(kgd == NULL); - - domain = pool_to_domain(pool); - if (domain != RADEON_GEM_DOMAIN_GTT) { - dev_err(rdev->dev, - "Only allowed to allocate gart memory for kfd\n"); - return -EINVAL; - } - - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if ((*mem) == NULL) - return -ENOMEM; - - r = radeon_sa_bo_new(rdev, &rdev->kfd_bo, &(*mem)->sa_bo, size, - alignment); - if (r) { - dev_err(rdev->dev, "failed to get memory for kfd (%d)\n", r); - return r; - } - - (*mem)->ptr = radeon_sa_bo_cpu_addr((*mem)->sa_bo); - (*mem)->gpu_addr = radeon_sa_bo_gpu_addr((*mem)->sa_bo); - - return 0; -} - -static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem) -{ - struct radeon_device *rdev = (struct radeon_device *)kgd; - - BUG_ON(kgd == NULL); - - radeon_sa_bo_free(rdev, &mem->sa_bo, NULL); - kfree(mem); -} - static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr) -- cgit v1.2.3 From 6bbcde9803a8e385d18c5a235c961e11a8e20601 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 26 Oct 2014 09:44:09 +0200 Subject: drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface Signed-off-by: Oded Gabbay Reviewed-by: Alexey Skidanov Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'drivers') diff 
--git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 3a1219005ad5..cd3878fe6f77 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -110,18 +110,6 @@ struct kgd2kfd_calls { /** * struct kfd2kgd_calls * - * @init_sa_manager: Initialize an instance of the sa manager, used by - * amdkfd for all system memory allocations that are mapped to the GART - * address space - * - * @fini_sa_manager: Releases all memory allocations for amdkfd that are - * handled by kgd sa manager - * - * @allocate_mem: Allocate a buffer from amdkfd's sa manager. The buffer can - * be used for mqds, hpds, kernel queue, fence and runlists - * - * @free_mem: Frees a buffer that was allocated by amdkfd's sa manager - * * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture. * The buffer can be used for mqds, hpds, kernel queue, fence and runlists * @@ -168,16 +156,10 @@ struct kgd2kfd_calls { * */ struct kfd2kgd_calls { - /* Memory management. */ - int (*init_sa_manager)(struct kgd_dev *kgd, unsigned int size); - void (*fini_sa_manager)(struct kgd_dev *kgd); - int (*allocate_mem)(struct kgd_dev *kgd, size_t size, size_t alignment, - enum kgd_memory_pool pool, struct kgd_mem **mem); int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr); - void (*free_mem)(struct kgd_dev *kgd, struct kgd_mem *mem); void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); uint64_t (*get_vmem_size)(struct kgd_dev *kgd); -- cgit v1.2.3