Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  268
1 file changed, 223 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 80113c335966..92bba461e1e0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -30,13 +30,45 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
+#include <linux/idr.h>
#include <linux/kfd_ioctl.h>
+#include <linux/pid.h>
+#include <linux/interval_tree.h>
#include <kgd_kfd_interface.h>
+#include "amd_rdma.h"
+
#define KFD_SYSFS_FILE_MODE 0444
-#define KFD_MMAP_DOORBELL_MASK 0x8000000000000
-#define KFD_MMAP_EVENTS_MASK 0x4000000000000
+/* GPU ID hash width in bits */
+#define KFD_GPU_ID_HASH_WIDTH 16
+
+/* Use upper bits of mmap offset to store KFD driver-specific information.
+ * BITS[63:62] - Encode MMAP type
+ * BITS[61:46] - Encode gpu_id. Identifies the GPU the offset belongs to
+ * BITS[45:40] - Reserved. Not used.
+ * BITS[39:0]  - MMAP offset value. Used by TTM.
+ *
+ * NOTE: struct vm_area_struct.vm_pgoff stores the offset in pages, so these
+ * defines are w.r.t. PAGE_SIZE
+ */
+#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_MAP_BO (0x1ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_RESERVED_MEM (0x0ULL << KFD_MMAP_TYPE_SHIFT)
+
+#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
+ << KFD_MMAP_GPU_ID_SHIFT)
+#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
+ & KFD_MMAP_GPU_ID_MASK)
+#define KFD_MMAP_GPU_ID_GET(offset) (((offset) & KFD_MMAP_GPU_ID_MASK) \
+ >> KFD_MMAP_GPU_ID_SHIFT)
+
+#define KFD_MMAP_OFFSET_VALUE_MASK (0xFFFFFFFFFFULL >> PAGE_SHIFT)
+#define KFD_MMAP_OFFSET_VALUE_GET(offset) ((offset) & KFD_MMAP_OFFSET_VALUE_MASK)
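
Taken together, these macros pack a type tag and a gpu_id into the page-based
offset that later reaches the driver through vm_pgoff. A minimal sketch of the
round-trip (the surrounding handler code is illustrative, not part of this
patch):

    /* Encode: tag the offset as a doorbell mapping for one GPU */
    uint64_t pgoff = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(gpu_id);

    /* Decode: vm_pgoff is already in pages, so the masks apply directly */
    switch (vma->vm_pgoff & KFD_MMAP_TYPE_MASK) {
    case KFD_MMAP_TYPE_DOORBELL:
            gpu_id = KFD_MMAP_GPU_ID_GET(vma->vm_pgoff);
            break;
    case KFD_MMAP_TYPE_RESERVED_MEM:
            pgoff = KFD_MMAP_OFFSET_VALUE_GET(vma->vm_pgoff);
            break;
    }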
/*
* When working with cp scheduler we should assign the HIQ manually or via
@@ -48,8 +80,6 @@
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0
-/* GPU ID hash width in bits */
-#define KFD_GPU_ID_HASH_WIDTH 16
/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct) \
@@ -74,12 +104,26 @@ extern int max_num_of_queues_per_device;
/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;
+extern int cwsr_enable;
+
+/*
+ * Kernel module parameter to specify the maximum number of processes
+ * per HW scheduler
+ */
+extern int hws_max_conc_proc;
+
/*
* Kernel module parameter to specify whether to send sigterm to HSA process on
* unhandled exception
*/
extern int send_sigterm;
+/*
+ * Kernel module parameter to simulate a large-BAR machine on machines
+ * without large-BAR support.
+ */
+extern int debug_largebar;
+
/**
* enum kfd_sched_policy
*
@@ -114,14 +158,17 @@ enum cache_policy {
enum asic_family_type {
CHIP_KAVERI = 0,
- CHIP_CARRIZO
+ CHIP_CARRIZO,
+ CHIP_TONGA,
+ CHIP_FIJI
};
+#define KFD_IS_VI(chip) ((chip) >= CHIP_CARRIZO && (chip) <= CHIP_FIJI)
+#define KFD_IS_DGPU(chip) ((chip) >= CHIP_TONGA && (chip) <= CHIP_FIJI)
+
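
For illustration only, the new family checks condense per-ASIC branching;
asic_family is assumed to hold one of the asic_family_type values above, and
the helper name is hypothetical:

    if (KFD_IS_DGPU(dev->device_info->asic_family)) {
            /* Tonga/Fiji dGPUs take the dGPU aperture path */
            kfd_init_dgpu_apertures(pdd);  /* hypothetical helper */
    }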
struct kfd_event_interrupt_class {
- bool (*interrupt_isr)(struct kfd_dev *dev,
- const uint32_t *ih_ring_entry);
- void (*interrupt_wq)(struct kfd_dev *dev,
- const uint32_t *ih_ring_entry);
+ bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
+ void (*interrupt_wq)(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
};
struct kfd_device_info {
@@ -132,6 +179,7 @@ struct kfd_device_info {
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
+ bool is_need_iommu_device;
};
struct kfd_mem_obj {
@@ -141,6 +189,12 @@ struct kfd_mem_obj {
uint32_t *cpu_ptr;
};
+struct kfd_vmid_info {
+ uint32_t first_vmid_kfd;
+ uint32_t last_vmid_kfd;
+ uint32_t vmid_num_kfd;
+};
+
struct kfd_dev {
struct kgd_dev *kgd;
@@ -165,11 +219,12 @@ struct kfd_dev {
*/
struct kgd2kfd_shared_resources shared_resources;
+ struct kfd_vmid_info vm_info;
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
- DECLARE_BITMAP(doorbell_available_index,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ unsigned long doorbell_available_index[DIV_ROUND_UP(
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
void *gtt_mem;
uint64_t gtt_start_gpu_addr;
@@ -179,6 +234,11 @@ struct kfd_dev {
unsigned int gtt_sa_chunk_size;
unsigned int gtt_sa_num_of_chunks;
+ /* QCM Device instance */
+ struct device_queue_manager *dqm;
+
+ bool init_complete;
+
/* Interrupts */
void *interrupt_ring;
size_t interrupt_ring_size;
@@ -187,10 +247,6 @@ struct kfd_dev {
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
- /* QCM Device instance */
- struct device_queue_manager *dqm;
-
- bool init_complete;
/*
* Interrupts of interest to KFD are copied
* from the HW ring into a SW ring.
@@ -198,7 +254,26 @@ struct kfd_dev {
bool interrupts_active;
/* Debug manager */
- struct kfd_dbgmgr *dbgmgr;
+ struct kfd_dbgmgr *dbgmgr;
+
+ /* MEC firmware version */
+ uint16_t mec_fw_version;
+
+ /* Maximum process number mapped to HW scheduler */
+ unsigned int max_proc_per_quantum;
+
+ /* cwsr */
+ bool cwsr_enabled;
+ struct page *cwsr_pages;
+ uint32_t cwsr_size;
+ uint32_t tma_offset; /* Offset for TMA from the start of cwsr_mem */
+};
+
+struct kfd_bo {
+ void *mem;
+ struct interval_tree_node it;
+ struct kfd_dev *dev;
+ struct list_head cb_data_head;
};
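
As a hedged sketch of how a kfd_bo would be registered for the interval
lookups declared later in this header (start and length are assumed
byte-address inputs; the tree root is the process's bo_interval_tree):

    struct kfd_bo *buf_obj = kzalloc(sizeof(*buf_obj), GFP_KERNEL);

    buf_obj->dev = dev;
    buf_obj->mem = mem;
    buf_obj->it.start = start;
    buf_obj->it.last = start + length - 1;  /* interval ends are inclusive */
    INIT_LIST_HEAD(&buf_obj->cb_data_head);
    interval_tree_insert(&buf_obj->it, &p->bo_interval_tree);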
/* KGD2KFD callbacks */
@@ -221,22 +296,22 @@ void kfd_chardev_exit(void);
struct device *kfd_chardev(void);
/**
- * enum kfd_preempt_type_filter
+ * enum kfd_unmap_queues_filter
*
- * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue.
+ * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts a single queue.
*
- * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the
+ * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
* running queues list.
*
- * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to
+ * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to a
* specific process.
*
*/
-enum kfd_preempt_type_filter {
- KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
- KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
- KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
- KFD_PREEMPT_TYPE_FILTER_BY_PASID
+enum kfd_unmap_queues_filter {
+ KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+ KFD_UNMAP_QUEUES_FILTER_BY_PASID
};
enum kfd_preempt_type {
@@ -324,6 +399,7 @@ struct queue_properties {
uint32_t __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
+ bool is_evicted; /* true -> queue is evicted */
bool is_active;
/* Not relevant for user mode queues in cp scheduling */
unsigned int vmid;
@@ -336,6 +412,11 @@ struct queue_properties {
uint32_t eop_ring_buffer_size;
uint64_t ctx_save_restore_area_address;
uint32_t ctx_save_restore_area_size;
+ uint32_t ctl_stack_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ /* Relevant for CU masking */
+ uint32_t cu_mask;
};
/**
@@ -424,6 +505,7 @@ struct qcm_process_device {
unsigned int queue_count;
unsigned int vmid;
bool is_debug;
+ unsigned evicted; /* eviction counter, 0=active */
/*
* All the memory management data should be here too
*/
@@ -436,8 +518,22 @@ struct qcm_process_device {
uint32_t gds_size;
uint32_t num_gws;
uint32_t num_oac;
+ uint32_t sh_hidden_private_base;
+
+ /* CWSR memory */
+ int cwsr_mem_handle;
+ uint64_t cwsr_base;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ void *cwsr_kaddr;
};
+/* 8-byte handle containing the GPU ID in the most significant 4 bytes
+ * and the idr_handle in the least significant 4 bytes
+ */
+#define MAKE_HANDLE(gpu_id, idr_handle) \
+ (((uint64_t)(gpu_id) << 32) + (idr_handle))
+#define GET_GPU_ID(handle) ((handle) >> 32)
+#define GET_IDR_HANDLE(handle) ((handle) & 0xFFFFFFFF)
+
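
A quick round-trip of the handle macros (illustrative; gpu_id would come from
kfd_get_gpu_id() declared below and idr_handle from the per-device IDR):

    uint64_t handle = MAKE_HANDLE(gpu_id, idr_handle);

    uint32_t gpu = GET_GPU_ID(handle);   /* most significant 4 bytes */
    int idr_h = GET_IDR_HANDLE(handle);  /* least significant 4 bytes */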
/* Data that is per-process-per device. */
struct kfd_process_device {
/*
@@ -449,6 +545,8 @@ struct kfd_process_device {
/* The device that owns this data. */
struct kfd_dev *dev;
+ /* The process that owns this kfd_process_device. */
+ struct kfd_process *process;
/* per-process-per device QCM data structure */
struct qcm_process_device qpd;
@@ -460,10 +558,23 @@ struct kfd_process_device {
uint64_t gpuvm_limit;
uint64_t scratch_base;
uint64_t scratch_limit;
+ uint64_t dgpu_base;
+ uint64_t dgpu_limit;
+ uint64_t mapped_size;
+ uint64_t last_eviction;
+ bool evicted;
+
+ uint64_t sh_hidden_private_base_vmid;
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
bool bound;
+ /* VM context for GPUVM allocations */
+ void *vm;
+
+ /* GPUVM allocations storage */
+ struct idr alloc_idr;
+
/* This flag tells if we should reset all
* wavefronts on process termination
*/
@@ -482,7 +593,7 @@ struct kfd_process {
struct mm_struct *mm;
- struct mutex mutex;
+ struct rw_semaphore lock;
/*
* In any process, the thread that started main() is the lead
@@ -513,6 +624,8 @@ struct kfd_process {
/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
struct kfd_queue **queues;
+ unsigned long allocated_queue_bitmap[DIV_ROUND_UP(
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
+
/*Is the user space process 32 bit?*/
bool is_32bit_user_mode;
@@ -520,10 +633,12 @@ struct kfd_process {
struct mutex event_mutex;
/* All events in process hashed by ID, linked on kfd_event.events. */
DECLARE_HASHTABLE(events, 4);
- struct list_head signal_event_pages; /* struct slot_page_header.
- event_pages */
+ struct list_head signal_event_pages; /* struct slot_page_header.event_pages */
u32 next_nonsignal_event_id;
size_t signal_event_count;
+ size_t debug_event_count;
+
+ struct rb_root bo_interval_tree;
};
/**
@@ -546,9 +661,10 @@ struct amdkfd_ioctl_desc {
void kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
-struct kfd_process *kfd_create_process(const struct task_struct *);
+struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
+struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
struct kfd_process *p);
@@ -558,6 +674,29 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
+int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma);
+
+/* KFD process API for creating and translating handles */
+int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+ void *mem, uint64_t start,
+ uint64_t length);
+void *kfd_process_device_translate_handle(struct kfd_process_device *p,
+ int handle);
+struct kfd_bo *kfd_process_device_find_bo(struct kfd_process_device *pdd,
+ int handle);
+void *kfd_process_find_bo_from_interval(struct kfd_process *p,
+ uint64_t start_addr,
+ uint64_t last_addr);
+void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
+ int handle);
+
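A sketch of how these helpers chain together over a BO's lifetime (error
handling trimmed; everything besides the declared API is illustrative):

    /* Allocate: stash the BO and wrap the IDR slot in a 64-bit handle */
    int idr_handle = kfd_process_device_create_obj_handle(pdd, mem,
                                                          va_addr, size);
    uint64_t user_handle = MAKE_HANDLE(kfd_get_gpu_id(pdd->dev), idr_handle);

    /* Free: translate the handle back, then drop the IDR entry */
    void *obj = kfd_process_device_translate_handle(pdd,
                                            GET_IDR_HANDLE(user_handle));
    if (obj)
            kfd_process_device_remove_obj_handle(pdd,
                                            GET_IDR_HANDLE(user_handle));
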
+void run_rdma_free_callback(struct kfd_bo *buf_obj);
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
+
+/* kfd dgpu memory */
+int kfd_map_memory_to_gpu(struct kfd_dev *dev, void *mem,
+ struct kfd_process *p, struct kfd_process_device *pdd);
+
/* Process device data iterator */
struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
@@ -600,7 +739,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
-struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
+uint32_t kfd_get_gpu_id(struct kfd_dev *dev);
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
+int kfd_numa_node_to_apic_id(int numa_node_id);
+int kfd_get_proximity_domain(const struct pci_bus *bus);
/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
@@ -615,11 +758,13 @@ int kgd2kfd_resume(struct kfd_dev *kfd);
/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);
+int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd,
+ uint64_t base, uint64_t limit);
/* Queue Context Management */
struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
-int init_queue(struct queue **q, struct queue_properties properties);
+int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
@@ -630,11 +775,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
+int kfd_process_vm_fault(struct device_queue_manager *dqm,
+ unsigned int pasid);
/* Process Queue Manager */
struct process_queue_node {
@@ -649,18 +798,16 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
- unsigned int flags,
- enum kfd_queue_type type,
unsigned int *qid);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
+int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
+ struct queue_properties *p);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
unsigned int qid);
-
-int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
- unsigned int fence_value,
- unsigned long timeout);
+int kgd2kfd_quiesce_mm(struct kfd_dev *kfd, struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm);
/* Packet Manager */
@@ -668,7 +815,9 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT (10)
-#define KFD_UNMAP_LATENCY (150)
+#define KFD_UNMAP_LATENCY (40)
+
+struct packet_manager_firmware;
struct packet_manager {
struct device_queue_manager *dqm;
@@ -676,9 +825,19 @@ struct packet_manager {
struct mutex lock;
bool allocated;
struct kfd_mem_obj *ib_buffer_obj;
+
+ struct packet_manager_firmware *pmf;
};
-int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
+struct packet_manager_firmware {
+ /* Support different firmware versions for map process packet */
+ int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd);
+ int (*get_map_process_packet_size)(void);
+};
+
+int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm,
+ uint16_t fw_ver);
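
The fw_ver argument lets pm_init() pick a pmf ops table that matches the
map-process packet format of the running MEC firmware. A hedged sketch,
reusing the FW-version defines at the end of this header (both tables and
their callbacks are hypothetical):

    static const struct packet_manager_firmware pmf_scratch = {
            .map_process = pm_map_process_scratch,
            .get_map_process_packet_size = pm_map_process_scratch_size,
    };

    /* Inside pm_init(): select the table by MEC firmware version */
    pm->pmf = (fw_ver >= KFD_SCRATCH_CZ_FW_VER) ?
                    &pmf_scratch : &pmf_default;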
void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res);
@@ -687,7 +846,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint32_t fence_value);
int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
- enum kfd_preempt_type_filter mode,
+ enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset,
unsigned int sdma_engine);
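
An illustrative call site: evicting every queue that belongs to one process
maps onto the renamed filter like so (pm, pasid, and the compute queue type
are assumed from the caller's context):

    pm_send_unmap_queue(pm, KFD_QUEUE_TYPE_COMPUTE,
                        KFD_UNMAP_QUEUES_FILTER_BY_PASID,
                        pasid, false, 0);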
@@ -696,6 +855,9 @@ void pm_release_ib(struct packet_manager *pm);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+ unsigned int fence_value,
+ unsigned long timeout);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
@@ -714,8 +876,7 @@ int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms,
enum kfd_event_wait_result *wait_result);
-void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
- uint32_t valid_id_bits);
+void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
unsigned int pasid, unsigned long address,
bool is_write_requested, bool is_execute_requested);
@@ -723,11 +884,28 @@ void kfd_signal_hw_exception_event(unsigned int pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
- uint32_t event_type, bool auto_reset, uint32_t node_id,
- uint32_t *event_id, uint32_t *event_trigger_data,
- uint64_t *event_page_offset, uint32_t *event_slot_index);
+ uint32_t event_type, bool auto_reset, uint32_t node_id,
+ uint32_t *event_id, uint32_t *event_trigger_data,
+ uint64_t *event_page_offset, uint32_t *event_slot_index,
+ void *kern_addr);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
+void kfd_free_signal_page_dgpu(struct kfd_process *p, uint64_t handle);
+
+void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
+ struct kfd_vm_fault_info *info);
+
+void radeon_flush_tlb(struct kfd_dev *dev, uint32_t pasid);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+int kgd2kfd_evict_bo(struct kfd_dev *dev, void *mem);
+int kgd2kfd_restore(struct kfd_dev *kfd);
+int evict_size(struct kfd_process *p, int size, int type);
+int evict_bo(struct kfd_dev *dev, void *mem);
+int restore(struct kfd_dev *kfd);
+
+#define KFD_SCRATCH_CZ_FW_VER 600
+#define KFD_SCRATCH_KV_FW_VER 413
+#define KFD_MULTI_PROC_MAPPING_HWS_SUPPORT 600
+#define KFD_CWSR_CZ_FW_VER 625
#endif