diff options
author | Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> | 2016-09-02 15:01:41 -0400 |
---|---|---|
committer | Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> | 2016-09-21 18:01:47 -0400 |
commit | 949cfecd0d080873eb036af9c534c4dd13596080 (patch) | |
tree | bc71c34ffa341e01ac69f68335124d4406dd837b /drivers | |
parent | e85bac90f6e26119fec89031f998985b5c23433b (diff) |
drm/amdkfd: Add kgd2kfd schedule_evict_and_restore_process
Change-Id: I27860af58c54449a9ba1fc0a04e0436edb7fae8b
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 116 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 6 |
5 files changed, 136 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0ce957253e80..6acc5fc260bc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/debugfs.h> +#include <linux/fence.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers.h" @@ -745,6 +746,42 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm) return r; } +/* quiesce_process_mm - + * Quiesce all user queues that belongs to given process p + */ +static int quiesce_process_mm(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + int r = 0; + unsigned int n_evicted = 0; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + r = process_evict_queues(pdd->dev->dqm, &pdd->qpd); + if (r != 0) { + pr_err("Failed to evict process queues\n"); + goto fail; + } + n_evicted++; + } + + return r; + +fail: + /* To keep state consistent, roll back partial eviction by + * restoring queues + */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + if (n_evicted == 0) + break; + if (process_restore_queues(pdd->dev->dqm, &pdd->qpd)) + pr_err("Failed to restore queues\n"); + + n_evicted--; + } + + return r; +} + /* resume_process_mm - * Resume all user queues that belongs to given process p. The caller must * ensure that process p context is valid. @@ -827,6 +864,85 @@ void kfd_restore_bo_worker(struct work_struct *work) pr_err("Failed to resume user queues\n"); } +/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will + * prepare for safe eviction of KFD BOs that belong to the specified + * process. 
+ * + * @mm: mm_struct that identifies the specified KFD process + * @fence: eviction fence attached to KFD process BOs + * + */ +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct fence *fence) +{ + struct kfd_process *p; + + if (!fence) + return -EINVAL; + + if (fence_is_signaled(fence)) + return 0; + + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ENODEV; + + if (work_pending(&p->eviction_work.work)) { + /* It is possible that TTM has lined up a couple of BOs of the same + * process to be evicted. Check if the fence is the same, which + * indicates that the previously scheduled work item is not completed + */ + if (p->eviction_work.eviction_fence == fence) + goto out; + else { + WARN(1, "Starting new evict with previous evict is not completed\n"); + cancel_work_sync(&p->eviction_work.work); + } + } + + /* During process initialization eviction_work.work is initialized + * to kfd_evict_bo_worker + */ + p->eviction_work.eviction_fence = fence_get(fence); + schedule_work(&p->eviction_work.work); +out: + kfd_unref_process(p); + return 0; +} + +void kfd_evict_bo_worker(struct work_struct *work) +{ + int ret; + struct kfd_process *p; + struct kfd_eviction_work *eviction_work; + + eviction_work = container_of(work, struct kfd_eviction_work, + work); + + /* Process termination destroys this worker thread. So during the + * lifetime of this thread, kfd_process p will be valid + */ + p = container_of(eviction_work, struct kfd_process, eviction_work); + + /* Narrow window of overlap between restore and evict work item is + * possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos unreserves + * KFD BOs, it is possible for them to be evicted again. But restore has a few more + * steps to finish. 
So lets wait for the restore work to complete + */ + if (delayed_work_pending(&p->restore_work)) + flush_delayed_work(&p->restore_work); + + ret = quiesce_process_mm(p); + if (!ret) { + fence_signal(eviction_work->eviction_fence); + fence_put(eviction_work->eviction_fence); + kfd_schedule_restore_bos_and_queues(p); + } else { + pr_err("Failed to quiesce user queues. Cannot evict BOs\n"); + } + +} + static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 914a0cd04cfc..42c559bd76da 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .restore = kgd2kfd_restore, .quiesce_mm = kgd2kfd_quiesce_mm, .resume_mm = kgd2kfd_resume_mm, + .schedule_evict_and_restore_process = + kgd2kfd_schedule_evict_and_restore_process, }; int sched_policy = KFD_SCHED_POLICY_HWS; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b3f26656f8bb..9dea8f210fc1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -542,12 +542,20 @@ struct qcm_process_device { }; /* KFD Memory Eviction */ +struct kfd_eviction_work { + struct work_struct work; + struct fence *eviction_fence; +}; + /* Appox. wait time before attempting to restore evicted BOs */ #define PROCESS_RESTORE_TIME_MS 2000 /* Approx. 
back off time if restore fails due to lack of memory */ #define PROCESS_BACK_OFF_TIME_MS 1000 +void kfd_evict_bo_worker(struct work_struct *work); void kfd_restore_bo_worker(struct work_struct *work); +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct fence *fence); /*8 byte handle containing GPU ID in the most significant 4 bytes and @@ -681,7 +689,8 @@ struct kfd_process { void *master_vm; - /* For restoring BOs after eviction */ + /* Work items for evicting and restoring BOs */ + struct kfd_eviction_work eviction_work; struct delayed_work restore_work; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3121061d0ed6..d7a69e3f8b5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -509,6 +509,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, p = container_of(mn, struct kfd_process, mmu_notifier); BUG_ON(p->mm != mm); + cancel_work_sync(&p->eviction_work.work); cancel_delayed_work_sync(&p->restore_work); mutex_lock(&kfd_processes_mutex); @@ -668,6 +669,7 @@ static struct kfd_process *create_process(const struct task_struct *thread, if (err) goto err_init_cwsr; + INIT_WORK(&process->eviction_work.work, kfd_evict_bo_worker); INIT_DELAYED_WORK(&process->restore_work, kfd_restore_bo_worker); return process; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 25a47578bc21..b62b1eefa4d8 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -31,6 +31,7 @@ #include <linux/types.h> #include <linux/mm_types.h> #include <linux/scatterlist.h> +#include <linux/fence.h> struct pci_dev; @@ -384,6 +385,9 @@ struct kfd2kgd_calls { * * @resume_mm: Resume user queue access to specified MM address space * + * @schedule_evict_and_restore_process: Schedules work queue that will prepare + * for safe 
eviction of KFD BOs that belong to the specified process. + * * This structure contains function callback pointers so the kgd driver * will notify to the amdkfd about certain status changes. * @@ -402,6 +406,8 @@ struct kgd2kfd_calls { int (*restore)(struct kfd_dev *kfd); int (*quiesce_mm)(struct kfd_dev *kfd, struct mm_struct *mm); int (*resume_mm)(struct kfd_dev *kfd, struct mm_struct *mm); + int (*schedule_evict_and_restore_process)(struct mm_struct *mm, + struct fence *fence); }; int kgd2kfd_init(unsigned interface_version, |