summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Felix Kuehling <Felix.Kuehling@amd.com> 2016-07-26 17:30:54 -0400
committer: Felix Kuehling <Felix.Kuehling@amd.com> 2016-08-02 14:52:52 -0400
commit: e84d07348d98bf6647fc132cb4c14a3602a456f2 (patch)
tree: aa1bd44937bbaee49a4ed9cfb584077a71e64f46
parent: 8205bf1c13fa2fe1ef63950bd18ffdc7020d84ca (diff)
drm/amdkfd: Don't dereference kfd_process.mm
The kfd_process doesn't own a reference to the mm_struct, so the
mm_struct can disappear without warning even while the kfd_process
still exists. In fact, the delayed kfd_process teardown is triggered by
an MMU notifier when the mm_struct is destroyed. Permanently holding a
reference to the mm_struct would prevent this from happening.

Therefore, avoid dereferencing the kfd_process.mm pointer and make it
opaque. Use other ways to access the mm:

* In process context, use current->mm
* In calls that know the mm, use it directly
* Otherwise use get_task_mm to get a reference

Change-Id: Idcea859d0eaa6d62978b3a8ee54d83cbcfc0d7cd
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device.c 9
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 10
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_events.c 17
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h 7
4 files changed, 30 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index af3790f94cc5..0111510b08ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -708,9 +708,16 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm)
r = -ENODEV;
pdd = kfd_get_process_device_data(kfd, p);
- if (pdd)
+ if (pdd) {
+ if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ down_read(&mm->mmap_sem);
+
r = process_restore_queues(kfd->dqm, &pdd->qpd);
+ if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ up_read(&mm->mmap_sem);
+ }
+
up_read(&p->lock);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 6085a64c495a..fc3e21ba951f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -389,7 +389,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
BUG_ON(!dqm || !q || !q->mqd);
if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
- down_read(&q->process->mm->mmap_sem);
+ down_read(&current->mm->mmap_sem);
mutex_lock(&dqm->lock);
pdd = kfd_get_process_device_data(q->device, q->process);
@@ -446,7 +446,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
out_unlock:
mutex_unlock(&dqm->lock);
if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
- up_read(&q->process->mm->mmap_sem);
+ up_read(&current->mm->mmap_sem);
return retval;
}
@@ -521,14 +521,10 @@ int process_restore_queues(struct device_queue_manager *dqm,
{
struct queue *q, *next;
struct mqd_manager *mqd;
- struct kfd_process_device *pdd =
- container_of(qpd, struct kfd_process_device, qpd);
int retval = 0;
BUG_ON(!dqm || !qpd);
- if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
- down_read(&pdd->process->mm->mmap_sem);
mutex_lock(&dqm->lock);
if (qpd->evicted == 0) /* already restored, do nothing */
goto out_unlock;
@@ -568,8 +564,6 @@ int process_restore_queues(struct device_queue_manager *dqm,
out_unlock:
mutex_unlock(&dqm->lock);
- if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
- up_read(&pdd->process->mm->mmap_sem);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index f8056b153c00..f926e9030a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1025,14 +1025,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
* running so the lookup function returns a read-locked process.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct mm_struct *mm;
if (!p)
return; /* Presumably process exited. */
+ /* Take a safe reference to the mm_struct, which may otherwise
+ * disappear even while the kfd_process is still referenced.
+ */
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ up_read(&p->lock);
+ return; /* Process is exiting */
+ }
+
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
- down_read(&p->mm->mmap_sem);
- vma = find_vma(p->mm, address);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
memory_exception_data.gpu_id = dev->id;
memory_exception_data.va = address;
@@ -1058,7 +1068,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
}
}
- up_read(&p->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
+ mmdrop(mm);
mutex_lock(&p->event_mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 054a7845403e..cea4efd3744d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -607,7 +607,12 @@ struct kfd_process {
*/
struct hlist_node kfd_processes;
- struct mm_struct *mm;
+ /*
+ * Opaque pointer to mm_struct. We don't hold a reference to
+ * it so it should never be dereferenced from here. This is
+ * only used for looking up processes by their mm.
+ */
+ void *mm;
struct kref ref;
struct work_struct release_work;