diff options
-rw-r--r-- | drivers/infiniband/core/rdma_core.c | 117 | ||||
-rw-r--r-- | drivers/infiniband/core/rdma_core.h | 3 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs.h | 6 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 98 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 5 |
6 files changed, 127 insertions, 108 deletions
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 4545c661acaa..eeed6374134c 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -32,6 +32,7 @@ #include <linux/file.h> #include <linux/anon_inodes.h> +#include <linux/sched/mm.h> #include <rdma/ib_verbs.h> #include <rdma/uverbs_types.h> #include <linux/rcupdate.h> @@ -284,11 +285,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, } ret = uverbs_try_lock_object(uobj, exclusive); - if (ret) { - WARN(uobj->ufile->cleanup_reason, - "ib_uverbs: Trying to lookup_get while cleanup context\n"); + if (ret) goto free; - } return uobj; free: @@ -694,6 +692,71 @@ void uverbs_close_fd(struct file *f) uverbs_uobject_put(uobj); } +static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + struct ib_device *ib_dev = ibcontext->device; + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + down_write(&owning_mm->mmap_sem); + ib_dev->disassociate_ucontext(ibcontext); + up_write(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + +/* + * Drop the ucontext off the ufile and completely disconnect it from the + * ib_device + */ +static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) +{ + struct ib_ucontext *ucontext = ufile->ucontext; + int ret; + + if (reason == RDMA_REMOVE_DRIVER_REMOVE) + ufile_disassociate_ucontext(ucontext); + + put_pid(ucontext->tgid); + ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); + + /* + * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove + * the error return. + */ + ret = ucontext->device->dealloc_ucontext(ufile->ucontext); + WARN_ON(ret); + + ufile->ucontext = NULL; +} + static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { @@ -710,7 +773,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * We take and release the lock per traversal in order to let * other threads (which might still use the FDs) chance to run. */ - ufile->cleanup_reason = reason; list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* * if we hit this WARN_ON, that means we are @@ -738,18 +800,43 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, return ret; } -void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) +/* + * Destroy the uncontext and every uobject associated with it. If called with + * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has + * been completed and ufile->ucontext is NULL. + * + * This is internally locked and can be called in parallel from multiple + * contexts. + */ +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - enum rdma_remove_reason reason = device_removed ? - RDMA_REMOVE_DRIVER_REMOVE : - RDMA_REMOVE_CLOSE; + if (reason == RDMA_REMOVE_CLOSE) { + /* + * During destruction we might trigger something that + * synchronously calls release on any file descriptor. For + * this reason all paths that come from file_operations + * release must use try_lock. They can progress knowing that + * there is an ongoing uverbs_destroy_ufile_hw that will clean + * up the driver resources. + */ + if (!mutex_trylock(&ufile->ucontext_lock)) + return; + + } else { + mutex_lock(&ufile->ucontext_lock); + } + + down_write(&ufile->hw_destroy_rwsem); /* - * Waits for all remove_commit and alloc_commit to finish. Logically, We - * want to hold this forever as the context is going to be destroyed, - * but we'll release it since it causes a "held lock freed" BUG message. + * If a ucontext was never created then we can't have any uobjects to + * cleanup, nothing to do. */ - down_write(&ufile->hw_destroy_rwsem); + if (!ufile->ucontext) + goto done; + + ufile->ucontext->closing = true; ufile->ucontext->cleanup_retryable = true; while (!list_empty(&ufile->uobjects)) if (__uverbs_cleanup_ufile(ufile, reason)) { @@ -764,7 +851,11 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) if (!list_empty(&ufile->uobjects)) __uverbs_cleanup_ufile(ufile, reason); + ufile_destroy_ucontext(ufile, reason); + +done: up_write(&ufile->hw_destroy_rwsem); + mutex_unlock(&ufile->ucontext_lock); } const struct uverbs_obj_type_class uverbs_fd_class = { diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index db2339330f6f..a736b46d18e3 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -49,7 +49,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, uint16_t method); -void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed); +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason); /* * uverbs_uobject_get is called in order to increase the reference count on diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 58b16e840e56..ca9b0450d3f9 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -136,9 +136,9 @@ struct ib_uverbs_completion_event_file { struct ib_uverbs_file { struct kref ref; - struct mutex mutex; - struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; + /* Protects writing to ucontext */ + struct mutex ucontext_lock; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_async_event_file *async_file; @@ -155,8 +155,6 @@ struct ib_uverbs_file { spinlock_t uobjects_lock; struct list_head uobjects; - enum rdma_remove_reason cleanup_reason; - struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 409fd46a2a99..f2611c760184 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -84,7 +84,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&file->mutex); + mutex_lock(&file->ucontext_lock); if (file->ucontext) { ret = -EINVAL; @@ -150,7 +150,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, fd_install(resp.async_fd, filp); - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return in_len; @@ -169,7 +169,7 @@ err_alloc: ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); err: - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return ret; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 77faf32fc997..78d79020ea5c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -41,8 +41,6 @@ #include <linux/fs.h> #include <linux/poll.h> #include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/sched/task.h> #include <linux/file.h> #include <linux/cdev.h> #include <linux/anon_inodes.h> @@ -227,21 +225,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } } -static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file, - bool device_removed) -{ - struct ib_ucontext *context = file->ucontext; - - context->closing = 1; - uverbs_cleanup_ufile(file, device_removed); - put_pid(context->tgid); - - ib_rdmacg_uncharge(&context->cg_obj, context->device, - RDMACG_RESOURCE_HCA_HANDLE); - - return context->device->dealloc_ucontext(context); -} - static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); @@ -886,8 +869,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) spin_lock_init(&file->idr_lock); idr_init(&file->idr); kref_init(&file->ref); - mutex_init(&file->mutex); - mutex_init(&file->cleanup_mutex); + mutex_init(&file->ucontext_lock); spin_lock_init(&file->uobjects_lock); INIT_LIST_HEAD(&file->uobjects); @@ -917,12 +899,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - mutex_lock(&file->cleanup_mutex); - if (file->ucontext) { - ib_uverbs_cleanup_ufile(file, false); - file->ucontext = NULL; - } - mutex_unlock(&file->cleanup_mutex); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); idr_destroy(&file->idr); mutex_lock(&file->device->lists_mutex); @@ -1109,44 +1086,6 @@ err: return; } -static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext) -{ - struct ib_device *ib_dev = ibcontext->device; - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - down_write(&owning_mm->mmap_sem); - ib_dev->disassociate_ucontext(ibcontext); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); -} - static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { @@ -1162,39 +1101,24 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { - struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; list_del(&file->list); kref_get(&file->ref); - mutex_unlock(&uverbs_dev->lists_mutex); - - - mutex_lock(&file->cleanup_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; - mutex_unlock(&file->cleanup_mutex); - /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ufile + /* We must release the mutex before going ahead and calling + * uverbs_cleanup_ufile, as it might end up indirectly calling + * uverbs_close, for example due to freeing the resources (e.g + * mmput). */ - if (ucontext) { - /* We must release the mutex before going ahead and - * calling disassociate_ucontext. disassociate_ucontext - * might end up indirectly calling uverbs_close, - * for example due to freeing the resources - * (e.g mmput). - */ - ib_uverbs_event_handler(&file->event_handler, &event); - ib_uverbs_disassociate_ucontext(ucontext); - mutex_lock(&file->cleanup_mutex); - ib_uverbs_cleanup_ufile(file, true); - mutex_unlock(&file->cleanup_mutex); - } + mutex_unlock(&uverbs_dev->lists_mutex); - mutex_lock(&uverbs_dev->lists_mutex); + ib_uverbs_event_handler(&file->event_handler, &event); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); + + mutex_lock(&uverbs_dev->lists_mutex); } while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 99bcf64a4762..42cbf8eabe9d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1479,6 +1479,11 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; + /* + * 'closing' can be read by the driver only during a destroy callback, + * it is set when we are closing the file descriptor and indicates + * that mm_sem may be locked. + */ int closing; bool cleanup_retryable; |