diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 14:08:20 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 14:08:20 -0800 |
commit | 23281c8034879c47639ee0f76c34d13ef6beb8ce (patch) | |
tree | bacd31cad4a799b1fc2f77827cbaf7a929c377b8 | |
parent | f0b60bfa952458286f43a63c07b0eea170b2cc95 (diff) | |
parent | ab97f87325e28b7ef7717e6cb62e8da14a7176e1 (diff) |
Merge branch 'fsnotify' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify updates from Jan Kara:
- fixes of use-after-free issues when handling fanotify permission
events from Miklos
- refcount_t conversions from Elena
- fixes of ENOMEM handling in dnotify and fsnotify from me
* 'fsnotify' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
fsnotify: convert fsnotify_mark.refcnt from atomic_t to refcount_t
fanotify: clean up CONFIG_FANOTIFY_ACCESS_PERMISSIONS ifdefs
fsnotify: clean up fsnotify()
fanotify: fix fsnotify_prepare_user_wait() failure
fsnotify: fix pinning group in fsnotify_prepare_user_wait()
fsnotify: pin both inode and vfsmount mark
fsnotify: clean up fsnotify_prepare/finish_user_wait()
fsnotify: convert fsnotify_group.refcnt from atomic_t to refcount_t
fsnotify: Protect bail out path of fsnotify_add_mark_locked() properly
dnotify: Handle errors from fsnotify_add_mark_locked() in fcntl_dirnotify()
-rw-r--r-- | fs/notify/dnotify/dnotify.c | 7 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 49 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.h | 8 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 43 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 101 | ||||
-rw-r--r-- | fs/notify/group.c | 6 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 4 | ||||
-rw-r--r-- | fs/notify/mark.c | 121 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 7 | ||||
-rw-r--r-- | kernel/audit_tree.c | 2 |
10 files changed, 162 insertions, 186 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index cba328315929..63a1ca4b9dee 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -319,7 +319,11 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + error = fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + if (error) { + mutex_unlock(&dnotify_group->mark_mutex); + goto out_err; + } spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; @@ -345,6 +349,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) */ if (dn_mark == new_dn_mark) destroy = 1; + error = 0; goto out; } diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 09640b546363..54cf2d21b547 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -36,15 +36,13 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) pr_debug("%s: list=%p event=%p\n", __func__, list, event); -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* * Don't merge a permission event with any other event so that we know * the event structure we have created in fanotify_handle_event() is the * one we should check for permission response. 
*/ - if (event->mask & FAN_ALL_PERM_EVENTS) + if (fanotify_is_perm_event(event->mask)) return 0; -#endif list_for_each_entry_reverse(test_event, list, list) { if (should_merge(test_event, event)) { @@ -56,7 +54,6 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) return 0; } -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static int fanotify_get_response(struct fsnotify_group *group, struct fanotify_perm_event_info *event, struct fsnotify_iter_info *iter_info) @@ -65,19 +62,8 @@ static int fanotify_get_response(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - /* - * fsnotify_prepare_user_wait() fails if we race with mark deletion. - * Just let the operation pass in that case. - */ - if (!fsnotify_prepare_user_wait(iter_info)) { - event->response = FAN_ALLOW; - goto out; - } - wait_event(group->fanotify_data.access_waitq, event->response); - fsnotify_finish_user_wait(iter_info); -out: /* userspace responded, convert to something usable */ switch (event->response) { case FAN_ALLOW: @@ -94,7 +80,6 @@ out: return ret; } -#endif static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmnt_mark, @@ -153,8 +138,7 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, { struct fanotify_event_info *event; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (mask & FAN_ALL_PERM_EVENTS) { + if (fanotify_is_perm_event(mask)) { struct fanotify_perm_event_info *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, @@ -165,7 +149,6 @@ struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, pevent->response = 0; goto init; } -#endif event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); if (!event) return NULL; @@ -212,9 +195,19 @@ static int fanotify_handle_event(struct fsnotify_group *group, pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, mask); + if (fanotify_is_perm_event(mask)) { + /* 
+ * fsnotify_prepare_user_wait() fails if we race with mark + * deletion. Just let the operation pass in that case. + */ + if (!fsnotify_prepare_user_wait(iter_info)) + return 0; + } + event = fanotify_alloc_event(inode, mask, data); + ret = -ENOMEM; if (unlikely(!event)) - return -ENOMEM; + goto finish; fsn_event = &event->fse; ret = fsnotify_add_event(group, fsn_event, fanotify_merge); @@ -224,16 +217,16 @@ static int fanotify_handle_event(struct fsnotify_group *group, /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); - return 0; - } - -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (mask & FAN_ALL_PERM_EVENTS) { + ret = 0; + } else if (fanotify_is_perm_event(mask)) { ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), iter_info); fsnotify_destroy_event(group, fsn_event); } -#endif +finish: + if (fanotify_is_perm_event(mask)) + fsnotify_finish_user_wait(iter_info); + return ret; } @@ -253,13 +246,11 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); put_pid(event->tgid); -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (fsn_event->mask & FAN_ALL_PERM_EVENTS) { + if (fanotify_is_perm_event(fsn_event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); return; } -#endif kmem_cache_free(fanotify_event_cachep, event); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 7dacb7d80727..256d9d1ddea9 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -22,7 +22,6 @@ struct fanotify_event_info { struct pid *tgid; }; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. 
When the @@ -41,7 +40,12 @@ FANOTIFY_PE(struct fsnotify_event *fse) { return container_of(fse, struct fanotify_perm_event_info, fae.fse); } -#endif + +static inline bool fanotify_is_perm_event(u32 mask) +{ + return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) && + mask & FAN_ALL_PERM_EVENTS; +} static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9752e7270e61..55499f5a1c96 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -143,7 +143,6 @@ static int fill_event_metadata(struct fsnotify_group *group, return ret; } -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static struct fanotify_perm_event_info *dequeue_event( struct fsnotify_group *group, int fd) { @@ -200,7 +199,6 @@ static int process_access_response(struct fsnotify_group *group, return 0; } -#endif static ssize_t copy_event_to_user(struct fsnotify_group *group, struct fsnotify_event *event, @@ -222,10 +220,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, fanotify_event_metadata.event_len)) goto out_close_fd; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (event->mask & FAN_ALL_PERM_EVENTS) + if (fanotify_is_perm_event(event->mask)) FANOTIFY_PE(event)->fd = fd; -#endif if (fd != FAN_NOFD) fd_install(fd, f); @@ -310,10 +306,9 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. 
*/ - if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { + if (!fanotify_is_perm_event(kevent->mask)) { fsnotify_destroy_event(group, kevent); } else { -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (ret <= 0) { FANOTIFY_PE(kevent)->response = FAN_DENY; wake_up(&group->fanotify_data.access_waitq); @@ -323,7 +318,6 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, &group->fanotify_data.access_list); spin_unlock(&group->notification_lock); } -#endif } if (ret < 0) break; @@ -339,11 +333,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_response response = { .fd = -1, .response = -1 }; struct fsnotify_group *group; int ret; + if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) + return -EINVAL; + group = file->private_data; if (count > sizeof(response)) @@ -359,16 +355,11 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count = ret; return count; -#else - return -EINVAL; -#endif } static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_perm_event_info *event, *next; struct fsnotify_event *fsn_event; @@ -404,14 +395,14 @@ static int fanotify_release(struct inode *ignored, struct file *file) spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); - } else + } else { FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; + } } spin_unlock(&group->notification_lock); /* Response for all permission events it set, wakeup waiters */ wake_up(&group->fanotify_data.access_waitq); -#endif /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_destroy_group(group); @@ -769,10 +760,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if 
(force_o_largefile()) event_f_flags |= O_LARGEFILE; group->fanotify_data.f_flags = event_f_flags; -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); -#endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: group->priority = FS_PRIO_0; @@ -826,6 +815,7 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, struct fsnotify_group *group; struct fd f; struct path path; + u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", @@ -856,11 +846,10 @@ SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, mask &= ~FAN_ONDIR; } -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) -#else - if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD)) -#endif + if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) + valid_mask |= FAN_ALL_PERM_EVENTS; + + if (mask & ~valid_mask) return -EINVAL; f = fdget(fanotify_fd); @@ -950,10 +939,10 @@ static int __init fanotify_user_setup(void) { fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event_info, - SLAB_PANIC); -#endif + if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { + fanotify_perm_event_cachep = + KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC); + } return 0; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 0c4583b61717..81d8959b6aef 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -243,6 +243,29 @@ static int send_to_group(struct inode *to_tell, file_name, cookie, iter_info); } +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) +{ + struct fsnotify_mark_connector *conn; + struct hlist_node *node = NULL; 
+ + conn = srcu_dereference(*connp, &fsnotify_mark_srcu); + if (conn) + node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu); + + return hlist_entry_safe(node, struct fsnotify_mark, obj_list); +} + +static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) +{ + struct hlist_node *node = NULL; + + if (mark) + node = srcu_dereference(mark->obj_list.next, + &fsnotify_mark_srcu); + + return hlist_entry_safe(node, struct fsnotify_mark, obj_list); +} + /* * This is the main call to fsnotify. The VFS calls into hook specific functions * in linux/fsnotify.h. Those functions then in turn call here. Here will call @@ -252,11 +275,7 @@ static int send_to_group(struct inode *to_tell, int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const unsigned char *file_name, u32 cookie) { - struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; - struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; - struct fsnotify_group *inode_group, *vfsmount_group; - struct fsnotify_mark_connector *inode_conn, *vfsmount_conn; - struct fsnotify_iter_info iter_info; + struct fsnotify_iter_info iter_info = {}; struct mount *mnt; int ret = 0; /* global tests shouldn't care about events on child only the specific event */ @@ -291,26 +310,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if ((mask & FS_MODIFY) || (test_mask & to_tell->i_fsnotify_mask)) { - inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, - &fsnotify_mark_srcu); - if (inode_conn) - inode_node = srcu_dereference(inode_conn->list.first, - &fsnotify_mark_srcu); + iter_info.inode_mark = + fsnotify_first_mark(&to_tell->i_fsnotify_marks); } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, - &fsnotify_mark_srcu); - if (inode_conn) - inode_node = srcu_dereference(inode_conn->list.first, - &fsnotify_mark_srcu); - vfsmount_conn = 
srcu_dereference(mnt->mnt_fsnotify_marks, - &fsnotify_mark_srcu); - if (vfsmount_conn) - vfsmount_node = srcu_dereference( - vfsmount_conn->list.first, - &fsnotify_mark_srcu); + iter_info.inode_mark = + fsnotify_first_mark(&to_tell->i_fsnotify_marks); + iter_info.vfsmount_mark = + fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } /* @@ -318,39 +327,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * ignore masks are properly reflected for mount mark notifications. * That's why this traversal is so complicated... */ - while (inode_node || vfsmount_node) { - inode_group = NULL; - inode_mark = NULL; - vfsmount_group = NULL; - vfsmount_mark = NULL; - - if (inode_node) { - inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), - struct fsnotify_mark, obj_list); - inode_group = inode_mark->group; - } - - if (vfsmount_node) { - vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), - struct fsnotify_mark, obj_list); - vfsmount_group = vfsmount_mark->group; - } - - if (inode_group && vfsmount_group) { - int cmp = fsnotify_compare_groups(inode_group, - vfsmount_group); - if (cmp > 0) { - inode_group = NULL; + while (iter_info.inode_mark || iter_info.vfsmount_mark) { + struct fsnotify_mark *inode_mark = iter_info.inode_mark; + struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark; + + if (inode_mark && vfsmount_mark) { + int cmp = fsnotify_compare_groups(inode_mark->group, + vfsmount_mark->group); + if (cmp > 0) inode_mark = NULL; - } else if (cmp < 0) { - vfsmount_group = NULL; + else if (cmp < 0) vfsmount_mark = NULL; - } } - iter_info.inode_mark = inode_mark; - iter_info.vfsmount_mark = vfsmount_mark; - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, data, data_is, cookie, file_name, &iter_info); @@ -358,12 +347,12 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; - if (inode_group) 
- inode_node = srcu_dereference(inode_node->next, - &fsnotify_mark_srcu); - if (vfsmount_group) - vfsmount_node = srcu_dereference(vfsmount_node->next, - &fsnotify_mark_srcu); + if (inode_mark) + iter_info.inode_mark = + fsnotify_next_mark(iter_info.inode_mark); + if (vfsmount_mark) + iter_info.vfsmount_mark = + fsnotify_next_mark(iter_info.vfsmount_mark); } ret = 0; out: diff --git a/fs/notify/group.c b/fs/notify/group.c index 32357534de18..b7a4b6a69efa 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -107,7 +107,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) */ void fsnotify_get_group(struct fsnotify_group *group) { - atomic_inc(&group->refcnt); + refcount_inc(&group->refcnt); } /* @@ -115,7 +115,7 @@ void fsnotify_get_group(struct fsnotify_group *group) */ void fsnotify_put_group(struct fsnotify_group *group) { - if (atomic_dec_and_test(&group->refcnt)) + if (refcount_dec_and_test(&group->refcnt)) fsnotify_final_destroy_group(group); } @@ -131,7 +131,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) return ERR_PTR(-ENOMEM); /* set to 0 when there a no external references to this group */ - atomic_set(&group->refcnt, 1); + refcount_set(&group->refcnt, 1); atomic_set(&group->num_marks, 0); atomic_set(&group->user_waits, 0); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 7cc7d3fb1862..d3c20e0bb046 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -376,7 +376,7 @@ static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group fsnotify_get_mark(fsn_mark); /* One ref for being in the idr, one ref we just took */ - BUG_ON(atomic_read(&fsn_mark->refcnt) < 2); + BUG_ON(refcount_read(&fsn_mark->refcnt) < 2); } return i_mark; @@ -446,7 +446,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, * One ref for being in the idr * one ref grabbed by inotify_idr_find */ - if 
(unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) { + if (unlikely(refcount_read(&i_mark->fsn_mark.refcnt) < 2)) { printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); /* we can't really recover with bad ref cnting.. */ diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 9991f8826734..e9191b416434 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -105,18 +105,8 @@ static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); void fsnotify_get_mark(struct fsnotify_mark *mark) { - WARN_ON_ONCE(!atomic_read(&mark->refcnt)); - atomic_inc(&mark->refcnt); -} - -/* - * Get mark reference when we found the mark via lockless traversal of object - * list. Mark can be already removed from the list by now and on its way to be - * destroyed once SRCU period ends. - */ -static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) -{ - return atomic_inc_not_zero(&mark->refcnt); + WARN_ON_ONCE(!refcount_read(&mark->refcnt)); + refcount_inc(&mark->refcnt); } static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) @@ -211,7 +201,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) /* Catch marks that were actually never attached to object */ if (!mark->connector) { - if (atomic_dec_and_test(&mark->refcnt)) + if (refcount_dec_and_test(&mark->refcnt)) fsnotify_final_mark_destroy(mark); return; } @@ -220,7 +210,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) * We have to be careful so that traversals of obj_list under lock can * safely grab mark reference. 
*/ - if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock)) return; conn = mark->connector; @@ -256,32 +246,60 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) FSNOTIFY_REAPER_DELAY); } -bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +/* + * Get mark reference when we found the mark via lockless traversal of object + * list. Mark can be already removed from the list by now and on its way to be + * destroyed once SRCU period ends. + * + * Also pin the group so it doesn't disappear under us. + */ +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - struct fsnotify_group *group; - - if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) - return false; - - if (iter_info->inode_mark) - group = iter_info->inode_mark->group; - else - group = iter_info->vfsmount_mark->group; + if (!mark) + return true; + + if (refcount_inc_not_zero(&mark->refcnt)) { + spin_lock(&mark->lock); + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { + /* mark is attached, group is still alive then */ + atomic_inc(&mark->group->user_waits); + spin_unlock(&mark->lock); + return true; + } + spin_unlock(&mark->lock); + fsnotify_put_mark(mark); + } + return false; +} - /* - * Since acquisition of mark reference is an atomic op as well, we can - * be sure this inc is seen before any effect of refcount increment. - */ - atomic_inc(&group->user_waits); +/* + * Puts marks and wakes up group destruction if necessary. 
+ * + * Pairs with fsnotify_get_mark_safe() + */ +static void fsnotify_put_mark_wake(struct fsnotify_mark *mark) +{ + if (mark) { + struct fsnotify_group *group = mark->group; - if (iter_info->inode_mark) { - /* This can fail if mark is being removed */ - if (!fsnotify_get_mark_safe(iter_info->inode_mark)) - goto out_wait; + fsnotify_put_mark(mark); + /* + * We abuse notification_waitq on group shutdown for waiting for + * all marks pinned when waiting for userspace. + */ + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); } - if (iter_info->vfsmount_mark) { - if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) - goto out_inode; +} + +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +{ + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->inode_mark)) + return false; + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) { + fsnotify_put_mark_wake(iter_info->inode_mark); + return false; } /* @@ -292,34 +310,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); return true; -out_inode: - if (iter_info->inode_mark) - fsnotify_put_mark(iter_info->inode_mark); -out_wait: - if (atomic_dec_and_test(&group->user_waits) && group->shutdown) - wake_up(&group->notification_waitq); - return false; } void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) { - struct fsnotify_group *group = NULL; - iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - if (iter_info->inode_mark) { - group = iter_info->inode_mark->group; - fsnotify_put_mark(iter_info->inode_mark); - } - if (iter_info->vfsmount_mark) { - group = iter_info->vfsmount_mark->group; - fsnotify_put_mark(iter_info->vfsmount_mark); - } - /* - * We abuse notification_waitq on group shutdown for waiting for all - * marks pinned when waiting for userspace. 
- */ - if (atomic_dec_and_test(&group->user_waits) && group->shutdown) - wake_up(&group->notification_waitq); + fsnotify_put_mark_wake(iter_info->inode_mark); + fsnotify_put_mark_wake(iter_info->vfsmount_mark); } /* @@ -338,7 +335,7 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && - atomic_read(&mark->refcnt) < 1 + + refcount_read(&mark->refcnt) < 1 + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); spin_lock(&mark->lock); @@ -599,9 +596,11 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, return ret; err: + spin_lock(&mark->lock); mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); + spin_unlock(&mark->lock); atomic_dec(&group->num_marks); fsnotify_put_mark(mark); @@ -738,7 +737,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, { memset(mark, 0, sizeof(*mark)); spin_lock_init(&mark->lock); - atomic_set(&mark->refcnt, 1); + refcount_set(&mark->refcnt, 1); fsnotify_get_group(group); mark->group = group; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 3597ef78df4d..de1b0c8e46ad 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -18,6 +18,7 @@ #include <linux/types.h> #include <linux/atomic.h> #include <linux/user_namespace.h> +#include <linux/refcount.h> /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -136,7 +137,7 @@ struct fsnotify_group { * inotify_init() and the refcnt will hit 0 only when that fd has been * closed. 
*/ - atomic_t refcnt; /* things with interest in this group */ + refcount_t refcnt; /* things with interest in this group */ const struct fsnotify_ops *ops; /* how this group handles things */ @@ -183,11 +184,9 @@ struct fsnotify_group { #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; -#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; struct user_struct *user; @@ -244,7 +243,7 @@ struct fsnotify_mark { __u32 mask; /* We hold one for presence in g_list. Also one ref for each 'thing' * in kernel that found and may be using this mark. */ - atomic_t refcnt; + refcount_t refcnt; /* Group this mark is for. Set on mark creation, stable until last ref * is dropped */ struct fsnotify_group *group; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index d4b050d9a66e..fd353120e0d9 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1008,7 +1008,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify * We are guaranteed to have at least one reference to the mark from * either the inode or the caller of fsnotify_destroy_mark(). */ - BUG_ON(atomic_read(&entry->refcnt) < 1); + BUG_ON(refcount_read(&entry->refcnt) < 1); } static const struct fsnotify_ops audit_tree_ops = { |