diff options
Diffstat (limited to 'fs')
36 files changed, 1157 insertions, 914 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 532acae25453..546874057bd3 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -5,7 +5,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o -kafs-objs := \ +kafs-y := \ $(afs-cache-y) \ addr_list.o \ callback.o \ @@ -21,7 +21,6 @@ kafs-objs := \ main.o \ misc.o \ mntpt.o \ - proc.o \ rotate.o \ rxrpc.o \ security.o \ @@ -34,4 +33,5 @@ kafs-objs := \ write.o \ xattr.o +kafs-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_AFS_FS) := kafs.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 2c46c46f3a6d..025a9a5e1c32 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -215,7 +215,7 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) _enter("%s", cell->name); ret = dns_query("afsdb", cell->name, cell->name_len, - "ipv4", &vllist, _expiry); + "", &vllist, _expiry); if (ret < 0) return ERR_PTR(ret); diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 571437dcb252..5f261fbf2182 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -21,6 +21,66 @@ #include "internal.h" /* + * Create volume and callback interests on a server. + */ +static struct afs_cb_interest *afs_create_interest(struct afs_server *server, + struct afs_vnode *vnode) +{ + struct afs_vol_interest *new_vi, *vi; + struct afs_cb_interest *new; + struct hlist_node **pp; + + new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL); + if (!new_vi) + return NULL; + + new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); + if (!new) { + kfree(new_vi); + return NULL; + } + + new_vi->usage = 1; + new_vi->vid = vnode->volume->vid; + INIT_HLIST_NODE(&new_vi->srv_link); + INIT_HLIST_HEAD(&new_vi->cb_interests); + + refcount_set(&new->usage, 1); + new->sb = vnode->vfs_inode.i_sb; + new->vid = vnode->volume->vid; + new->server = afs_get_server(server); + INIT_HLIST_NODE(&new->cb_vlink); + + write_lock(&server->cb_break_lock); + + for (pp = &server->cb_volumes.first; *pp; pp = &(*pp)->next) { + vi = hlist_entry(*pp, struct afs_vol_interest, srv_link); + if (vi->vid < new_vi->vid) + continue; + if (vi->vid > new_vi->vid) + break; + vi->usage++; + goto found_vi; + } + + new_vi->srv_link.pprev = pp; + new_vi->srv_link.next = *pp; + if (*pp) + (*pp)->pprev = &new_vi->srv_link.next; + *pp = &new_vi->srv_link; + vi = new_vi; + new_vi = NULL; +found_vi: + + new->vol_interest = vi; + hlist_add_head(&new->cb_vlink, &vi->cb_interests); + + write_unlock(&server->cb_break_lock); + kfree(new_vi); + return new; +} + +/* * Set up an interest-in-callbacks record for a volume on a server and * register it with the server. * - Called with vnode->io_lock held. @@ -77,20 +137,10 @@ again: } if (!cbi) { - new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); + new = afs_create_interest(server, vnode); if (!new) return -ENOMEM; - refcount_set(&new->usage, 1); - new->sb = vnode->vfs_inode.i_sb; - new->vid = vnode->volume->vid; - new->server = afs_get_server(server); - INIT_LIST_HEAD(&new->cb_link); - - write_lock(&server->cb_break_lock); - list_add_tail(&new->cb_link, &server->cb_interests); - write_unlock(&server->cb_break_lock); - write_lock(&slist->lock); if (!entry->cb_interest) { entry->cb_interest = afs_get_cb_interest(new); @@ -126,11 +176,22 @@ again: */ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) { + struct afs_vol_interest *vi; + if (cbi && refcount_dec_and_test(&cbi->usage)) { - if (!list_empty(&cbi->cb_link)) { + if (!hlist_unhashed(&cbi->cb_vlink)) { write_lock(&cbi->server->cb_break_lock); - list_del_init(&cbi->cb_link); + + hlist_del_init(&cbi->cb_vlink); + vi = cbi->vol_interest; + cbi->vol_interest = NULL; + if (--vi->usage == 0) + hlist_del(&vi->srv_link); + else + vi = NULL; + write_unlock(&cbi->server->cb_break_lock); + kfree(vi); afs_put_server(net, cbi->server); } kfree(cbi); @@ -182,20 +243,34 @@ void afs_break_callback(struct afs_vnode *vnode) static void afs_break_one_callback(struct afs_server *server, struct afs_fid *fid) { + struct afs_vol_interest *vi; struct afs_cb_interest *cbi; struct afs_iget_data data; struct afs_vnode *vnode; struct inode *inode; read_lock(&server->cb_break_lock); + hlist_for_each_entry(vi, &server->cb_volumes, srv_link) { + if (vi->vid < fid->vid) + continue; + if (vi->vid > fid->vid) { + vi = NULL; + break; + } + //atomic_inc(&vi->usage); + break; + } + + /* TODO: Find all matching volumes if we couldn't match the server and + * break them anyway. + */ + if (!vi) + goto out; /* Step through all interested superblocks. There may be more than one * because of cell aliasing. */ - list_for_each_entry(cbi, &server->cb_interests, cb_link) { - if (cbi->vid != fid->vid) - continue; - + hlist_for_each_entry(cbi, &vi->cb_interests, cb_vlink) { if (fid->vnode == 0 && fid->unique == 0) { /* The callback break applies to an entire volume. */ struct afs_super_info *as = AFS_FS_S(cbi->sb); @@ -217,6 +292,7 @@ static void afs_break_one_callback(struct afs_server *server, } } +out: read_unlock(&server->cb_break_lock); } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index fdf4c36cff79..f3d0bef16d78 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -15,6 +15,7 @@ #include <linux/dns_resolver.h> #include <linux/sched.h> #include <linux/inet.h> +#include <linux/namei.h> #include <keys/rxrpc-type.h> #include "internal.h" @@ -341,8 +342,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* install the new cell */ write_seqlock(&net->cells_lock); - old_root = net->ws_cell; - net->ws_cell = new_root; + old_root = rcu_access_pointer(net->ws_cell); + rcu_assign_pointer(net->ws_cell, new_root); write_sequnlock(&net->cells_lock); afs_put_cell(net, old_root); @@ -528,12 +529,14 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) NULL, 0, cell, 0, true); #endif - ret = afs_proc_cell_setup(net, cell); + ret = afs_proc_cell_setup(cell); if (ret < 0) return ret; - spin_lock(&net->proc_cells_lock); + + mutex_lock(&net->proc_cells_lock); list_add_tail(&cell->proc_link, &net->proc_cells); - spin_unlock(&net->proc_cells_lock); + afs_dynroot_mkdir(net, cell); + mutex_unlock(&net->proc_cells_lock); return 0; } @@ -544,11 +547,12 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) { _enter("%s", cell->name); - afs_proc_cell_remove(net, cell); + afs_proc_cell_remove(cell); - spin_lock(&net->proc_cells_lock); + mutex_lock(&net->proc_cells_lock); list_del_init(&cell->proc_link); - spin_unlock(&net->proc_cells_lock); + afs_dynroot_rmdir(net, cell); + mutex_unlock(&net->proc_cells_lock); #ifdef CONFIG_AFS_FSCACHE fscache_relinquish_cookie(cell->cache, NULL, false); @@ -755,8 +759,8 @@ void afs_cell_purge(struct afs_net *net) _enter(""); write_seqlock(&net->cells_lock); - ws = net->ws_cell; - net->ws_cell = NULL; + ws = rcu_access_pointer(net->ws_cell); + RCU_INIT_POINTER(net->ws_cell, NULL); write_sequnlock(&net->cells_lock); afs_put_cell(net, ws); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 238fd28cfdd2..9e51d6fe7e8f 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -526,7 +526,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) nifs = 0; ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL); if (ifs) { - nifs = afs_get_ipv4_interfaces(ifs, 32, false); + nifs = afs_get_ipv4_interfaces(call->net, ifs, 32, false); if (nifs < 0) { kfree(ifs); ifs = NULL; diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 983f3946ab57..174e843f0633 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -1,4 +1,4 @@ -/* dir.c: AFS dynamic root handling +/* AFS dynamic root handling * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry) return 0; } - ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); + ret = dns_query("afsdb", name, len, "", NULL, NULL); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; @@ -207,3 +207,125 @@ const struct dentry_operations afs_dynroot_dentry_operations = { .d_release = afs_d_release, .d_automount = afs_d_automount, }; + +/* + * Create a manually added cell mount directory. + * - The caller must hold net->proc_cells_lock + */ +int afs_dynroot_mkdir(struct afs_net *net, struct afs_cell *cell) +{ + struct super_block *sb = net->dynroot_sb; + struct dentry *root, *subdir; + int ret; + + if (!sb || atomic_read(&sb->s_active) == 0) + return 0; + + /* Let the ->lookup op do the creation */ + root = sb->s_root; + inode_lock(root->d_inode); + subdir = lookup_one_len(cell->name, root, cell->name_len); + if (IS_ERR(subdir)) { + ret = PTR_ERR(subdir); + goto unlock; + } + + /* Note that we're retaining an extra ref on the dentry */ + subdir->d_fsdata = (void *)1UL; + ret = 0; +unlock: + inode_unlock(root->d_inode); + return ret; +} + +/* + * Remove a manually added cell mount directory. + * - The caller must hold net->proc_cells_lock + */ +void afs_dynroot_rmdir(struct afs_net *net, struct afs_cell *cell) +{ + struct super_block *sb = net->dynroot_sb; + struct dentry *root, *subdir; + + if (!sb || atomic_read(&sb->s_active) == 0) + return; + + root = sb->s_root; + inode_lock(root->d_inode); + + /* Don't want to trigger a lookup call, which will re-add the cell */ + subdir = try_lookup_one_len(cell->name, root, cell->name_len); + if (IS_ERR_OR_NULL(subdir)) { + _debug("lookup %ld", PTR_ERR(subdir)); + goto no_dentry; + } + + _debug("rmdir %pd %u", subdir, d_count(subdir)); + + if (subdir->d_fsdata) { + _debug("unpin %u", d_count(subdir)); + subdir->d_fsdata = NULL; + dput(subdir); + } + dput(subdir); +no_dentry: + inode_unlock(root->d_inode); + _leave(""); +} + +/* + * Populate a newly created dynamic root with cell names. + */ +int afs_dynroot_populate(struct super_block *sb) +{ + struct afs_cell *cell; + struct afs_net *net = afs_sb2net(sb); + int ret; + + if (mutex_lock_interruptible(&net->proc_cells_lock) < 0) + return -ERESTARTSYS; + + net->dynroot_sb = sb; + list_for_each_entry(cell, &net->proc_cells, proc_link) { + ret = afs_dynroot_mkdir(net, cell); + if (ret < 0) + goto error; + } + + ret = 0; +out: + mutex_unlock(&net->proc_cells_lock); + return ret; + +error: + net->dynroot_sb = NULL; + goto out; +} + +/* + * When a dynamic root that's in the process of being destroyed, depopulate it + * of pinned directories. + */ +void afs_dynroot_depopulate(struct super_block *sb) +{ + struct afs_net *net = afs_sb2net(sb); + struct dentry *root = sb->s_root, *subdir, *tmp; + + /* Prevent more subdirs from being created */ + mutex_lock(&net->proc_cells_lock); + if (net->dynroot_sb == sb) + net->dynroot_sb = NULL; + mutex_unlock(&net->proc_cells_lock); + + inode_lock(root->d_inode); + + /* Remove all the pins for dirs created for manually added cells */ + list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) { + if (subdir->d_fsdata) { + subdir->d_fsdata = NULL; + dput(subdir); + } + } + + inode_unlock(root->d_inode); +} diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 5907601aafd0..50929cb91732 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -138,10 +138,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, u64 data_version, size; u32 type, abort_code; u8 flags = 0; - int ret; - - if (vnode) - write_seqlock(&vnode->cb_lock); abort_code = ntohl(xdr->abort_code); @@ -154,8 +150,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, * case. */ status->abort_code = abort_code; - ret = 0; - goto out; + return 0; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -164,8 +159,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, if (abort_code != 0 && inline_error) { status->abort_code = abort_code; - ret = 0; - goto out; + return 0; } type = ntohl(xdr->type); @@ -235,17 +229,35 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, flags); } - ret = 0; - -out: - if (vnode) - write_sequnlock(&vnode->cb_lock); - return ret; + return 0; bad: xdr_dump_bad(*_bp); - ret = afs_protocol_error(call, -EBADMSG); - goto out; + return afs_protocol_error(call, -EBADMSG); +} + +/* + * Decode the file status. We need to lock the target vnode if we're going to + * update its status so that stat() sees the attributes update atomically. + */ +static int afs_decode_status(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + int ret; + + if (!vnode) + return xdr_decode_AFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + + write_seqlock(&vnode->cb_lock); + ret = xdr_decode_AFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + write_sequnlock(&vnode->cb_lock); + return ret; } /* @@ -387,8 +399,8 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) @@ -568,8 +580,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &vnode->status.data_version, req) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req) < 0) return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) @@ -721,9 +733,9 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || - xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || + afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -827,8 +839,8 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -917,9 +929,9 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || - xdr_decode_AFSFetchStatus(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || + afs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1004,9 +1016,9 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) || - xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) || + afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1110,12 +1122,12 @@ static int afs_deliver_fs_rename(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &orig_dvnode->status, orig_dvnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); if (new_dvnode != orig_dvnode && - xdr_decode_AFSFetchStatus(call, &bp, &new_dvnode->status, new_dvnode, - &call->expected_version_2, NULL) < 0) + afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1219,8 +1231,8 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1395,8 +1407,8 @@ static int afs_deliver_fs_store_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -2097,8 +2109,8 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(call, &bp, status, vnode, - &call->expected_version, NULL); + afs_decode_status(call, &bp, status, vnode, + &call->expected_version, NULL); callback[call->count].version = ntohl(bp[0]); callback[call->count].expiry = ntohl(bp[1]); callback[call->count].type = ntohl(bp[2]); @@ -2209,9 +2221,9 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) bp = call->buffer; statuses = call->reply[1]; - if (xdr_decode_AFSFetchStatus(call, &bp, &statuses[call->count], - call->count == 0 ? vnode : NULL, - NULL, NULL) < 0) + if (afs_decode_status(call, &bp, &statuses[call->count], + call->count == 0 ? vnode : NULL, + NULL, NULL) < 0) return afs_protocol_error(call, -EBADMSG); call->count++; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e3f8a46663db..9778df135717 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -22,6 +22,8 @@ #include <linux/backing-dev.h> #include <linux/uuid.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/sock.h> #include <net/af_rxrpc.h> #include "afs.h" @@ -40,7 +42,8 @@ struct afs_mount_params { afs_voltype_t type; /* type of volume requested */ int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ - struct afs_net *net; /* Network namespace in effect */ + struct net *net_ns; /* Network namespace in effect */ + struct afs_net *net; /* the AFS net namespace stuff */ struct afs_cell *cell; /* cell in which to find volume */ struct afs_volume *volume; /* volume record */ struct key *key; /* key to use for secure mounting */ @@ -189,7 +192,7 @@ struct afs_read { * - there's one superblock per volume */ struct afs_super_info { - struct afs_net *net; /* Network namespace */ + struct net *net_ns; /* Network namespace */ struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ bool dyn_root; /* True if dynamic root */ @@ -210,7 +213,6 @@ struct afs_sysnames { char *subs[AFS_NR_SYSNAME]; refcount_t usage; unsigned short nr; - short error; char blank[1]; }; @@ -218,6 +220,7 @@ struct afs_sysnames { * AFS network namespace record. */ struct afs_net { + struct net *net; /* Backpointer to the owning net namespace */ struct afs_uuid uuid; bool live; /* F if this namespace is being removed */ @@ -231,13 +234,13 @@ struct afs_net { /* Cell database */ struct rb_root cells; - struct afs_cell *ws_cell; + struct afs_cell __rcu *ws_cell; struct work_struct cells_manager; struct timer_list cells_timer; atomic_t cells_outstanding; seqlock_t cells_lock; - spinlock_t proc_cells_lock; + struct mutex proc_cells_lock; struct list_head proc_cells; /* Known servers. Theoretically each fileserver can only be in one @@ -261,6 +264,7 @@ struct afs_net { struct mutex lock_manager_mutex; /* Misc */ + struct super_block *dynroot_sb; /* Dynamic root mount superblock */ struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ struct afs_sysnames *sysnames; rwlock_t sysnames_lock; @@ -280,7 +284,6 @@ struct afs_net { }; extern const char afs_init_sysname[]; -extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns enum afs_cell_state { AFS_CELL_UNSET, @@ -404,16 +407,27 @@ struct afs_server { rwlock_t fs_lock; /* access lock */ /* callback promise management */ - struct list_head cb_interests; /* List of superblocks using this server */ + struct hlist_head cb_volumes; /* List of volume interests on this server */ unsigned cb_s_break; /* Break-everything counter. */ rwlock_t cb_break_lock; /* Volume finding lock */ }; /* + * Volume collation in the server's callback interest list. + */ +struct afs_vol_interest { + struct hlist_node srv_link; /* Link in server->cb_volumes */ + struct hlist_head cb_interests; /* List of callback interests on the server */ + afs_volid_t vid; /* Volume ID to match */ + unsigned int usage; +}; + +/* * Interest by a superblock on a server. */ struct afs_cb_interest { - struct list_head cb_link; /* Link in server->cb_interests */ + struct hlist_node cb_vlink; /* Link in vol_interest->cb_interests */ + struct afs_vol_interest *vol_interest; struct afs_server *server; /* Server on which this interest resides */ struct super_block *sb; /* Superblock on which inodes reside */ afs_volid_t vid; /* Volume ID to match */ @@ -720,6 +734,10 @@ extern const struct inode_operations afs_dynroot_inode_operations; extern const struct dentry_operations afs_dynroot_dentry_operations; extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *); +extern int afs_dynroot_mkdir(struct afs_net *, struct afs_cell *); +extern void afs_dynroot_rmdir(struct afs_net *, struct afs_cell *); +extern int afs_dynroot_populate(struct super_block *); +extern void afs_dynroot_depopulate(struct super_block *); /* * file.c @@ -806,34 +824,36 @@ extern int afs_drop_inode(struct inode *); * main.c */ extern struct workqueue_struct *afs_wq; +extern int afs_net_id; -static inline struct afs_net *afs_d2net(struct dentry *dentry) +static inline struct afs_net *afs_net(struct net *net) { - return &__afs_net; + return net_generic(net, afs_net_id); } -static inline struct afs_net *afs_i2net(struct inode *inode) +static inline struct afs_net *afs_sb2net(struct super_block *sb) { - return &__afs_net; + return afs_net(AFS_FS_S(sb)->net_ns); } -static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) +static inline struct afs_net *afs_d2net(struct dentry *dentry) { - return &__afs_net; + return afs_sb2net(dentry->d_sb); } -static inline struct afs_net *afs_sock2net(struct sock *sk) +static inline struct afs_net *afs_i2net(struct inode *inode) { - return &__afs_net; + return afs_sb2net(inode->i_sb); } -static inline struct afs_net *afs_get_net(struct afs_net *net) +static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) { - return net; + return afs_i2net(&vnode->vfs_inode); } -static inline void afs_put_net(struct afs_net *net) +static inline struct afs_net *afs_sock2net(struct sock *sk) { + return net_generic(sock_net(sk), afs_net_id); } static inline void __afs_stat(atomic_t *s) @@ -861,16 +881,25 @@ extern void afs_mntpt_kill_timer(void); /* * netdevices.c */ -extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool); +extern int afs_get_ipv4_interfaces(struct afs_net *, struct afs_interface *, + size_t, bool); /* * proc.c */ +#ifdef CONFIG_PROC_FS extern int __net_init afs_proc_init(struct afs_net *); extern void __net_exit afs_proc_cleanup(struct afs_net *); -extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); -extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); +extern int afs_proc_cell_setup(struct afs_cell *); +extern void afs_proc_cell_remove(struct afs_cell *); extern void afs_put_sysnames(struct afs_sysnames *); +#else +static inline int afs_proc_init(struct afs_net *net) { return 0; } +static inline void afs_proc_cleanup(struct afs_net *net) {} +static inline int afs_proc_cell_setup(struct afs_cell *cell) { return 0; } +static inline void afs_proc_cell_remove(struct afs_cell *cell) {} +static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {} +#endif /* * rotate.c @@ -1002,7 +1031,7 @@ extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server * super.c */ extern int __init afs_fs_init(void); -extern void __exit afs_fs_exit(void); +extern void afs_fs_exit(void); /* * vlclient.c diff --git a/fs/afs/main.c b/fs/afs/main.c index d7560168b3bf..e84fe822a960 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -15,6 +15,7 @@ #include <linux/completion.h> #include <linux/sched.h> #include <linux/random.h> +#include <linux/proc_fs.h> #define CREATE_TRACE_POINTS #include "internal.h" @@ -32,7 +33,7 @@ module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); struct workqueue_struct *afs_wq; -struct afs_net __afs_net; +static struct proc_dir_entry *afs_proc_symlink; #if defined(CONFIG_ALPHA) const char afs_init_sysname[] = "alpha_linux26"; @@ -67,11 +68,13 @@ const char afs_init_sysname[] = "unknown_linux26"; /* * Initialise an AFS network namespace record. */ -static int __net_init afs_net_init(struct afs_net *net) +static int __net_init afs_net_init(struct net *net_ns) { struct afs_sysnames *sysnames; + struct afs_net *net = afs_net(net_ns); int ret; + net->net = net_ns; net->live = true; generate_random_uuid((unsigned char *)&net->uuid); @@ -83,7 +86,7 @@ static int __net_init afs_net_init(struct afs_net *net) INIT_WORK(&net->cells_manager, afs_manage_cells); timer_setup(&net->cells_timer, afs_cells_timer, 0); - spin_lock_init(&net->proc_cells_lock); + mutex_init(&net->proc_cells_lock); INIT_LIST_HEAD(&net->proc_cells); seqlock_init(&net->fs_lock); @@ -142,8 +145,10 @@ error_sysnames: /* * Clean up and destroy an AFS network namespace record. */ -static void __net_exit afs_net_exit(struct afs_net *net) +static void __net_exit afs_net_exit(struct net *net_ns) { + struct afs_net *net = afs_net(net_ns); + net->live = false; afs_cell_purge(net); afs_purge_servers(net); @@ -152,6 +157,13 @@ static void __net_exit afs_net_exit(struct afs_net *net) afs_put_sysnames(net->sysnames); } +static struct pernet_operations afs_net_ops = { + .init = afs_net_init, + .exit = afs_net_exit, + .id = &afs_net_id, + .size = sizeof(struct afs_net), +}; + /* * initialise the AFS client FS module */ @@ -178,7 +190,7 @@ static int __init afs_init(void) goto error_cache; #endif - ret = afs_net_init(&__afs_net); + ret = register_pernet_subsys(&afs_net_ops); if (ret < 0) goto error_net; @@ -187,10 +199,18 @@ static int __init afs_init(void) if (ret < 0) goto error_fs; + afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs"); + if (IS_ERR(afs_proc_symlink)) { + ret = PTR_ERR(afs_proc_symlink); + goto error_proc; + } + return ret; +error_proc: + afs_fs_exit(); error_fs: - afs_net_exit(&__afs_net); + unregister_pernet_subsys(&afs_net_ops); error_net: #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); @@ -219,8 +239,9 @@ static void __exit afs_exit(void) { printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); + proc_remove(afs_proc_symlink); afs_fs_exit(); - afs_net_exit(&__afs_net); + unregister_pernet_subsys(&afs_net_ops); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); #endif diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c index 50bd5bb1c4fb..2a009d1939d7 100644 --- a/fs/afs/netdevices.c +++ b/fs/afs/netdevices.c @@ -17,8 +17,8 @@ * - maxbufs must be at least 1 * - returns the number of interface records in the buffer */ -int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs, - bool wantloopback) +int afs_get_ipv4_interfaces(struct afs_net *net, struct afs_interface *bufs, + size_t maxbufs, bool wantloopback) { struct net_device *dev; struct in_device *idev; @@ -27,7 +27,7 @@ int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs, ASSERT(maxbufs > 0); rtnl_lock(); - for_each_netdev(&init_net, dev) { + for_each_netdev(net->net, dev) { if (dev->type == ARPHRD_LOOPBACK && !wantloopback) continue; idev = __in_dev_get_rtnl(dev); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 3aad32762989..0c3285c8db95 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -17,240 +17,78 @@ #include <linux/uaccess.h> #include "internal.h" -static inline struct afs_net *afs_proc2net(struct file *f) +static inline struct afs_net *afs_seq2net(struct seq_file *m) { - return &__afs_net; + return afs_net(seq_file_net(m)); } -static inline struct afs_net *afs_seq2net(struct seq_file *m) +static inline struct afs_net *afs_seq2net_single(struct seq_file *m) { - return &__afs_net; // TODO: use seq_file_net(m) + return afs_net(seq_file_single_net(m)); } -static int afs_proc_cells_open(struct inode *inode, struct file *file); -static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos); -static void afs_proc_cells_stop(struct seq_file *p, void *v); -static int afs_proc_cells_show(struct seq_file *m, void *v); -static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, - size_t size, loff_t *_pos); - -static const struct seq_operations afs_proc_cells_ops = { - .start = afs_proc_cells_start, - .next = afs_proc_cells_next, - .stop = afs_proc_cells_stop, - .show = afs_proc_cells_show, -}; - -static const struct file_operations afs_proc_cells_fops = { - .open = afs_proc_cells_open, - .read = seq_read, - .write = afs_proc_cells_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf, - size_t size, loff_t *_pos); -static ssize_t afs_proc_rootcell_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos); - -static const struct file_operations afs_proc_rootcell_fops = { - .read = afs_proc_rootcell_read, - .write = afs_proc_rootcell_write, - .llseek = no_llseek, -}; - -static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v); -static int afs_proc_cell_volumes_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_cell_volumes_ops = { - .start = afs_proc_cell_volumes_start, - .next = afs_proc_cell_volumes_next, - .stop = afs_proc_cell_volumes_stop, - .show = afs_proc_cell_volumes_show, -}; - -static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v); -static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_cell_vlservers_ops = { - .start = afs_proc_cell_vlservers_start, - .next = afs_proc_cell_vlservers_next, - .stop = afs_proc_cell_vlservers_stop, - .show = afs_proc_cell_vlservers_show, -}; - -static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_servers_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_servers_stop(struct seq_file *p, void *v); -static int afs_proc_servers_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_servers_ops = { - .start = afs_proc_servers_start, - .next = afs_proc_servers_next, - .stop = afs_proc_servers_stop, - .show = afs_proc_servers_show, -}; - -static int afs_proc_sysname_open(struct inode *inode, struct file *file); -static int afs_proc_sysname_release(struct inode *inode, struct file *file); -static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_sysname_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_sysname_stop(struct seq_file *p, void *v); -static int afs_proc_sysname_show(struct seq_file *m, void *v); -static ssize_t afs_proc_sysname_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos); - -static const struct seq_operations afs_proc_sysname_ops = { - .start = afs_proc_sysname_start, - .next = afs_proc_sysname_next, - .stop = afs_proc_sysname_stop, - .show = afs_proc_sysname_show, -}; - -static const struct file_operations afs_proc_sysname_fops = { - .open = afs_proc_sysname_open, - .read = seq_read, - .llseek = seq_lseek, - .release = afs_proc_sysname_release, - .write = afs_proc_sysname_write, -}; - -static int afs_proc_stats_show(struct seq_file *m, void *v); - /* - * initialise the /proc/fs/afs/ directory + * Display the list of cells known to the namespace. */ -int afs_proc_init(struct afs_net *net) +static int afs_proc_cells_show(struct seq_file *m, void *v) { - _enter(""); - - net->proc_afs = proc_mkdir("fs/afs", NULL); - if (!net->proc_afs) - goto error_dir; + struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); + struct afs_net *net = afs_seq2net(m); - if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || - !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || - !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) || - !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) || - !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops)) - goto error_tree; + if (v == &net->proc_cells) { + /* display header on line 1 */ + seq_puts(m, "USE NAME\n"); + return 0; + } - _leave(" = 0"); + /* display one cell per line on subsequent lines */ + seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); return 0; - -error_tree: - proc_remove(net->proc_afs); -error_dir: - _leave(" = -ENOMEM"); - return -ENOMEM; -} - -/* - * clean up the /proc/fs/afs/ directory - */ -void afs_proc_cleanup(struct afs_net *net) -{ - proc_remove(net->proc_afs); - net->proc_afs = NULL; -} - -/* - * open "/proc/fs/afs/cells" which provides a summary of extant cells - */ -static int afs_proc_cells_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &afs_proc_cells_ops); } -/* - * set up the iterator to start reading from the cells list and return the - * first item - */ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { - struct afs_net *net = afs_seq2net(m); - rcu_read_lock(); - return seq_list_start_head(&net->proc_cells, *_pos); + return seq_list_start_head(&afs_seq2net(m)->proc_cells, *_pos); } -/* - * move to next cell in cells list - */ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) { - struct afs_net *net = afs_seq2net(m); - - return seq_list_next(v, &net->proc_cells, pos); + return seq_list_next(v, &afs_seq2net(m)->proc_cells, pos); } -/* - * clean up after reading from the cells list - */ static void afs_proc_cells_stop(struct seq_file *m, void *v) __releases(rcu) { rcu_read_unlock(); } -/* - * display a header line followed by a load of cell lines - */ -static int afs_proc_cells_show(struct seq_file *m, void *v) -{ - struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); - struct afs_net *net = afs_seq2net(m); - - if (v == &net->proc_cells) { - /* display header on line 1 */ - seq_puts(m, "USE NAME\n"); - return 0; - } - - /* display one cell per line on subsequent lines */ - seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); - return 0; -} +static const struct seq_operations afs_proc_cells_ops = { + .start = afs_proc_cells_start, + .next = afs_proc_cells_next, + .stop = afs_proc_cells_stop, + .show = afs_proc_cells_show, +}; /* * handle writes to /proc/fs/afs/cells * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]" */ -static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, - size_t size, loff_t *_pos) +static int afs_proc_cells_write(struct file *file, char *buf, size_t size) { - struct afs_net *net = afs_proc2net(file); - char *kbuf, *name, *args; + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net(m); + char *name, *args; int ret; - /* start by dragging the command into memory */ - if (size <= 1 || size >= PAGE_SIZE) - return -EINVAL; - - kbuf = memdup_user_nul(buf, size); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - /* trim to first NL */ - name = memchr(kbuf, '\n', size); + name = memchr(buf, '\n', size); if (name) *name = 0; /* split into command, name and argslist */ - name = strchr(kbuf, ' '); + name = strchr(buf, ' '); if (!name) goto inval; do { @@ -269,9 +107,9 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, goto inval; /* determine command to perform */ - _debug("cmd=%s name=%s args=%s", kbuf, name, args); + _debug("cmd=%s name=%s args=%s", buf, name, args); - if (strcmp(kbuf, "add") == 0) { + if (strcmp(buf, "add") == 0) { struct afs_cell *cell; cell = afs_lookup_cell(net, name, strlen(name), args, true); @@ -287,10 +125,9 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, goto inval; } - ret = size; + ret = 0; done: - kfree(kbuf); _leave(" = %d", ret); return ret; @@ -300,200 +137,136 @@ inval: goto done; } -static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf, - size_t size, loff_t *_pos) +/* + * Display the name of the current workstation cell. + */ +static int afs_proc_rootcell_show(struct seq_file *m, void *v) { struct afs_cell *cell; - struct afs_net *net = afs_proc2net(file); - unsigned int seq = 0; - char name[AFS_MAXCELLNAME + 1]; - int len; - - if (*_pos > 0) - return 0; - if (!net->ws_cell) - return 0; - - rcu_read_lock(); - do { - read_seqbegin_or_lock(&net->cells_lock, &seq); - len = 0; - cell = rcu_dereference_raw(net->ws_cell); - if (cell) { - len = cell->name_len; - memcpy(name, cell->name, len); - } - } while (need_seqretry(&net->cells_lock, seq)); - done_seqretry(&net->cells_lock, seq); - rcu_read_unlock(); - - if (!len) - return 0; - - name[len++] = '\n'; - if (len > size) - len = size; - if (copy_to_user(buf, name, len) != 0) - return -EFAULT; - *_pos = 1; - return len; + struct afs_net *net; + + net = afs_seq2net_single(m); + if (rcu_access_pointer(net->ws_cell)) { + rcu_read_lock(); + cell = rcu_dereference(net->ws_cell); + if (cell) + seq_printf(m, "%s\n", cell->name); + rcu_read_unlock(); + } + return 0; } /* - * handle writes to /proc/fs/afs/rootcell - * - to initialize rootcell: echo "cell.name:192.168.231.14" + * Set the current workstation cell and optionally supply its list of volume + * location servers. + * + * echo "cell.name:192.168.231.14" >/proc/fs/afs/rootcell */ -static ssize_t afs_proc_rootcell_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos) +static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size) { - struct afs_net *net = afs_proc2net(file); - char *kbuf, *s; + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net_single(m); + char *s; int ret; - /* start by dragging the command into memory */ - if (size <= 1 || size >= PAGE_SIZE) - return -EINVAL; - - kbuf = memdup_user_nul(buf, size); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - ret = -EINVAL; - if (kbuf[0] == '.') + if (buf[0] == '.') goto out; - if (memchr(kbuf, '/', size)) + if (memchr(buf, '/', size)) goto out; /* trim to first NL */ - s = memchr(kbuf, '\n', size); + s = memchr(buf, '\n', size); if (s) *s = 0; /* determine command to perform */ - _debug("rootcell=%s", kbuf); + _debug("rootcell=%s", buf); - ret = afs_cell_init(net, kbuf); - if (ret >= 0) - ret = size; /* consume everything, always */ + ret = afs_cell_init(net, buf); out: - kfree(kbuf); _leave(" = %d", ret); return ret; } +static const char afs_vol_types[3][3] = { + [AFSVL_RWVOL] = "RW", + [AFSVL_ROVOL] = "RO", + [AFSVL_BACKVOL] = "BK", +}; + /* - * initialise /proc/fs/afs/<cell>/ + * Display the list of volumes known to a cell. */ -int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) +static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { - struct proc_dir_entry *dir; - - _enter("%p{%s},%p", cell, cell->name, net->proc_afs); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); - dir = proc_mkdir(cell->name, net->proc_afs); - if (!dir) - goto error_dir; + /* Display header on line 1 */ + if (v == &cell->proc_volumes) { + seq_puts(m, "USE VID TY\n"); + return 0; + } - if (!proc_create_seq_data("vlservers", 0, dir, - &afs_proc_cell_vlservers_ops, cell)) - goto error_tree; - if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops, - cell)) - goto error_tree; + seq_printf(m, "%3d %08x %s\n", + atomic_read(&vol->usage), vol->vid, + afs_vol_types[vol->type]); - _leave(" = 0"); return 0; - -error_tree: - remove_proc_subtree(cell->name, net->proc_afs); -error_dir: - _leave(" = -ENOMEM"); - return -ENOMEM; } -/* - * remove /proc/fs/afs/<cell>/ - */ -void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell) -{ - _enter(""); - - remove_proc_subtree(cell->name, net->proc_afs); - - _leave(""); -} - -/* - * set up the iterator to start reading from the cells list and return the - * first item - */ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) __acquires(cell->proc_lock) { struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - _enter("cell=%p pos=%Ld", cell, *_pos); - read_lock(&cell->proc_lock); return seq_list_start_head(&cell->proc_volumes, *_pos); } -/* - * move to next cell in cells list - */ -static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, +static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_cell *cell = PDE_DATA(file_inode(p->file)); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - _enter("cell=%p pos=%Ld", cell, *_pos); return seq_list_next(v, &cell->proc_volumes, _pos); } -/* - * clean up after reading from the cells list - */ -static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) +static void afs_proc_cell_volumes_stop(struct seq_file *m, void *v) __releases(cell->proc_lock) { - struct afs_cell *cell = PDE_DATA(file_inode(p->file)); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); read_unlock(&cell->proc_lock); } -static const char afs_vol_types[3][3] = { - [AFSVL_RWVOL] = "RW", - [AFSVL_ROVOL] = "RO", - [AFSVL_BACKVOL] = "BK", +static const struct seq_operations afs_proc_cell_volumes_ops = { + .start = afs_proc_cell_volumes_start, + .next = afs_proc_cell_volumes_next, + .stop = afs_proc_cell_volumes_stop, + .show = afs_proc_cell_volumes_show, }; /* - * display a header line followed by a load of volume lines + * Display the list of Volume Location servers we're using for a cell. */ -static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) +static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); + struct sockaddr_rxrpc *addr = v; - /* Display header on line 1 */ - if (v == &cell->proc_volumes) { - seq_puts(m, "USE VID TY\n"); + /* display header on line 1 */ + if (v == (void *)1) { + seq_puts(m, "ADDRESS\n"); return 0; } - seq_printf(m, "%3d %08x %s\n", - atomic_read(&vol->usage), vol->vid, - afs_vol_types[vol->type]); - + /* display one cell per line on subsequent lines */ + seq_printf(m, "%pISp\n", &addr->transport); return 0; } -/* - * set up the iterator to start reading from the cells list and return the - * first item - */ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { @@ -516,14 +289,11 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) return alist->addrs + pos; } -/* - * move to next cell in cells list - */ -static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, +static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v, loff_t *_pos) { struct afs_addr_list *alist; - struct afs_cell *cell = PDE_DATA(file_inode(p->file)); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos; alist = rcu_dereference(cell->vl_addrs); @@ -536,161 +306,145 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, return alist->addrs + pos; } -/* - * clean up after reading from the cells list - */ -static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) +static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v) __releases(rcu) { rcu_read_unlock(); } +static const struct seq_operations afs_proc_cell_vlservers_ops = { + .start = afs_proc_cell_vlservers_start, + .next = afs_proc_cell_vlservers_next, + .stop = afs_proc_cell_vlservers_stop, + .show = afs_proc_cell_vlservers_show, +}; + /* - * display a header line followed by a load of volume lines + * Display the list of fileservers we're using within a namespace. */ -static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) +static int afs_proc_servers_show(struct seq_file *m, void *v) { - struct sockaddr_rxrpc *addr = v; + struct afs_server *server; + struct afs_addr_list *alist; + int i; - /* display header on line 1 */ - if (v == (void *)1) { - seq_puts(m, "ADDRESS\n"); + if (v == SEQ_START_TOKEN) { + seq_puts(m, "UUID USE ADDR\n"); return 0; } - /* display one cell per line on subsequent lines */ - seq_printf(m, "%pISp\n", &addr->transport); + server = list_entry(v, struct afs_server, proc_link); + alist = rcu_dereference(server->addresses); + seq_printf(m, "%pU %3d %pISpc%s\n", + &server->uuid, + atomic_read(&server->usage), + &alist->addrs[0].transport, + alist->index == 0 ? "*" : ""); + for (i = 1; i < alist->nr_addrs; i++) + seq_printf(m, " %pISpc%s\n", + &alist->addrs[i].transport, + alist->index == i ? "*" : ""); return 0; } -/* - * Set up the iterator to start reading from the server list and return the - * first item. - */ static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { - struct afs_net *net = afs_seq2net(m); - rcu_read_lock(); - return seq_hlist_start_head_rcu(&net->fs_proc, *_pos); + return seq_hlist_start_head_rcu(&afs_seq2net(m)->fs_proc, *_pos); } -/* - * move to next cell in cells list - */ static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_net *net = afs_seq2net(m); - - return seq_hlist_next_rcu(v, &net->fs_proc, _pos); + return seq_hlist_next_rcu(v, &afs_seq2net(m)->fs_proc, _pos); } -/* - * clean up after reading from the cells list - */ -static void afs_proc_servers_stop(struct seq_file *p, void *v) +static void afs_proc_servers_stop(struct seq_file *m, void *v) __releases(rcu) { rcu_read_unlock(); } +static const struct seq_operations afs_proc_servers_ops = { + .start = afs_proc_servers_start, + .next = afs_proc_servers_next, + .stop = afs_proc_servers_stop, + .show = afs_proc_servers_show, +}; + /* - * display a header line followed by a load of volume lines + * Display the list of strings that may be substituted for the @sys pathname + * macro. */ -static int afs_proc_servers_show(struct seq_file *m, void *v) +static int afs_proc_sysname_show(struct seq_file *m, void *v) { - struct afs_server *server; - struct afs_addr_list *alist; - - if (v == SEQ_START_TOKEN) { - seq_puts(m, "UUID USE ADDR\n"); - return 0; - } + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *sysnames = net->sysnames; + unsigned int i = (unsigned long)v - 1; - server = list_entry(v, struct afs_server, proc_link); - alist = rcu_dereference(server->addresses); - seq_printf(m, "%pU %3d %pISp\n", - &server->uuid, - atomic_read(&server->usage), - &alist->addrs[alist->index].transport); + if (i < sysnames->nr) + seq_printf(m, "%s\n", sysnames->subs[i]); return 0; } -void afs_put_sysnames(struct afs_sysnames *sysnames) +static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos) + __acquires(&net->sysnames_lock) { - int i; + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names; - if (sysnames && refcount_dec_and_test(&sysnames->usage)) { - for (i = 0; i < sysnames->nr; i++) - if (sysnames->subs[i] != afs_init_sysname && - sysnames->subs[i] != sysnames->blank) - kfree(sysnames->subs[i]); - } + read_lock(&net->sysnames_lock); + + names = net->sysnames; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); } -/* - * Handle opening of /proc/fs/afs/sysname. If it is opened for writing, we - * assume the caller wants to change the substitution list and we allocate a - * buffer to hold the list. - */ -static int afs_proc_sysname_open(struct inode *inode, struct file *file) +static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos) { - struct afs_sysnames *sysnames; - struct seq_file *m; - int ret; - - ret = seq_open(file, &afs_proc_sysname_ops); - if (ret < 0) - return ret; + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names = net->sysnames; - if (file->f_mode & FMODE_WRITE) { - sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); - if (!sysnames) { - seq_release(inode, file); - return -ENOMEM; - } + *pos += 1; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} - refcount_set(&sysnames->usage, 1); - m = file->private_data; - m->private = sysnames; - } +static void afs_proc_sysname_stop(struct seq_file *m, void *v) + __releases(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); - return 0; + read_unlock(&net->sysnames_lock); } +static const struct seq_operations afs_proc_sysname_ops = { + .start = afs_proc_sysname_start, + .next = afs_proc_sysname_next, + .stop = afs_proc_sysname_stop, + .show = afs_proc_sysname_show, +}; + /* - * Handle writes to /proc/fs/afs/sysname to set the @sys substitution. + * Allow the @sys substitution to be configured. */ -static ssize_t afs_proc_sysname_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos) +static int afs_proc_sysname_write(struct file *file, char *buf, size_t size) { - struct afs_sysnames *sysnames; + struct afs_sysnames *sysnames, *kill; struct seq_file *m = file->private_data; - char *kbuf = NULL, *s, *p, *sub; + struct afs_net *net = afs_seq2net(m); + char *s, *p, *sub; int ret, len; - sysnames = m->private; + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); if (!sysnames) - return -EINVAL; - if (sysnames->error) - return sysnames->error; - - if (size >= PAGE_SIZE - 1) { - sysnames->error = -EINVAL; - return -EINVAL; - } - if (size == 0) - return 0; - - kbuf = memdup_user_nul(buf, size); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - - inode_lock(file_inode(file)); + return -ENOMEM; + refcount_set(&sysnames->usage, 1); + kill = sysnames; - p = kbuf; + p = buf; while ((s = strsep(&p, " \t\n"))) { len = strlen(s); if (len == 0) @@ -731,85 +485,36 @@ static ssize_t afs_proc_sysname_write(struct file *file, sysnames->nr++; } - ret = size; /* consume everything, always */ + if (sysnames->nr == 0) { + sysnames->subs[0] = sysnames->blank; + sysnames->nr++; + } + + write_lock(&net->sysnames_lock); + kill = net->sysnames; + net->sysnames = sysnames; + write_unlock(&net->sysnames_lock); + ret = 0; out: - inode_unlock(file_inode(file)); - kfree(kbuf); + afs_put_sysnames(kill); return ret; invalid: ret = -EINVAL; error: - sysnames->error = ret; goto out; } -static int afs_proc_sysname_release(struct inode *inode, struct file *file) +void afs_put_sysnames(struct afs_sysnames *sysnames) { - struct afs_sysnames *sysnames, *kill = NULL; - struct seq_file *m = file->private_data; - struct afs_net *net = afs_seq2net(m); + int i; - sysnames = m->private; - if (sysnames) { - if (!sysnames->error) { - kill = sysnames; - if (sysnames->nr == 0) { - sysnames->subs[0] = sysnames->blank; - sysnames->nr++; - } - write_lock(&net->sysnames_lock); - kill = net->sysnames; - net->sysnames = sysnames; - write_unlock(&net->sysnames_lock); - } - afs_put_sysnames(kill); + if (sysnames && refcount_dec_and_test(&sysnames->usage)) { + for (i = 0; i < sysnames->nr; i++) + if (sysnames->subs[i] != afs_init_sysname && + sysnames->subs[i] != sysnames->blank) + kfree(sysnames->subs[i]); } - - return seq_release(inode, file); -} - -static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos) - __acquires(&net->sysnames_lock) -{ - struct afs_net *net = afs_seq2net(m); - struct afs_sysnames *names = net->sysnames; - - read_lock(&net->sysnames_lock); - - if (*pos >= names->nr) - return NULL; - return (void *)(unsigned long)(*pos + 1); -} - -static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct afs_net *net = afs_seq2net(m); - struct afs_sysnames *names = net->sysnames; - - *pos += 1; - if (*pos >= names->nr) - return NULL; - return (void *)(unsigned long)(*pos + 1); -} - -static void afs_proc_sysname_stop(struct seq_file *m, void *v) - __releases(&net->sysnames_lock) -{ - struct afs_net *net = afs_seq2net(m); - - read_unlock(&net->sysnames_lock); -} - -static int afs_proc_sysname_show(struct seq_file *m, void *v) -{ - struct afs_net *net = afs_seq2net(m); - struct afs_sysnames *sysnames = net->sysnames; - unsigned int i = (unsigned long)v - 1; - - if (i < sysnames->nr) - seq_printf(m, "%s\n", sysnames->subs[i]); - return 0; } /* @@ -817,7 +522,7 @@ static int afs_proc_sysname_show(struct seq_file *m, void *v) */ static int afs_proc_stats_show(struct seq_file *m, void *v) { - struct afs_net *net = afs_seq2net(m); + struct afs_net *net = afs_seq2net_single(m); seq_puts(m, "kAFS statistics\n"); @@ -842,3 +547,101 @@ static int afs_proc_stats_show(struct seq_file *m, void *v) atomic_long_read(&net->n_store_bytes)); return 0; } + +/* + * initialise /proc/fs/afs/<cell>/ + */ +int afs_proc_cell_setup(struct afs_cell *cell) +{ + struct proc_dir_entry *dir; + struct afs_net *net = cell->net; + + _enter("%p{%s},%p", cell, cell->name, net->proc_afs); + + dir = proc_net_mkdir(net->net, cell->name, net->proc_afs); + if (!dir) + goto error_dir; + + if (!proc_create_net_data("vlservers", 0444, dir, + &afs_proc_cell_vlservers_ops, + sizeof(struct seq_net_private), + cell) || + !proc_create_net_data("volumes", 0444, dir, + &afs_proc_cell_volumes_ops, + sizeof(struct seq_net_private), + cell)) + goto error_tree; + + _leave(" = 0"); + return 0; + +error_tree: + remove_proc_subtree(cell->name, net->proc_afs); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * remove /proc/fs/afs/<cell>/ + */ +void afs_proc_cell_remove(struct afs_cell *cell) +{ + struct afs_net *net = cell->net; + + _enter(""); + remove_proc_subtree(cell->name, net->proc_afs); + _leave(""); +} + +/* + * initialise the /proc/fs/afs/ directory + */ +int afs_proc_init(struct afs_net *net) +{ + struct proc_dir_entry *p; + + _enter(""); + + p = proc_net_mkdir(net->net, "afs", net->net->proc_net); + if (!p) + goto error_dir; + + if (!proc_create_net_data_write("cells", 0644, p, + &afs_proc_cells_ops, + afs_proc_cells_write, + sizeof(struct seq_net_private), + NULL) || + !proc_create_net_single_write("rootcell", 0644, p, + afs_proc_rootcell_show, + afs_proc_rootcell_write, + NULL) || + !proc_create_net("servers", 0444, p, &afs_proc_servers_ops, + sizeof(struct seq_net_private)) || + !proc_create_net_single("stats", 0444, p, afs_proc_stats_show, NULL) || + !proc_create_net_data_write("sysname", 0644, p, + &afs_proc_sysname_ops, + afs_proc_sysname_write, + sizeof(struct seq_net_private), + NULL)) + goto error_tree; + + net->proc_afs = p; + _leave(" = 0"); + return 0; + +error_tree: + proc_remove(p); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * clean up the /proc/fs/afs/ directory + */ +void afs_proc_cleanup(struct afs_net *net) +{ + proc_remove(net->proc_afs); + net->proc_afs = NULL; +} diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 08735948f15d..a1b18082991b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -46,7 +46,7 @@ int afs_open_socket(struct afs_net *net) _enter(""); - ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); + ret = sock_create_kern(net->net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); if (ret < 0) goto error_1; diff --git a/fs/afs/server.c b/fs/afs/server.c index 3af4625e2f8c..1d329e6981d5 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -228,7 +228,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, server->flags = (1UL << AFS_SERVER_FL_NEW); server->update_at = ktime_get_real_seconds() + afs_server_update_delay; rwlock_init(&server->fs_lock); - INIT_LIST_HEAD(&server->cb_interests); + INIT_HLIST_HEAD(&server->cb_volumes); rwlock_init(&server->cb_break_lock); afs_inc_servers_outstanding(net); diff --git a/fs/afs/super.c b/fs/afs/super.c index 9e5d7966621c..4d3e274207fb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -48,6 +48,8 @@ struct file_system_type afs_fs_type = { }; MODULE_ALIAS_FS("afs"); +int afs_net_id; + static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, @@ -117,7 +119,7 @@ int __init afs_fs_init(void) /* * clean up the filesystem */ -void __exit afs_fs_exit(void) +void afs_fs_exit(void) { _enter(""); @@ -351,14 +353,19 @@ static int afs_test_super(struct super_block *sb, void *data) struct afs_super_info *as1 = data; struct afs_super_info *as = AFS_FS_S(sb); - return (as->net == as1->net && + return (as->net_ns == as1->net_ns && as->volume && - as->volume->vid == as1->volume->vid); + as->volume->vid == as1->volume->vid && + !as->dyn_root); } static int afs_dynroot_test_super(struct super_block *sb, void *data) { - return false; + struct afs_super_info *as1 = data; + struct afs_super_info *as = AFS_FS_S(sb); + + return (as->net_ns == as1->net_ns && + as->dyn_root); } static int afs_set_super(struct super_block *sb, void *data) @@ -418,10 +425,14 @@ static int afs_fill_super(struct super_block *sb, if (!sb->s_root) goto error; - if (params->dyn_root) + if (as->dyn_root) { sb->s_d_op = &afs_dynroot_dentry_operations; - else + ret = afs_dynroot_populate(sb); + if (ret < 0) + goto error; + } else { sb->s_d_op = &afs_fs_dentry_operations; + } _leave(" = 0"); return 0; @@ -437,7 +448,7 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { - as->net = afs_get_net(params->net); + as->net_ns = get_net(params->net_ns); if (params->dyn_root) as->dyn_root = true; else @@ -450,12 +461,31 @@ static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { afs_put_volume(as->cell, as->volume); - afs_put_cell(as->net, as->cell); - afs_put_net(as->net); + afs_put_cell(afs_net(as->net_ns), as->cell); + put_net(as->net_ns); kfree(as); } } +static void afs_kill_super(struct super_block *sb) +{ + struct afs_super_info *as = AFS_FS_S(sb); + struct afs_net *net = afs_net(as->net_ns); + + if (as->dyn_root) + afs_dynroot_depopulate(sb); + + /* Clear the callback interests (which will do ilookup5) before + * deactivating the superblock. + */ + if (as->volume) + afs_clear_callback_interests(net, as->volume->servers); + kill_anon_super(sb); + if (as->volume) + afs_deactivate_volume(as->volume); + afs_destroy_sbi(as); +} + /* * get an AFS superblock */ @@ -472,12 +502,13 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, _enter(",,%s,%p", dev_name, options); memset(¶ms, 0, sizeof(params)); - params.net = &__afs_net; ret = -EINVAL; if (current->nsproxy->net_ns != &init_net) goto error; - + params.net_ns = current->nsproxy->net_ns; + params.net = afs_net(params.net_ns); + /* parse the options and device name */ if (options) { ret = afs_parse_options(¶ms, options, &dev_name); @@ -563,21 +594,6 @@ error: return ERR_PTR(ret); } -static void afs_kill_super(struct super_block *sb) -{ - struct afs_super_info *as = AFS_FS_S(sb); - - /* Clear the callback interests (which will do ilookup5) before - * deactivating the superblock. - */ - if (as->volume) - afs_clear_callback_interests(as->net, as->volume->servers); - kill_anon_super(sb); - if (as->volume) - afs_deactivate_volume(as->volume); - afs_destroy_sbi(as); -} - /* * Initialise an inode cache slab element prior to any use. Note that * afs_alloc_inode() *must* reset anything that could incorrectly leak from one @@ -1661,7 +1661,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; - mask = file->f_op->poll_mask(file, req->events); + mask = file->f_op->poll_mask(file, req->events) & req->events; if (!mask) return 0; @@ -1719,7 +1719,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) spin_lock_irq(&ctx->ctx_lock); spin_lock(&req->head->lock); - mask = req->file->f_op->poll_mask(req->file, req->events); + mask = req->file->f_op->poll_mask(req->file, req->events) & req->events; if (!mask) { __add_wait_queue(req->head, &req->wait); list_add_tail(&aiocb->ki_list, &ctx->active_reqs); diff --git a/fs/eventfd.c b/fs/eventfd.c index 61c9514da5e9..ceb1031f1cac 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -156,11 +156,11 @@ static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) count = READ_ONCE(ctx->count); if (count > 0) - events |= EPOLLIN; + events |= (EPOLLIN & eventmask); if (count == ULLONG_MAX) events |= EPOLLERR; if (ULLONG_MAX - 1 > count) - events |= EPOLLOUT; + events |= (EPOLLOUT & eventmask); return events; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 67db22fe99c5..ea4436f409fb 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -922,13 +922,17 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head return 0; } -static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) +static struct wait_queue_head *ep_eventpoll_get_poll_head(struct file *file, + __poll_t eventmask) { struct eventpoll *ep = file->private_data; - int depth = 0; + return &ep->poll_wait; +} - /* Insert inside our poll wait queue */ - poll_wait(file, &ep->poll_wait, wait); +static __poll_t ep_eventpoll_poll_mask(struct file *file, __poll_t eventmask) +{ + struct eventpoll *ep = file->private_data; + int depth = 0; /* * Proceed to find out if wanted events are really available inside @@ -968,7 +972,8 @@ static const struct file_operations eventpoll_fops = { .show_fdinfo = ep_show_fdinfo, #endif .release = ep_eventpoll_release, - .poll = ep_eventpoll_poll, + .get_poll_head = ep_eventpoll_get_poll_head, + .poll_mask = ep_eventpoll_poll_mask, .llseek = noop_llseek, }; diff --git a/fs/namei.c b/fs/namei.c index 2490ddb8bc90..734cef54fdf8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2464,6 +2464,35 @@ static int lookup_one_len_common(const char *name, struct dentry *base, } /** + * try_lookup_one_len - filesystem helper to lookup single pathname component + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to + * + * Look up a dentry by name in the dcache, returning NULL if it does not + * currently exist. The function does not try to create a dentry. + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * The caller must hold base->i_mutex. + */ +struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len) +{ + struct qstr this; + int err; + + WARN_ON_ONCE(!inode_is_locked(base->d_inode)); + + err = lookup_one_len_common(name, base, len, &this); + if (err) + return ERR_PTR(err); + + return lookup_dcache(&this, base, 0); +} +EXPORT_SYMBOL(try_lookup_one_len); + +/** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 63a1ca4b9dee..e2bea2ac5dfb 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -79,12 +79,11 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) */ static int dnotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; @@ -95,7 +94,8 @@ static int dnotify_handle_event(struct fsnotify_group *group, if (!S_ISDIR(inode->i_mode)) return 0; - BUG_ON(vfsmount_mark); + if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info))) + return 0; dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); @@ -319,7 +319,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - error = fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0); if (error) { mutex_unlock(&dnotify_group->mark_mutex); goto out_err; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d94e8031fe5f..f90842efea13 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -87,17 +87,17 @@ static int fanotify_get_response(struct fsnotify_group *group, return ret; } -static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmnt_mark, - u32 event_mask, - const void *data, int data_type) +static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, + u32 event_mask, const void *data, + int data_type) { __u32 marks_mask = 0, marks_ignored_mask = 0; const struct path *path = data; + struct fsnotify_mark *mark; + int type; - pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" - " data_type=%d\n", __func__, inode_mark, vfsmnt_mark, - event_mask, data, data_type); + pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", + __func__, iter_info->report_mask, event_mask, data, data_type); /* if we don't have enough info to send an event to userspace say no */ if (data_type != FSNOTIFY_EVENT_PATH) @@ -108,20 +108,21 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !d_can_lookup(path->dentry)) return false; - /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! - */ - if (inode_mark && - (!(event_mask & FS_EVENT_ON_CHILD) || - (inode_mark->mask & FS_EVENT_ON_CHILD))) { - marks_mask |= inode_mark->mask; - marks_ignored_mask |= inode_mark->ignored_mask; - } + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + /* + * if the event is for a child and this inode doesn't care about + * events on the child, don't send it! + */ + if (type == FSNOTIFY_OBJ_TYPE_INODE && + (event_mask & FS_EVENT_ON_CHILD) && + !(mark->mask & FS_EVENT_ON_CHILD)) + continue; - if (vfsmnt_mark) { - marks_mask |= vfsmnt_mark->mask; - marks_ignored_mask |= vfsmnt_mark->ignored_mask; + marks_mask |= mark->mask; + marks_ignored_mask |= mark->ignored_mask; } if (d_is_dir(path->dentry) && @@ -178,8 +179,6 @@ init: __maybe_unused static int fanotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *fanotify_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) @@ -199,8 +198,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); - if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data, - data_type)) + if (!fanotify_should_send_event(iter_info, mask, data, data_type)) return 0; pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index d478629c728b..10aac1942c9f 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -77,7 +77,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct inotify_inode_mark *inode_mark; struct inode *inode; - if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE)) + if (mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); @@ -116,7 +116,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE) { + if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = igrab(mark->connector->inode); if (!inode) return; @@ -126,7 +126,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); - } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { struct mount *mnt = real_mount(mark->connector->mnt); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 613ec7e5a465..f174397b63a0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -184,8 +184,6 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask EXPORT_SYMBOL_GPL(__fsnotify_parent); static int send_to_group(struct inode *to_tell, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, __u32 mask, const void *data, int data_is, u32 cookie, const unsigned char *file_name, @@ -195,48 +193,45 @@ static int send_to_group(struct inode *to_tell, __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0; + struct fsnotify_mark *mark; + int type; - if (unlikely(!inode_mark && !vfsmount_mark)) { - BUG(); + if (WARN_ON(!iter_info->report_mask)) return 0; - } /* clear ignored on inode modification */ if (mask & FS_MODIFY) { - if (inode_mark && - !(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) - inode_mark->ignored_mask = 0; - if (vfsmount_mark && - !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) - vfsmount_mark->ignored_mask = 0; - } - - /* does the inode mark tell us to do something? */ - if (inode_mark) { - group = inode_mark->group; - marks_mask |= inode_mark->mask; - marks_ignored_mask |= inode_mark->ignored_mask; + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + if (mark && + !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) + mark->ignored_mask = 0; + } } - /* does the vfsmount_mark tell us to do something? */ - if (vfsmount_mark) { - group = vfsmount_mark->group; - marks_mask |= vfsmount_mark->mask; - marks_ignored_mask |= vfsmount_mark->ignored_mask; + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + /* does the object mark tell us to do something? */ + if (mark) { + group = mark->group; + marks_mask |= mark->mask; + marks_ignored_mask |= mark->ignored_mask; + } } - pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" - " vfsmount_mark=%p marks_mask=%x marks_ignored_mask=%x" + pr_debug("%s: group=%p to_tell=%p mask=%x marks_mask=%x marks_ignored_mask=%x" " data=%p data_is=%d cookie=%d\n", - __func__, group, to_tell, mask, inode_mark, vfsmount_mark, - marks_mask, marks_ignored_mask, data, - data_is, cookie); + __func__, group, to_tell, mask, marks_mask, marks_ignored_mask, + data, data_is, cookie); if (!(test_mask & marks_mask & ~marks_ignored_mask)) return 0; - return group->ops->handle_event(group, to_tell, inode_mark, - vfsmount_mark, mask, data, data_is, + return group->ops->handle_event(group, to_tell, mask, data, data_is, file_name, cookie, iter_info); } @@ -264,6 +259,57 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) } /* + * iter_info is a multi head priority queue of marks. + * Pick a subset of marks from queue heads, all with the + * same group and set the report_mask for selected subset. + * Returns the report_mask of the selected subset. + */ +static unsigned int fsnotify_iter_select_report_types( + struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *max_prio_group = NULL; + struct fsnotify_mark *mark; + int type; + + /* Choose max prio group among groups of all queue heads */ + fsnotify_foreach_obj_type(type) { + mark = iter_info->marks[type]; + if (mark && + fsnotify_compare_groups(max_prio_group, mark->group) > 0) + max_prio_group = mark->group; + } + + if (!max_prio_group) + return 0; + + /* Set the report mask for marks from same group as max prio group */ + iter_info->report_mask = 0; + fsnotify_foreach_obj_type(type) { + mark = iter_info->marks[type]; + if (mark && + fsnotify_compare_groups(max_prio_group, mark->group) == 0) + fsnotify_iter_set_report_type(iter_info, type); + } + + return iter_info->report_mask; +} + +/* + * Pop from iter_info multi head queue, the marks that were iterated in the + * current iteration step. + */ +static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) +{ + int type; + + fsnotify_foreach_obj_type(type) { + if (fsnotify_iter_should_report_type(iter_info, type)) + iter_info->marks[type] = + fsnotify_next_mark(iter_info->marks[type]); + } +} + +/* * This is the main call to fsnotify. The VFS calls into hook specific functions * in linux/fsnotify.h. Those functions then in turn call here. Here will call * out to all of the registered fsnotify_group. Those groups can then use the @@ -307,15 +353,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if ((mask & FS_MODIFY) || (test_mask & to_tell->i_fsnotify_mask)) { - iter_info.inode_mark = + iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&to_tell->i_fsnotify_marks); } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - iter_info.inode_mark = + iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&to_tell->i_fsnotify_marks); - iter_info.vfsmount_mark = + iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } @@ -324,32 +370,14 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * ignore masks are properly reflected for mount mark notifications. * That's why this traversal is so complicated... */ - while (iter_info.inode_mark || iter_info.vfsmount_mark) { - struct fsnotify_mark *inode_mark = iter_info.inode_mark; - struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark; - - if (inode_mark && vfsmount_mark) { - int cmp = fsnotify_compare_groups(inode_mark->group, - vfsmount_mark->group); - if (cmp > 0) - inode_mark = NULL; - else if (cmp < 0) - vfsmount_mark = NULL; - } - - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, - data, data_is, cookie, file_name, - &iter_info); + while (fsnotify_iter_select_report_types(&iter_info)) { + ret = send_to_group(to_tell, mask, data, data_is, cookie, + file_name, &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; - if (inode_mark) - iter_info.inode_mark = - fsnotify_next_mark(iter_info.inode_mark); - if (vfsmount_mark) - iter_info.vfsmount_mark = - fsnotify_next_mark(iter_info.vfsmount_mark); + fsnotify_iter_next(&iter_info); } ret = 0; out: diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 60f365dc1408..34515d2c4ba3 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -9,12 +9,6 @@ #include "../mount.h" -struct fsnotify_iter_info { - struct fsnotify_mark *inode_mark; - struct fsnotify_mark *vfsmount_mark; - int srcu_idx; -}; - /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); diff --git a/fs/notify/group.c b/fs/notify/group.c index b7a4b6a69efa..aa5468f23e45 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -67,7 +67,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) fsnotify_group_stop_queueing(group); /* Clear all marks for this group and queue them for destruction */ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES_MASK); /* * Some marks can still be pinned when waiting for response from diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index c00d2caca894..7e4578d35b61 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -25,8 +25,6 @@ extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); extern int inotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 40dedb37a1f3..9ab6dde38a14 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -65,12 +65,11 @@ static int inotify_merge(struct list_head *list, int inotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; @@ -78,7 +77,8 @@ int inotify_handle_event(struct fsnotify_group *group, int len = 0; int alloc_len = sizeof(struct inotify_event_info); - BUG_ON(vfsmount_mark); + if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info))) + return 0; if ((inode_mark->mask & FS_EXCL_UNLINK) && (data_type == FSNOTIFY_EVENT_PATH)) { diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ef32f3657958..1cf5b779d862 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -485,10 +485,14 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { struct inotify_inode_mark *i_mark; + struct fsnotify_iter_info iter_info = { }; + + fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE, + fsn_mark); /* Queue ignore event for the watch */ - inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL); + inotify_handle_event(group, NULL, FS_IN_IGNORED, NULL, + FSNOTIFY_EVENT_NONE, NULL, 0, &iter_info); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -578,7 +582,7 @@ static int inotify_new_watch(struct fsnotify_group *group, } /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0); + ret = fsnotify_add_inode_mark_locked(&tmp_i_mark->fsn_mark, inode, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index e9191b416434..61f4c5fa34c7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -119,9 +119,9 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) new_mask |= mark->mask; } - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) conn->inode->i_fsnotify_mask = new_mask; - else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) + else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask; } @@ -139,7 +139,7 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) spin_lock(&conn->lock); __fsnotify_recalc_mask(conn); spin_unlock(&conn->lock); - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) __fsnotify_update_child_dentry_flags(conn->inode); } @@ -166,18 +166,18 @@ static struct inode *fsnotify_detach_connector_from_object( { struct inode *inode = NULL; - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = conn->inode; rcu_assign_pointer(inode->i_fsnotify_marks, NULL); inode->i_fsnotify_mask = 0; conn->inode = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; - } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, NULL); real_mount(conn->mnt)->mnt_fsnotify_mask = 0; conn->mnt = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; } return inode; @@ -294,12 +294,12 @@ static void fsnotify_put_mark_wake(struct fsnotify_mark *mark) bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) { - /* This can fail if mark is being removed */ - if (!fsnotify_get_mark_safe(iter_info->inode_mark)) - return false; - if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) { - fsnotify_put_mark_wake(iter_info->inode_mark); - return false; + int type; + + fsnotify_foreach_obj_type(type) { + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->marks[type])) + goto fail; } /* @@ -310,13 +310,20 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); return true; + +fail: + for (type--; type >= 0; type--) + fsnotify_put_mark_wake(iter_info->marks[type]); + return false; } void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) { + int type; + iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - fsnotify_put_mark_wake(iter_info->inode_mark); - fsnotify_put_mark_wake(iter_info->vfsmount_mark); + fsnotify_foreach_obj_type(type) + fsnotify_put_mark_wake(iter_info->marks[type]); } /* @@ -442,10 +449,10 @@ static int fsnotify_attach_connector_to_object( spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); if (inode) { - conn->flags = FSNOTIFY_OBJ_TYPE_INODE; + conn->type = FSNOTIFY_OBJ_TYPE_INODE; conn->inode = igrab(inode); } else { - conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->type = FSNOTIFY_OBJ_TYPE_VFSMOUNT; conn->mnt = mnt; } /* @@ -479,8 +486,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector( if (!conn) goto out; spin_lock(&conn->lock); - if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE | - FSNOTIFY_OBJ_TYPE_VFSMOUNT))) { + if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) { spin_unlock(&conn->lock); srcu_read_unlock(&fsnotify_mark_srcu, idx); return NULL; @@ -646,16 +652,16 @@ struct fsnotify_mark *fsnotify_find_mark( return NULL; } -/* Clear any marks in a group with given type */ +/* Clear any marks in a group with given type mask */ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, - unsigned int type) + unsigned int type_mask) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); struct list_head *head = &to_free; /* Skip selection step if we want to clear all marks. */ - if (type == FSNOTIFY_OBJ_ALL_TYPES) { + if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) { head = &group->marks_list; goto clear; } @@ -670,7 +676,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->connector->flags & type) + if ((1U << mark->connector->type) & type_mask) list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 74b37cbbd5d4..33ee8cb32f83 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -719,37 +719,6 @@ struct ORANGEFS_dev_map_desc32 { __s32 count; }; -static unsigned long translate_dev_map26(unsigned long args, long *error) -{ - struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args; - /* - * Depending on the architecture, allocate some space on the - * user-call-stack based on our expected layout. - */ - struct ORANGEFS_dev_map_desc __user *p = - compat_alloc_user_space(sizeof(*p)); - compat_uptr_t addr; - - *error = 0; - /* get the ptr from the 32 bit user-space */ - if (get_user(addr, &p32->ptr)) - goto err; - /* try to put that into a 64-bit layout */ - if (put_user(compat_ptr(addr), &p->ptr)) - goto err; - /* copy the remaining fields */ - if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32))) - goto err; - if (copy_in_user(&p->size, &p32->size, sizeof(__s32))) - goto err; - if (copy_in_user(&p->count, &p32->count, sizeof(__s32))) - goto err; - return (unsigned long)p; -err: - *error = -EFAULT; - return 0; -} - /* * 32 bit user-space apps' ioctl handlers when kernel modules * is compiled as a 64 bit one @@ -758,25 +727,26 @@ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long args) { long ret; - unsigned long arg = args; /* Check for properly constructed commands */ ret = check_ioctl_command(cmd); if (ret < 0) return ret; if (cmd == ORANGEFS_DEV_MAP) { - /* - * convert the arguments to what we expect internally - * in kernel space - */ - arg = translate_dev_map26(args, &ret); - if (ret < 0) { - gossip_err("Could not translate dev map\n"); - return ret; - } + struct ORANGEFS_dev_map_desc desc; + struct ORANGEFS_dev_map_desc32 d32; + + if (copy_from_user(&d32, (void __user *)args, sizeof(d32))) + return -EFAULT; + + desc.ptr = compat_ptr(d32.ptr); + desc.total_size = d32.total_size; + desc.size = d32.size; + desc.count = d32.count; + return orangefs_bufmap_initialize(&desc); } /* no other ioctl requires translation */ - return dispatch_ioctl_command(cmd, arg); + return dispatch_ioctl_command(cmd, args); } #endif /* CONFIG_COMPAT is in .config */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7b4d9714f248..6ac1c92997ea 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -409,7 +409,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, if (!ent) goto out; - if (qstr.len + 1 <= sizeof(ent->inline_name)) { + if (qstr.len + 1 <= SIZEOF_PDE_INLINE_NAME) { ent->name = ent->inline_name; } else { ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); @@ -740,3 +740,27 @@ void *PDE_DATA(const struct inode *inode) return __PDE_DATA(inode); } EXPORT_SYMBOL(PDE_DATA); + +/* + * Pull a user buffer into memory and pass it to the file's write handler if + * one is supplied. The ->write() method is permitted to modify the + * kernel-side buffer. + */ +ssize_t proc_simple_write(struct file *f, const char __user *ubuf, size_t size, + loff_t *_pos) +{ + struct proc_dir_entry *pde = PDE(file_inode(f)); + char *buf; + int ret; + + if (!pde->write) + return -EACCES; + if (size == 0 || size > PAGE_SIZE - 1) + return -EINVAL; + buf = memdup_user_nul(ubuf, size); + if (IS_ERR(buf)) + return PTR_ERR(buf); + ret = pde->write(f, buf, size); + kfree(buf); + return ret == 0 ? size : ret; +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 2cf3b74391ca..85ffbd27f288 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -105,9 +105,8 @@ void __init proc_init_kmemcache(void) kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0, SLAB_ACCOUNT|SLAB_PANIC, NULL); proc_dir_entry_cache = kmem_cache_create_usercopy( - "proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC, - offsetof(struct proc_dir_entry, inline_name), - sizeof_field(struct proc_dir_entry, inline_name), NULL); + "proc_dir_entry", SIZEOF_PDE_SLOT, 0, SLAB_PANIC, + OFFSETOF_PDE_NAME, SIZEOF_PDE_INLINE_NAME, NULL); } static int proc_show_options(struct seq_file *seq, struct dentry *root) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 50cb22a08c2f..da3dbfa09e79 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -48,6 +48,7 @@ struct proc_dir_entry { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); }; + proc_write_t write; void *data; unsigned int state_size; unsigned int low_ino; @@ -61,14 +62,20 @@ struct proc_dir_entry { char *name; umode_t mode; u8 namelen; -#ifdef CONFIG_64BIT -#define SIZEOF_PDE_INLINE_NAME (192-155) -#else -#define SIZEOF_PDE_INLINE_NAME (128-95) -#endif - char inline_name[SIZEOF_PDE_INLINE_NAME]; + char inline_name[]; } __randomize_layout; +#define OFFSETOF_PDE_NAME offsetof(struct proc_dir_entry, inline_name) +#define SIZEOF_PDE_SLOT \ + (OFFSETOF_PDE_NAME + 34 <= 64 ? 64 : \ + OFFSETOF_PDE_NAME + 34 <= 128 ? 128 : \ + OFFSETOF_PDE_NAME + 34 <= 192 ? 192 : \ + OFFSETOF_PDE_NAME + 34 <= 256 ? 256 : \ + OFFSETOF_PDE_NAME + 34 <= 512 ? 512 : \ + 0) + +#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE_SLOT - OFFSETOF_PDE_NAME) + extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); @@ -189,6 +196,7 @@ static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } +extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *); /* * inode.c diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 7d94fa005b0d..d5e0fcb3439e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -46,6 +46,9 @@ static int seq_open_net(struct inode *inode, struct file *file) WARN_ON_ONCE(state_size < sizeof(*p)); + if (file->f_mode & FMODE_WRITE && !PDE(inode)->write) + return -EACCES; + net = get_proc_net(inode); if (!net) return -ENXIO; @@ -73,6 +76,7 @@ static int seq_release_net(struct inode *ino, struct file *f) static const struct file_operations proc_net_seq_fops = { .open = seq_open_net, .read = seq_read, + .write = proc_simple_write, .llseek = seq_lseek, .release = seq_release_net, }; @@ -93,6 +97,50 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(proc_create_net_data); +/** + * proc_create_net_data_write - Create a writable net_ns-specific proc file + * @name: The name of the file. + * @mode: The file's access mode. + * @parent: The parent directory in which to create. + * @ops: The seq_file ops with which to read the file. + * @write: The write method which which to 'modify' the file. + * @data: Data for retrieval by PDE_DATA(). + * + * Create a network namespaced proc file in the @parent directory with the + * specified @name and @mode that allows reading of a file that displays a + * series of elements and also provides for the file accepting writes that have + * some arbitrary effect. + * + * The functions in the @ops table are used to iterate over items to be + * presented and extract the readable content using the seq_file interface. + * + * The @write function is called with the data copied into a kernel space + * scratch buffer and has a NUL appended for convenience. The buffer may be + * modified by the @write function. @write should return 0 on success. + * + * The @data value is accessible from the @show and @write functions by calling + * PDE_DATA() on the file inode. The network namespace must be accessed by + * calling seq_file_net() on the seq_file struct. + */ +struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode, + struct proc_dir_entry *parent, + const struct seq_operations *ops, + proc_write_t write, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + p->write = write; + return proc_register(parent, p); +} +EXPORT_SYMBOL_GPL(proc_create_net_data_write); + static int single_open_net(struct inode *inode, struct file *file) { struct proc_dir_entry *de = PDE(inode); @@ -119,6 +167,7 @@ static int single_release_net(struct inode *ino, struct file *f) static const struct file_operations proc_net_single_fops = { .open = single_open_net, .read = seq_read, + .write = proc_simple_write, .llseek = seq_lseek, .release = single_release_net, }; @@ -138,6 +187,49 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(proc_create_net_single); +/** + * proc_create_net_single_write - Create a writable net_ns-specific proc file + * @name: The name of the file. + * @mode: The file's access mode. + * @parent: The parent directory in which to create. + * @show: The seqfile show method with which to read the file. + * @write: The write method which which to 'modify' the file. + * @data: Data for retrieval by PDE_DATA(). + * + * Create a network-namespaced proc file in the @parent directory with the + * specified @name and @mode that allows reading of a file that displays a + * single element rather than a series and also provides for the file accepting + * writes that have some arbitrary effect. + * + * The @show function is called to extract the readable content via the + * seq_file interface. + * + * The @write function is called with the data copied into a kernel space + * scratch buffer and has a NUL appended for convenience. The buffer may be + * modified by the @write function. @write should return 0 on success. + * + * The @data value is accessible from the @show and @write functions by calling + * PDE_DATA() on the file inode. The network namespace must be accessed by + * calling seq_file_single_net() on the seq_file struct. + */ +struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), + proc_write_t write, + void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_single_fops; + p->single_show = show; + p->write = write; + return proc_register(parent, p); +} +EXPORT_SYMBOL_GPL(proc_create_net_single_write); + static struct net *get_proc_task_net(struct inode *dir) { struct task_struct *task; diff --git a/fs/proc/root.c b/fs/proc/root.c index 61b7340b357a..f4b1a9d2eca6 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -204,8 +204,7 @@ struct proc_dir_entry proc_root = { .proc_fops = &proc_root_operations, .parent = &proc_root, .subdir = RB_ROOT, - .name = proc_root.inline_name, - .inline_name = "/proc", + .name = "/proc", }; int pid_ns_prepare_proc(struct pid_namespace *ns) diff --git a/fs/signalfd.c b/fs/signalfd.c index cbb42f77a2bd..4fcd1498acf5 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -259,10 +259,8 @@ static const struct file_operations signalfd_fops = { .llseek = noop_llseek, }; -static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, - int flags) +static int do_signalfd4(int ufd, sigset_t *mask, int flags) { - sigset_t sigmask; struct signalfd_ctx *ctx; /* Check the SFD_* constants for consistency. */ @@ -272,18 +270,15 @@ static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK)) return -EINVAL; - if (sizemask != sizeof(sigset_t) || - copy_from_user(&sigmask, user_mask, sizeof(sigmask))) - return -EINVAL; - sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - signotset(&sigmask); + sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + signotset(mask); if (ufd == -1) { ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->sigmask = sigmask; + ctx->sigmask = *mask; /* * When we call this, the initialization must be complete, since @@ -303,7 +298,7 @@ static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, return -EINVAL; } spin_lock_irq(¤t->sighand->siglock); - ctx->sigmask = sigmask; + ctx->sigmask = *mask; spin_unlock_irq(¤t->sighand->siglock); wake_up(¤t->sighand->signalfd_wqh); @@ -316,46 +311,51 @@ static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, size_t, sizemask, int, flags) { - return do_signalfd4(ufd, user_mask, sizemask, flags); + sigset_t mask; + + if (sizemask != sizeof(sigset_t) || + copy_from_user(&mask, user_mask, sizeof(mask))) + return -EINVAL; + return do_signalfd4(ufd, &mask, flags); } SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, size_t, sizemask) { - return do_signalfd4(ufd, user_mask, sizemask, 0); + sigset_t mask; + + if (sizemask != sizeof(sigset_t) || + copy_from_user(&mask, user_mask, sizeof(mask))) + return -EINVAL; + return do_signalfd4(ufd, &mask, 0); } #ifdef CONFIG_COMPAT static long do_compat_signalfd4(int ufd, - const compat_sigset_t __user *sigmask, + const compat_sigset_t __user *user_mask, compat_size_t sigsetsize, int flags) { - sigset_t tmp; - sigset_t __user *ksigmask; + sigset_t mask; if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; - if (get_compat_sigset(&tmp, sigmask)) - return -EFAULT; - ksigmask = compat_alloc_user_space(sizeof(sigset_t)); - if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) + if (get_compat_sigset(&mask, user_mask)) return -EFAULT; - - return do_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags); + return do_signalfd4(ufd, &mask, flags); } COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, - const compat_sigset_t __user *, sigmask, + const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize, int, flags) { - return do_compat_signalfd4(ufd, sigmask, sigsetsize, flags); + return do_compat_signalfd4(ufd, user_mask, sigsetsize, flags); } COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd, - const compat_sigset_t __user *,sigmask, + const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize) { - return do_compat_signalfd4(ufd, sigmask, sigsetsize, 0); + return do_compat_signalfd4(ufd, user_mask, sigsetsize, 0); } #endif diff --git a/fs/splice.c b/fs/splice.c index 2365ab073a27..b3daa971f597 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1243,38 +1243,26 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, * For lack of a better implementation, implement vmsplice() to userspace * as a simple copy of the pipes pages to the user iov. */ -static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, - unsigned long nr_segs, unsigned int flags) +static long vmsplice_to_user(struct file *file, struct iov_iter *iter, + unsigned int flags) { - struct pipe_inode_info *pipe; - struct splice_desc sd; - long ret; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; + struct pipe_inode_info *pipe = get_pipe_info(file); + struct splice_desc sd = { + .total_len = iov_iter_count(iter), + .flags = flags, + .u.data = iter + }; + long ret = 0; - pipe = get_pipe_info(file); if (!pipe) return -EBADF; - ret = import_iovec(READ, uiov, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (ret < 0) - return ret; - - sd.total_len = iov_iter_count(&iter); - sd.len = 0; - sd.flags = flags; - sd.u.data = &iter; - sd.pos = 0; - if (sd.total_len) { pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, pipe_to_user); pipe_unlock(pipe); } - kfree(iov); return ret; } @@ -1283,14 +1271,11 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. */ -static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, - unsigned long nr_segs, unsigned int flags) +static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, + unsigned int flags) { struct pipe_inode_info *pipe; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter from; - long ret; + long ret = 0; unsigned buf_flag = 0; if (flags & SPLICE_F_GIFT) @@ -1300,22 +1285,31 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, if (!pipe) return -EBADF; - ret = import_iovec(WRITE, uiov, nr_segs, - ARRAY_SIZE(iovstack), &iov, &from); - if (ret < 0) - return ret; - pipe_lock(pipe); ret = wait_for_space(pipe, flags); if (!ret) - ret = iter_to_pipe(&from, pipe, buf_flag); + ret = iter_to_pipe(iter, pipe, buf_flag); pipe_unlock(pipe); if (ret > 0) wakeup_pipe_readers(pipe); - kfree(iov); return ret; } +static int vmsplice_type(struct fd f, int *type) +{ + if (!f.file) + return -EBADF; + if (f.file->f_mode & FMODE_WRITE) { + *type = WRITE; + } else if (f.file->f_mode & FMODE_READ) { + *type = READ; + } else { + fdput(f); + return -EBADF; + } + return 0; +} + /* * Note that vmsplice only really supports true splicing _from_ user memory * to a pipe, not the other way around. Splicing from user memory is a simple @@ -1332,57 +1326,69 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ -static long do_vmsplice(int fd, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags) +static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags) { - struct fd f; - long error; - if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; - if (unlikely(nr_segs > UIO_MAXIOV)) - return -EINVAL; - else if (unlikely(!nr_segs)) - return 0; - error = -EBADF; - f = fdget(fd); - if (f.file) { - if (f.file->f_mode & FMODE_WRITE) - error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); - else if (f.file->f_mode & FMODE_READ) - error = vmsplice_to_user(f.file, iov, nr_segs, flags); - - fdput(f); - } + if (!iov_iter_count(iter)) + return 0; - return error; + if (iov_iter_rw(iter) == WRITE) + return vmsplice_to_pipe(f, iter, flags); + else + return vmsplice_to_user(f, iter, flags); } -SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, +SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, unsigned long, nr_segs, unsigned int, flags) { - return do_vmsplice(fd, iov, nr_segs, flags); + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + long error; + struct fd f; + int type; + + f = fdget(fd); + error = vmsplice_type(f, &type); + if (error) + return error; + + error = import_iovec(type, uiov, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (!error) { + error = do_vmsplice(f.file, &iter, flags); + kfree(iov); + } + fdput(f); + return error; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32, unsigned int, nr_segs, unsigned int, flags) { - unsigned i; - struct iovec __user *iov; - if (nr_segs > UIO_MAXIOV) - return -EINVAL; - iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec)); - for (i = 0; i < nr_segs; i++) { - struct compat_iovec v; - if (get_user(v.iov_base, &iov32[i].iov_base) || - get_user(v.iov_len, &iov32[i].iov_len) || - put_user(compat_ptr(v.iov_base), &iov[i].iov_base) || - put_user(v.iov_len, &iov[i].iov_len)) - return -EFAULT; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + long error; + struct fd f; + int type; + + f = fdget(fd); + error = vmsplice_type(f, &type); + if (error) + return error; + + error = compat_import_iovec(type, iov32, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (!error) { + error = do_vmsplice(f.file, &iter, flags); + kfree(iov); } - return do_vmsplice(fd, iov, nr_segs, flags); + fdput(f); + return error; } #endif |