diff options
Diffstat (limited to 'fs')
242 files changed, 4304 insertions, 3211 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 42e102e2e74a..85ff859d3af5 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -859,8 +859,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened) + struct file *file, unsigned flags, umode_t mode) { int err; u32 perm; @@ -917,7 +916,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9inode->writeback_fid = (void *) inode_fid; } mutex_unlock(&v9inode->v_mutex); - err = finish_open(file, dentry, generic_file_open, opened); + err = finish_open(file, dentry, generic_file_open); if (err) goto error; @@ -925,7 +924,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(d_inode(dentry), file); - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; out: dput(res); return err; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 7f6ae21a27b3..4823e1c46999 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -241,8 +241,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t omode, - int *opened) + struct file *file, unsigned flags, umode_t omode) { int err = 0; kgid_t gid; @@ -352,13 +351,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ - err = finish_open(file, dentry, generic_file_open, opened); + err = finish_open(file, dentry, generic_file_open); if (err) goto err_clunk_old_fid; file->private_data = ofid; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) v9fs_cache_inode_set_cookie(inode, file); - 
*opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; out: v9fs_put_acl(dacl, pacl); dput(res); diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 57a27c42b5ac..56df483de619 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -168,7 +168,7 @@ config BINFMT_MISC will automatically feed it to the correct interpreter. You can do other nice things, too. Read the file - <file:Documentation/binfmt_misc.txt> to learn how to use this + <file:Documentation/admin-guide/binfmt-misc.rst> to learn how to use this feature, <file:Documentation/admin-guide/java.rst> for information about how to include Java support. and <file:Documentation/admin-guide/mono.rst> for information about how to include Mono-based .NET support. diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 8dbd36f5e581..e91028d4340a 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -199,7 +199,7 @@ adfs_adfs2unix_time(struct timespec *tv, struct inode *inode) return; cur_time: - *tv = current_time(inode); + *tv = timespec64_to_timespec(current_time(inode)); return; too_early: @@ -242,6 +242,7 @@ adfs_unix2adfs_time(struct inode *inode, unsigned int secs) struct inode * adfs_iget(struct super_block *sb, struct object_info *obj) { + struct timespec ts; struct inode *inode; inode = new_inode(sb); @@ -270,7 +271,9 @@ adfs_iget(struct super_block *sb, struct object_info *obj) ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000); inode->i_mode = adfs_atts2mode(sb, inode); - adfs_adfs2unix_time(&inode->i_mtime, inode); + ts = timespec64_to_timespec(inode->i_mtime); + adfs_adfs2unix_time(&ts, inode); + inode->i_mtime = timespec_to_timespec64(ts); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; @@ -284,7 +287,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj) ADFS_I(inode)->mmu_private = inode->i_size; } - insert_inode_hash(inode); + inode_fake_hash(inode); out: return inode; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 71fa525d63a0..7e099a7a4eb1 
100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -291,6 +291,7 @@ static void destroy_inodecache(void) static const struct super_operations adfs_sops = { .alloc_inode = adfs_alloc_inode, .destroy_inode = adfs_destroy_inode, + .drop_inode = generic_delete_inode, .write_inode = adfs_write_inode, .put_super = adfs_put_super, .statfs = adfs_statfs, diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 532acae25453..546874057bd3 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -5,7 +5,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o -kafs-objs := \ +kafs-y := \ $(afs-cache-y) \ addr_list.o \ callback.o \ @@ -21,7 +21,6 @@ kafs-objs := \ main.o \ misc.o \ mntpt.o \ - proc.o \ rotate.o \ rxrpc.o \ security.o \ @@ -34,4 +33,5 @@ kafs-objs := \ write.o \ xattr.o +kafs-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_AFS_FS) := kafs.o diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 2c46c46f3a6d..025a9a5e1c32 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -215,7 +215,7 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) _enter("%s", cell->name); ret = dns_query("afsdb", cell->name, cell->name_len, - "ipv4", &vllist, _expiry); + "", &vllist, _expiry); if (ret < 0) return ERR_PTR(ret); diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 571437dcb252..5f261fbf2182 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -21,6 +21,66 @@ #include "internal.h" /* + * Create volume and callback interests on a server. 
+ */ +static struct afs_cb_interest *afs_create_interest(struct afs_server *server, + struct afs_vnode *vnode) +{ + struct afs_vol_interest *new_vi, *vi; + struct afs_cb_interest *new; + struct hlist_node **pp; + + new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL); + if (!new_vi) + return NULL; + + new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); + if (!new) { + kfree(new_vi); + return NULL; + } + + new_vi->usage = 1; + new_vi->vid = vnode->volume->vid; + INIT_HLIST_NODE(&new_vi->srv_link); + INIT_HLIST_HEAD(&new_vi->cb_interests); + + refcount_set(&new->usage, 1); + new->sb = vnode->vfs_inode.i_sb; + new->vid = vnode->volume->vid; + new->server = afs_get_server(server); + INIT_HLIST_NODE(&new->cb_vlink); + + write_lock(&server->cb_break_lock); + + for (pp = &server->cb_volumes.first; *pp; pp = &(*pp)->next) { + vi = hlist_entry(*pp, struct afs_vol_interest, srv_link); + if (vi->vid < new_vi->vid) + continue; + if (vi->vid > new_vi->vid) + break; + vi->usage++; + goto found_vi; + } + + new_vi->srv_link.pprev = pp; + new_vi->srv_link.next = *pp; + if (*pp) + (*pp)->pprev = &new_vi->srv_link.next; + *pp = &new_vi->srv_link; + vi = new_vi; + new_vi = NULL; +found_vi: + + new->vol_interest = vi; + hlist_add_head(&new->cb_vlink, &vi->cb_interests); + + write_unlock(&server->cb_break_lock); + kfree(new_vi); + return new; +} + +/* * Set up an interest-in-callbacks record for a volume on a server and * register it with the server. * - Called with vnode->io_lock held. 
@@ -77,20 +137,10 @@ again: } if (!cbi) { - new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); + new = afs_create_interest(server, vnode); if (!new) return -ENOMEM; - refcount_set(&new->usage, 1); - new->sb = vnode->vfs_inode.i_sb; - new->vid = vnode->volume->vid; - new->server = afs_get_server(server); - INIT_LIST_HEAD(&new->cb_link); - - write_lock(&server->cb_break_lock); - list_add_tail(&new->cb_link, &server->cb_interests); - write_unlock(&server->cb_break_lock); - write_lock(&slist->lock); if (!entry->cb_interest) { entry->cb_interest = afs_get_cb_interest(new); @@ -126,11 +176,22 @@ again: */ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) { + struct afs_vol_interest *vi; + if (cbi && refcount_dec_and_test(&cbi->usage)) { - if (!list_empty(&cbi->cb_link)) { + if (!hlist_unhashed(&cbi->cb_vlink)) { write_lock(&cbi->server->cb_break_lock); - list_del_init(&cbi->cb_link); + + hlist_del_init(&cbi->cb_vlink); + vi = cbi->vol_interest; + cbi->vol_interest = NULL; + if (--vi->usage == 0) + hlist_del(&vi->srv_link); + else + vi = NULL; + write_unlock(&cbi->server->cb_break_lock); + kfree(vi); afs_put_server(net, cbi->server); } kfree(cbi); @@ -182,20 +243,34 @@ void afs_break_callback(struct afs_vnode *vnode) static void afs_break_one_callback(struct afs_server *server, struct afs_fid *fid) { + struct afs_vol_interest *vi; struct afs_cb_interest *cbi; struct afs_iget_data data; struct afs_vnode *vnode; struct inode *inode; read_lock(&server->cb_break_lock); + hlist_for_each_entry(vi, &server->cb_volumes, srv_link) { + if (vi->vid < fid->vid) + continue; + if (vi->vid > fid->vid) { + vi = NULL; + break; + } + //atomic_inc(&vi->usage); + break; + } + + /* TODO: Find all matching volumes if we couldn't match the server and + * break them anyway. + */ + if (!vi) + goto out; /* Step through all interested superblocks. There may be more than one * because of cell aliasing. 
*/ - list_for_each_entry(cbi, &server->cb_interests, cb_link) { - if (cbi->vid != fid->vid) - continue; - + hlist_for_each_entry(cbi, &vi->cb_interests, cb_vlink) { if (fid->vnode == 0 && fid->unique == 0) { /* The callback break applies to an entire volume. */ struct afs_super_info *as = AFS_FS_S(cbi->sb); @@ -217,6 +292,7 @@ static void afs_break_one_callback(struct afs_server *server, } } +out: read_unlock(&server->cb_break_lock); } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index fdf4c36cff79..f3d0bef16d78 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -15,6 +15,7 @@ #include <linux/dns_resolver.h> #include <linux/sched.h> #include <linux/inet.h> +#include <linux/namei.h> #include <keys/rxrpc-type.h> #include "internal.h" @@ -341,8 +342,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) /* install the new cell */ write_seqlock(&net->cells_lock); - old_root = net->ws_cell; - net->ws_cell = new_root; + old_root = rcu_access_pointer(net->ws_cell); + rcu_assign_pointer(net->ws_cell, new_root); write_sequnlock(&net->cells_lock); afs_put_cell(net, old_root); @@ -528,12 +529,14 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) NULL, 0, cell, 0, true); #endif - ret = afs_proc_cell_setup(net, cell); + ret = afs_proc_cell_setup(cell); if (ret < 0) return ret; - spin_lock(&net->proc_cells_lock); + + mutex_lock(&net->proc_cells_lock); list_add_tail(&cell->proc_link, &net->proc_cells); - spin_unlock(&net->proc_cells_lock); + afs_dynroot_mkdir(net, cell); + mutex_unlock(&net->proc_cells_lock); return 0; } @@ -544,11 +547,12 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) { _enter("%s", cell->name); - afs_proc_cell_remove(net, cell); + afs_proc_cell_remove(cell); - spin_lock(&net->proc_cells_lock); + mutex_lock(&net->proc_cells_lock); list_del_init(&cell->proc_link); - spin_unlock(&net->proc_cells_lock); + afs_dynroot_rmdir(net, cell); + mutex_unlock(&net->proc_cells_lock); #ifdef CONFIG_AFS_FSCACHE 
fscache_relinquish_cookie(cell->cache, NULL, false); @@ -755,8 +759,8 @@ void afs_cell_purge(struct afs_net *net) _enter(""); write_seqlock(&net->cells_lock); - ws = net->ws_cell; - net->ws_cell = NULL; + ws = rcu_access_pointer(net->ws_cell); + RCU_INIT_POINTER(net->ws_cell, NULL); write_sequnlock(&net->cells_lock); afs_put_cell(net, ws); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 238fd28cfdd2..9e51d6fe7e8f 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -526,7 +526,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) nifs = 0; ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL); if (ifs) { - nifs = afs_get_ipv4_interfaces(ifs, 32, false); + nifs = afs_get_ipv4_interfaces(call->net, ifs, 32, false); if (nifs < 0) { kfree(ifs); ifs = NULL; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 7d623008157f..855bf2b79fed 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -822,6 +822,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, { struct afs_vnode *dvnode = AFS_FS_I(dir); struct inode *inode; + struct dentry *d; struct key *key; int ret; @@ -862,43 +863,17 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, afs_stat_v(dvnode, n_lookup); inode = afs_do_lookup(dir, dentry, key); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - if (ret == -ENOENT) { - inode = afs_try_auto_mntpt(dentry, dir); - if (!IS_ERR(inode)) { - key_put(key); - goto success; - } - - ret = PTR_ERR(inode); - } - - key_put(key); - if (ret == -ENOENT) { - d_add(dentry, NULL); - _leave(" = NULL [negative]"); - return NULL; - } - _leave(" = %d [do]", ret); - return ERR_PTR(ret); - } - dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version; - - /* instantiate the dentry */ key_put(key); - if (IS_ERR(inode)) { - _leave(" = %ld", PTR_ERR(inode)); - return ERR_CAST(inode); + if (inode == ERR_PTR(-ENOENT)) { + inode = afs_try_auto_mntpt(dentry, dir); + } else { + dentry->d_fsdata = + (void *)(unsigned 
long)dvnode->status.data_version; } - -success: - d_add(dentry, inode); - _leave(" = 0 { ino=%lu v=%u }", - d_inode(dentry)->i_ino, - d_inode(dentry)->i_generation); - - return NULL; + d = d_splice_alias(inode, dentry); + if (!IS_ERR_OR_NULL(d)) + d->d_fsdata = dentry->d_fsdata; + return d; } /* diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 983f3946ab57..1cde710a8013 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -1,4 +1,4 @@ -/* dir.c: AFS dynamic root handling +/* AFS dynamic root handling * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry) return 0; } - ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL); + ret = dns_query("afsdb", name, len, "", NULL, NULL); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; @@ -83,7 +83,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir) out: _leave("= %d", ret); - return ERR_PTR(ret); + return ret == -ENOENT ? 
NULL : ERR_PTR(ret); } /* @@ -141,12 +141,6 @@ out_p: static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode; - struct inode *inode; - int ret; - - vnode = AFS_FS_I(dir); - _enter("%pd", dentry); ASSERTCMP(d_inode(dentry), ==, NULL); @@ -160,22 +154,7 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr memcmp(dentry->d_name.name, "@cell", 5) == 0) return afs_lookup_atcell(dentry); - inode = afs_try_auto_mntpt(dentry, dir); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - if (ret == -ENOENT) { - d_add(dentry, NULL); - _leave(" = NULL [negative]"); - return NULL; - } - _leave(" = %d [do]", ret); - return ERR_PTR(ret); - } - - d_add(dentry, inode); - _leave(" = 0 { ino=%lu v=%u }", - d_inode(dentry)->i_ino, d_inode(dentry)->i_generation); - return NULL; + return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry); } const struct inode_operations afs_dynroot_inode_operations = { @@ -207,3 +186,125 @@ const struct dentry_operations afs_dynroot_dentry_operations = { .d_release = afs_d_release, .d_automount = afs_d_automount, }; + +/* + * Create a manually added cell mount directory. + * - The caller must hold net->proc_cells_lock + */ +int afs_dynroot_mkdir(struct afs_net *net, struct afs_cell *cell) +{ + struct super_block *sb = net->dynroot_sb; + struct dentry *root, *subdir; + int ret; + + if (!sb || atomic_read(&sb->s_active) == 0) + return 0; + + /* Let the ->lookup op do the creation */ + root = sb->s_root; + inode_lock(root->d_inode); + subdir = lookup_one_len(cell->name, root, cell->name_len); + if (IS_ERR(subdir)) { + ret = PTR_ERR(subdir); + goto unlock; + } + + /* Note that we're retaining an extra ref on the dentry */ + subdir->d_fsdata = (void *)1UL; + ret = 0; +unlock: + inode_unlock(root->d_inode); + return ret; +} + +/* + * Remove a manually added cell mount directory. 
+ * - The caller must hold net->proc_cells_lock + */ +void afs_dynroot_rmdir(struct afs_net *net, struct afs_cell *cell) +{ + struct super_block *sb = net->dynroot_sb; + struct dentry *root, *subdir; + + if (!sb || atomic_read(&sb->s_active) == 0) + return; + + root = sb->s_root; + inode_lock(root->d_inode); + + /* Don't want to trigger a lookup call, which will re-add the cell */ + subdir = try_lookup_one_len(cell->name, root, cell->name_len); + if (IS_ERR_OR_NULL(subdir)) { + _debug("lookup %ld", PTR_ERR(subdir)); + goto no_dentry; + } + + _debug("rmdir %pd %u", subdir, d_count(subdir)); + + if (subdir->d_fsdata) { + _debug("unpin %u", d_count(subdir)); + subdir->d_fsdata = NULL; + dput(subdir); + } + dput(subdir); +no_dentry: + inode_unlock(root->d_inode); + _leave(""); +} + +/* + * Populate a newly created dynamic root with cell names. + */ +int afs_dynroot_populate(struct super_block *sb) +{ + struct afs_cell *cell; + struct afs_net *net = afs_sb2net(sb); + int ret; + + if (mutex_lock_interruptible(&net->proc_cells_lock) < 0) + return -ERESTARTSYS; + + net->dynroot_sb = sb; + list_for_each_entry(cell, &net->proc_cells, proc_link) { + ret = afs_dynroot_mkdir(net, cell); + if (ret < 0) + goto error; + } + + ret = 0; +out: + mutex_unlock(&net->proc_cells_lock); + return ret; + +error: + net->dynroot_sb = NULL; + goto out; +} + +/* + * When a dynamic root that's in the process of being destroyed, depopulate it + * of pinned directories. 
+ */ +void afs_dynroot_depopulate(struct super_block *sb) +{ + struct afs_net *net = afs_sb2net(sb); + struct dentry *root = sb->s_root, *subdir, *tmp; + + /* Prevent more subdirs from being created */ + mutex_lock(&net->proc_cells_lock); + if (net->dynroot_sb == sb) + net->dynroot_sb = NULL; + mutex_unlock(&net->proc_cells_lock); + + inode_lock(root->d_inode); + + /* Remove all the pins for dirs created for manually added cells */ + list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) { + if (subdir->d_fsdata) { + subdir->d_fsdata = NULL; + dput(subdir); + } + } + + inode_unlock(root->d_inode); +} diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index b273e1d60478..50929cb91732 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -72,7 +72,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode, const afs_dataversion_t *expected_version, u8 flags) { - struct timespec t; + struct timespec64 t; umode_t mode; t.tv_sec = status->mtime_client; @@ -138,10 +138,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, u64 data_version, size; u32 type, abort_code; u8 flags = 0; - int ret; - - if (vnode) - write_seqlock(&vnode->cb_lock); abort_code = ntohl(xdr->abort_code); @@ -154,8 +150,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, * case. 
*/ status->abort_code = abort_code; - ret = 0; - goto out; + return 0; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -164,8 +159,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, if (abort_code != 0 && inline_error) { status->abort_code = abort_code; - ret = 0; - goto out; + return 0; } type = ntohl(xdr->type); @@ -235,17 +229,35 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, flags); } - ret = 0; - -out: - if (vnode) - write_sequnlock(&vnode->cb_lock); - return ret; + return 0; bad: xdr_dump_bad(*_bp); - ret = afs_protocol_error(call, -EBADMSG); - goto out; + return afs_protocol_error(call, -EBADMSG); +} + +/* + * Decode the file status. We need to lock the target vnode if we're going to + * update its status so that stat() sees the attributes update atomically. + */ +static int afs_decode_status(struct afs_call *call, + const __be32 **_bp, + struct afs_file_status *status, + struct afs_vnode *vnode, + const afs_dataversion_t *expected_version, + struct afs_read *read_req) +{ + int ret; + + if (!vnode) + return xdr_decode_AFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + + write_seqlock(&vnode->cb_lock); + ret = xdr_decode_AFSFetchStatus(call, _bp, status, vnode, + expected_version, read_req); + write_sequnlock(&vnode->cb_lock); + return ret; } /* @@ -387,8 +399,8 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) @@ -568,8 +580,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, 
- &vnode->status.data_version, req) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &vnode->status.data_version, req) < 0) return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack(call, vnode, &bp); if (call->reply[1]) @@ -721,9 +733,9 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || - xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 || + afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); xdr_decode_AFSCallBack_raw(&bp, call->reply[3]); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -827,8 +839,8 @@ static int afs_deliver_fs_remove(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -917,9 +929,9 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || - xdr_decode_AFSFetchStatus(call, &bp, &dvnode->status, dvnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 || + afs_decode_status(call, &bp, &dvnode->status, dvnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1004,9 
+1016,9 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; xdr_decode_AFSFid(&bp, call->reply[1]); - if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) || - xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) || + afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1110,12 +1122,12 @@ static int afs_deliver_fs_rename(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &orig_dvnode->status, orig_dvnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); if (new_dvnode != orig_dvnode && - xdr_decode_AFSFetchStatus(call, &bp, &new_dvnode->status, new_dvnode, - &call->expected_version_2, NULL) < 0) + afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode, + &call->expected_version_2, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1219,8 +1231,8 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -1395,8 +1407,8 @@ static int afs_deliver_fs_store_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - 
if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, - &call->expected_version, NULL) < 0) + if (afs_decode_status(call, &bp, &vnode->status, vnode, + &call->expected_version, NULL) < 0) return afs_protocol_error(call, -EBADMSG); /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */ @@ -2097,8 +2109,8 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFetchStatus(call, &bp, status, vnode, - &call->expected_version, NULL); + afs_decode_status(call, &bp, status, vnode, + &call->expected_version, NULL); callback[call->count].version = ntohl(bp[0]); callback[call->count].expiry = ntohl(bp[1]); callback[call->count].type = ntohl(bp[2]); @@ -2209,9 +2221,9 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) bp = call->buffer; statuses = call->reply[1]; - if (xdr_decode_AFSFetchStatus(call, &bp, &statuses[call->count], - call->count == 0 ? vnode : NULL, - NULL, NULL) < 0) + if (afs_decode_status(call, &bp, &statuses[call->count], + call->count == 0 ? 
vnode : NULL, + NULL, NULL) < 0) return afs_protocol_error(call, -EBADMSG); call->count++; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e3f8a46663db..9778df135717 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -22,6 +22,8 @@ #include <linux/backing-dev.h> #include <linux/uuid.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/sock.h> #include <net/af_rxrpc.h> #include "afs.h" @@ -40,7 +42,8 @@ struct afs_mount_params { afs_voltype_t type; /* type of volume requested */ int volnamesz; /* size of volume name */ const char *volname; /* name of volume to mount */ - struct afs_net *net; /* Network namespace in effect */ + struct net *net_ns; /* Network namespace in effect */ + struct afs_net *net; /* the AFS net namespace stuff */ struct afs_cell *cell; /* cell in which to find volume */ struct afs_volume *volume; /* volume record */ struct key *key; /* key to use for secure mounting */ @@ -189,7 +192,7 @@ struct afs_read { * - there's one superblock per volume */ struct afs_super_info { - struct afs_net *net; /* Network namespace */ + struct net *net_ns; /* Network namespace */ struct afs_cell *cell; /* The cell in which the volume resides */ struct afs_volume *volume; /* volume record */ bool dyn_root; /* True if dynamic root */ @@ -210,7 +213,6 @@ struct afs_sysnames { char *subs[AFS_NR_SYSNAME]; refcount_t usage; unsigned short nr; - short error; char blank[1]; }; @@ -218,6 +220,7 @@ struct afs_sysnames { * AFS network namespace record. 
*/ struct afs_net { + struct net *net; /* Backpointer to the owning net namespace */ struct afs_uuid uuid; bool live; /* F if this namespace is being removed */ @@ -231,13 +234,13 @@ struct afs_net { /* Cell database */ struct rb_root cells; - struct afs_cell *ws_cell; + struct afs_cell __rcu *ws_cell; struct work_struct cells_manager; struct timer_list cells_timer; atomic_t cells_outstanding; seqlock_t cells_lock; - spinlock_t proc_cells_lock; + struct mutex proc_cells_lock; struct list_head proc_cells; /* Known servers. Theoretically each fileserver can only be in one @@ -261,6 +264,7 @@ struct afs_net { struct mutex lock_manager_mutex; /* Misc */ + struct super_block *dynroot_sb; /* Dynamic root mount superblock */ struct proc_dir_entry *proc_afs; /* /proc/net/afs directory */ struct afs_sysnames *sysnames; rwlock_t sysnames_lock; @@ -280,7 +284,6 @@ struct afs_net { }; extern const char afs_init_sysname[]; -extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns enum afs_cell_state { AFS_CELL_UNSET, @@ -404,16 +407,27 @@ struct afs_server { rwlock_t fs_lock; /* access lock */ /* callback promise management */ - struct list_head cb_interests; /* List of superblocks using this server */ + struct hlist_head cb_volumes; /* List of volume interests on this server */ unsigned cb_s_break; /* Break-everything counter. */ rwlock_t cb_break_lock; /* Volume finding lock */ }; /* + * Volume collation in the server's callback interest list. + */ +struct afs_vol_interest { + struct hlist_node srv_link; /* Link in server->cb_volumes */ + struct hlist_head cb_interests; /* List of callback interests on the server */ + afs_volid_t vid; /* Volume ID to match */ + unsigned int usage; +}; + +/* * Interest by a superblock on a server. 
*/ struct afs_cb_interest { - struct list_head cb_link; /* Link in server->cb_interests */ + struct hlist_node cb_vlink; /* Link in vol_interest->cb_interests */ + struct afs_vol_interest *vol_interest; struct afs_server *server; /* Server on which this interest resides */ struct super_block *sb; /* Superblock on which inodes reside */ afs_volid_t vid; /* Volume ID to match */ @@ -720,6 +734,10 @@ extern const struct inode_operations afs_dynroot_inode_operations; extern const struct dentry_operations afs_dynroot_dentry_operations; extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *); +extern int afs_dynroot_mkdir(struct afs_net *, struct afs_cell *); +extern void afs_dynroot_rmdir(struct afs_net *, struct afs_cell *); +extern int afs_dynroot_populate(struct super_block *); +extern void afs_dynroot_depopulate(struct super_block *); /* * file.c @@ -806,34 +824,36 @@ extern int afs_drop_inode(struct inode *); * main.c */ extern struct workqueue_struct *afs_wq; +extern int afs_net_id; -static inline struct afs_net *afs_d2net(struct dentry *dentry) +static inline struct afs_net *afs_net(struct net *net) { - return &__afs_net; + return net_generic(net, afs_net_id); } -static inline struct afs_net *afs_i2net(struct inode *inode) +static inline struct afs_net *afs_sb2net(struct super_block *sb) { - return &__afs_net; + return afs_net(AFS_FS_S(sb)->net_ns); } -static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) +static inline struct afs_net *afs_d2net(struct dentry *dentry) { - return &__afs_net; + return afs_sb2net(dentry->d_sb); } -static inline struct afs_net *afs_sock2net(struct sock *sk) +static inline struct afs_net *afs_i2net(struct inode *inode) { - return &__afs_net; + return afs_sb2net(inode->i_sb); } -static inline struct afs_net *afs_get_net(struct afs_net *net) +static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) { - return net; + return afs_i2net(&vnode->vfs_inode); } -static inline void afs_put_net(struct afs_net 
*net) +static inline struct afs_net *afs_sock2net(struct sock *sk) { + return net_generic(sock_net(sk), afs_net_id); } static inline void __afs_stat(atomic_t *s) @@ -861,16 +881,25 @@ extern void afs_mntpt_kill_timer(void); /* * netdevices.c */ -extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool); +extern int afs_get_ipv4_interfaces(struct afs_net *, struct afs_interface *, + size_t, bool); /* * proc.c */ +#ifdef CONFIG_PROC_FS extern int __net_init afs_proc_init(struct afs_net *); extern void __net_exit afs_proc_cleanup(struct afs_net *); -extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *); -extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *); +extern int afs_proc_cell_setup(struct afs_cell *); +extern void afs_proc_cell_remove(struct afs_cell *); extern void afs_put_sysnames(struct afs_sysnames *); +#else +static inline int afs_proc_init(struct afs_net *net) { return 0; } +static inline void afs_proc_cleanup(struct afs_net *net) {} +static inline int afs_proc_cell_setup(struct afs_cell *cell) { return 0; } +static inline void afs_proc_cell_remove(struct afs_cell *cell) {} +static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {} +#endif /* * rotate.c @@ -1002,7 +1031,7 @@ extern bool afs_annotate_server_list(struct afs_server_list *, struct afs_server * super.c */ extern int __init afs_fs_init(void); -extern void __exit afs_fs_exit(void); +extern void afs_fs_exit(void); /* * vlclient.c diff --git a/fs/afs/main.c b/fs/afs/main.c index d7560168b3bf..e84fe822a960 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -15,6 +15,7 @@ #include <linux/completion.h> #include <linux/sched.h> #include <linux/random.h> +#include <linux/proc_fs.h> #define CREATE_TRACE_POINTS #include "internal.h" @@ -32,7 +33,7 @@ module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); struct workqueue_struct *afs_wq; -struct afs_net __afs_net; +static struct proc_dir_entry 
*afs_proc_symlink; #if defined(CONFIG_ALPHA) const char afs_init_sysname[] = "alpha_linux26"; @@ -67,11 +68,13 @@ const char afs_init_sysname[] = "unknown_linux26"; /* * Initialise an AFS network namespace record. */ -static int __net_init afs_net_init(struct afs_net *net) +static int __net_init afs_net_init(struct net *net_ns) { struct afs_sysnames *sysnames; + struct afs_net *net = afs_net(net_ns); int ret; + net->net = net_ns; net->live = true; generate_random_uuid((unsigned char *)&net->uuid); @@ -83,7 +86,7 @@ static int __net_init afs_net_init(struct afs_net *net) INIT_WORK(&net->cells_manager, afs_manage_cells); timer_setup(&net->cells_timer, afs_cells_timer, 0); - spin_lock_init(&net->proc_cells_lock); + mutex_init(&net->proc_cells_lock); INIT_LIST_HEAD(&net->proc_cells); seqlock_init(&net->fs_lock); @@ -142,8 +145,10 @@ error_sysnames: /* * Clean up and destroy an AFS network namespace record. */ -static void __net_exit afs_net_exit(struct afs_net *net) +static void __net_exit afs_net_exit(struct net *net_ns) { + struct afs_net *net = afs_net(net_ns); + net->live = false; afs_cell_purge(net); afs_purge_servers(net); @@ -152,6 +157,13 @@ static void __net_exit afs_net_exit(struct afs_net *net) afs_put_sysnames(net->sysnames); } +static struct pernet_operations afs_net_ops = { + .init = afs_net_init, + .exit = afs_net_exit, + .id = &afs_net_id, + .size = sizeof(struct afs_net), +}; + /* * initialise the AFS client FS module */ @@ -178,7 +190,7 @@ static int __init afs_init(void) goto error_cache; #endif - ret = afs_net_init(&__afs_net); + ret = register_pernet_subsys(&afs_net_ops); if (ret < 0) goto error_net; @@ -187,10 +199,18 @@ static int __init afs_init(void) if (ret < 0) goto error_fs; + afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs"); + if (IS_ERR(afs_proc_symlink)) { + ret = PTR_ERR(afs_proc_symlink); + goto error_proc; + } + return ret; +error_proc: + afs_fs_exit(); error_fs: - afs_net_exit(&__afs_net); + 
unregister_pernet_subsys(&afs_net_ops); error_net: #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); @@ -219,8 +239,9 @@ static void __exit afs_exit(void) { printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n"); + proc_remove(afs_proc_symlink); afs_fs_exit(); - afs_net_exit(&__afs_net); + unregister_pernet_subsys(&afs_net_ops); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); #endif diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c index 50bd5bb1c4fb..2a009d1939d7 100644 --- a/fs/afs/netdevices.c +++ b/fs/afs/netdevices.c @@ -17,8 +17,8 @@ * - maxbufs must be at least 1 * - returns the number of interface records in the buffer */ -int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs, - bool wantloopback) +int afs_get_ipv4_interfaces(struct afs_net *net, struct afs_interface *bufs, + size_t maxbufs, bool wantloopback) { struct net_device *dev; struct in_device *idev; @@ -27,7 +27,7 @@ int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs, ASSERT(maxbufs > 0); rtnl_lock(); - for_each_netdev(&init_net, dev) { + for_each_netdev(net->net, dev) { if (dev->type == ARPHRD_LOOPBACK && !wantloopback) continue; idev = __in_dev_get_rtnl(dev); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 3aad32762989..0c3285c8db95 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -17,240 +17,78 @@ #include <linux/uaccess.h> #include "internal.h" -static inline struct afs_net *afs_proc2net(struct file *f) +static inline struct afs_net *afs_seq2net(struct seq_file *m) { - return &__afs_net; + return afs_net(seq_file_net(m)); } -static inline struct afs_net *afs_seq2net(struct seq_file *m) +static inline struct afs_net *afs_seq2net_single(struct seq_file *m) { - return &__afs_net; // TODO: use seq_file_net(m) + return afs_net(seq_file_single_net(m)); } -static int afs_proc_cells_open(struct inode *inode, struct file *file); -static void *afs_proc_cells_start(struct seq_file *p, loff_t *pos); -static 
void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos); -static void afs_proc_cells_stop(struct seq_file *p, void *v); -static int afs_proc_cells_show(struct seq_file *m, void *v); -static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, - size_t size, loff_t *_pos); - -static const struct seq_operations afs_proc_cells_ops = { - .start = afs_proc_cells_start, - .next = afs_proc_cells_next, - .stop = afs_proc_cells_stop, - .show = afs_proc_cells_show, -}; - -static const struct file_operations afs_proc_cells_fops = { - .open = afs_proc_cells_open, - .read = seq_read, - .write = afs_proc_cells_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf, - size_t size, loff_t *_pos); -static ssize_t afs_proc_rootcell_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos); - -static const struct file_operations afs_proc_rootcell_fops = { - .read = afs_proc_rootcell_read, - .write = afs_proc_rootcell_write, - .llseek = no_llseek, -}; - -static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v); -static int afs_proc_cell_volumes_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_cell_volumes_ops = { - .start = afs_proc_cell_volumes_start, - .next = afs_proc_cell_volumes_next, - .stop = afs_proc_cell_volumes_stop, - .show = afs_proc_cell_volumes_show, -}; - -static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v); -static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_cell_vlservers_ops = { - .start 
= afs_proc_cell_vlservers_start, - .next = afs_proc_cell_vlservers_next, - .stop = afs_proc_cell_vlservers_stop, - .show = afs_proc_cell_vlservers_show, -}; - -static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_servers_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_servers_stop(struct seq_file *p, void *v); -static int afs_proc_servers_show(struct seq_file *m, void *v); - -static const struct seq_operations afs_proc_servers_ops = { - .start = afs_proc_servers_start, - .next = afs_proc_servers_next, - .stop = afs_proc_servers_stop, - .show = afs_proc_servers_show, -}; - -static int afs_proc_sysname_open(struct inode *inode, struct file *file); -static int afs_proc_sysname_release(struct inode *inode, struct file *file); -static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos); -static void *afs_proc_sysname_next(struct seq_file *p, void *v, - loff_t *pos); -static void afs_proc_sysname_stop(struct seq_file *p, void *v); -static int afs_proc_sysname_show(struct seq_file *m, void *v); -static ssize_t afs_proc_sysname_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos); - -static const struct seq_operations afs_proc_sysname_ops = { - .start = afs_proc_sysname_start, - .next = afs_proc_sysname_next, - .stop = afs_proc_sysname_stop, - .show = afs_proc_sysname_show, -}; - -static const struct file_operations afs_proc_sysname_fops = { - .open = afs_proc_sysname_open, - .read = seq_read, - .llseek = seq_lseek, - .release = afs_proc_sysname_release, - .write = afs_proc_sysname_write, -}; - -static int afs_proc_stats_show(struct seq_file *m, void *v); - /* - * initialise the /proc/fs/afs/ directory + * Display the list of cells known to the namespace. 
*/ -int afs_proc_init(struct afs_net *net) +static int afs_proc_cells_show(struct seq_file *m, void *v) { - _enter(""); - - net->proc_afs = proc_mkdir("fs/afs", NULL); - if (!net->proc_afs) - goto error_dir; + struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); + struct afs_net *net = afs_seq2net(m); - if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) || - !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) || - !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) || - !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) || - !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops)) - goto error_tree; + if (v == &net->proc_cells) { + /* display header on line 1 */ + seq_puts(m, "USE NAME\n"); + return 0; + } - _leave(" = 0"); + /* display one cell per line on subsequent lines */ + seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); return 0; - -error_tree: - proc_remove(net->proc_afs); -error_dir: - _leave(" = -ENOMEM"); - return -ENOMEM; -} - -/* - * clean up the /proc/fs/afs/ directory - */ -void afs_proc_cleanup(struct afs_net *net) -{ - proc_remove(net->proc_afs); - net->proc_afs = NULL; -} - -/* - * open "/proc/fs/afs/cells" which provides a summary of extant cells - */ -static int afs_proc_cells_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &afs_proc_cells_ops); } -/* - * set up the iterator to start reading from the cells list and return the - * first item - */ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { - struct afs_net *net = afs_seq2net(m); - rcu_read_lock(); - return seq_list_start_head(&net->proc_cells, *_pos); + return seq_list_start_head(&afs_seq2net(m)->proc_cells, *_pos); } -/* - * move to next cell in cells list - */ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos) { - struct afs_net *net = afs_seq2net(m); - - return seq_list_next(v, 
&net->proc_cells, pos); + return seq_list_next(v, &afs_seq2net(m)->proc_cells, pos); } -/* - * clean up after reading from the cells list - */ static void afs_proc_cells_stop(struct seq_file *m, void *v) __releases(rcu) { rcu_read_unlock(); } -/* - * display a header line followed by a load of cell lines - */ -static int afs_proc_cells_show(struct seq_file *m, void *v) -{ - struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link); - struct afs_net *net = afs_seq2net(m); - - if (v == &net->proc_cells) { - /* display header on line 1 */ - seq_puts(m, "USE NAME\n"); - return 0; - } - - /* display one cell per line on subsequent lines */ - seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name); - return 0; -} +static const struct seq_operations afs_proc_cells_ops = { + .start = afs_proc_cells_start, + .next = afs_proc_cells_next, + .stop = afs_proc_cells_stop, + .show = afs_proc_cells_show, +}; /* * handle writes to /proc/fs/afs/cells * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]" */ -static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, - size_t size, loff_t *_pos) +static int afs_proc_cells_write(struct file *file, char *buf, size_t size) { - struct afs_net *net = afs_proc2net(file); - char *kbuf, *name, *args; + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net(m); + char *name, *args; int ret; - /* start by dragging the command into memory */ - if (size <= 1 || size >= PAGE_SIZE) - return -EINVAL; - - kbuf = memdup_user_nul(buf, size); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - /* trim to first NL */ - name = memchr(kbuf, '\n', size); + name = memchr(buf, '\n', size); if (name) *name = 0; /* split into command, name and argslist */ - name = strchr(kbuf, ' '); + name = strchr(buf, ' '); if (!name) goto inval; do { @@ -269,9 +107,9 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, goto inval; /* determine command to perform */ - _debug("cmd=%s name=%s 
args=%s", kbuf, name, args); + _debug("cmd=%s name=%s args=%s", buf, name, args); - if (strcmp(kbuf, "add") == 0) { + if (strcmp(buf, "add") == 0) { struct afs_cell *cell; cell = afs_lookup_cell(net, name, strlen(name), args, true); @@ -287,10 +125,9 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, goto inval; } - ret = size; + ret = 0; done: - kfree(kbuf); _leave(" = %d", ret); return ret; @@ -300,200 +137,136 @@ inval: goto done; } -static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf, - size_t size, loff_t *_pos) +/* + * Display the name of the current workstation cell. + */ +static int afs_proc_rootcell_show(struct seq_file *m, void *v) { struct afs_cell *cell; - struct afs_net *net = afs_proc2net(file); - unsigned int seq = 0; - char name[AFS_MAXCELLNAME + 1]; - int len; - - if (*_pos > 0) - return 0; - if (!net->ws_cell) - return 0; - - rcu_read_lock(); - do { - read_seqbegin_or_lock(&net->cells_lock, &seq); - len = 0; - cell = rcu_dereference_raw(net->ws_cell); - if (cell) { - len = cell->name_len; - memcpy(name, cell->name, len); - } - } while (need_seqretry(&net->cells_lock, seq)); - done_seqretry(&net->cells_lock, seq); - rcu_read_unlock(); - - if (!len) - return 0; - - name[len++] = '\n'; - if (len > size) - len = size; - if (copy_to_user(buf, name, len) != 0) - return -EFAULT; - *_pos = 1; - return len; + struct afs_net *net; + + net = afs_seq2net_single(m); + if (rcu_access_pointer(net->ws_cell)) { + rcu_read_lock(); + cell = rcu_dereference(net->ws_cell); + if (cell) + seq_printf(m, "%s\n", cell->name); + rcu_read_unlock(); + } + return 0; } /* - * handle writes to /proc/fs/afs/rootcell - * - to initialize rootcell: echo "cell.name:192.168.231.14" + * Set the current workstation cell and optionally supply its list of volume + * location servers. 
+ * + * echo "cell.name:192.168.231.14" >/proc/fs/afs/rootcell */ -static ssize_t afs_proc_rootcell_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos) +static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size) { - struct afs_net *net = afs_proc2net(file); - char *kbuf, *s; + struct seq_file *m = file->private_data; + struct afs_net *net = afs_seq2net_single(m); + char *s; int ret; - /* start by dragging the command into memory */ - if (size <= 1 || size >= PAGE_SIZE) - return -EINVAL; - - kbuf = memdup_user_nul(buf, size); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - ret = -EINVAL; - if (kbuf[0] == '.') + if (buf[0] == '.') goto out; - if (memchr(kbuf, '/', size)) + if (memchr(buf, '/', size)) goto out; /* trim to first NL */ - s = memchr(kbuf, '\n', size); + s = memchr(buf, '\n', size); if (s) *s = 0; /* determine command to perform */ - _debug("rootcell=%s", kbuf); + _debug("rootcell=%s", buf); - ret = afs_cell_init(net, kbuf); - if (ret >= 0) - ret = size; /* consume everything, always */ + ret = afs_cell_init(net, buf); out: - kfree(kbuf); _leave(" = %d", ret); return ret; } +static const char afs_vol_types[3][3] = { + [AFSVL_RWVOL] = "RW", + [AFSVL_ROVOL] = "RO", + [AFSVL_BACKVOL] = "BK", +}; + /* - * initialise /proc/fs/afs/<cell>/ + * Display the list of volumes known to a cell. 
*/ -int afs_proc_cell_setup(struct afs_net *net, struct afs_cell *cell) +static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { - struct proc_dir_entry *dir; - - _enter("%p{%s},%p", cell, cell->name, net->proc_afs); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); - dir = proc_mkdir(cell->name, net->proc_afs); - if (!dir) - goto error_dir; + /* Display header on line 1 */ + if (v == &cell->proc_volumes) { + seq_puts(m, "USE VID TY\n"); + return 0; + } - if (!proc_create_seq_data("vlservers", 0, dir, - &afs_proc_cell_vlservers_ops, cell)) - goto error_tree; - if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops, - cell)) - goto error_tree; + seq_printf(m, "%3d %08x %s\n", + atomic_read(&vol->usage), vol->vid, + afs_vol_types[vol->type]); - _leave(" = 0"); return 0; - -error_tree: - remove_proc_subtree(cell->name, net->proc_afs); -error_dir: - _leave(" = -ENOMEM"); - return -ENOMEM; } -/* - * remove /proc/fs/afs/<cell>/ - */ -void afs_proc_cell_remove(struct afs_net *net, struct afs_cell *cell) -{ - _enter(""); - - remove_proc_subtree(cell->name, net->proc_afs); - - _leave(""); -} - -/* - * set up the iterator to start reading from the cells list and return the - * first item - */ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) __acquires(cell->proc_lock) { struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - _enter("cell=%p pos=%Ld", cell, *_pos); - read_lock(&cell->proc_lock); return seq_list_start_head(&cell->proc_volumes, *_pos); } -/* - * move to next cell in cells list - */ -static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, +static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_cell *cell = PDE_DATA(file_inode(p->file)); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - _enter("cell=%p pos=%Ld", cell, *_pos); return seq_list_next(v, 
&cell->proc_volumes, _pos); } -/* - * clean up after reading from the cells list - */ -static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v) +static void afs_proc_cell_volumes_stop(struct seq_file *m, void *v) __releases(cell->proc_lock) { - struct afs_cell *cell = PDE_DATA(file_inode(p->file)); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); read_unlock(&cell->proc_lock); } -static const char afs_vol_types[3][3] = { - [AFSVL_RWVOL] = "RW", - [AFSVL_ROVOL] = "RO", - [AFSVL_BACKVOL] = "BK", +static const struct seq_operations afs_proc_cell_volumes_ops = { + .start = afs_proc_cell_volumes_start, + .next = afs_proc_cell_volumes_next, + .stop = afs_proc_cell_volumes_stop, + .show = afs_proc_cell_volumes_show, }; /* - * display a header line followed by a load of volume lines + * Display the list of Volume Location servers we're using for a cell. */ -static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) +static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); + struct sockaddr_rxrpc *addr = v; - /* Display header on line 1 */ - if (v == &cell->proc_volumes) { - seq_puts(m, "USE VID TY\n"); + /* display header on line 1 */ + if (v == (void *)1) { + seq_puts(m, "ADDRESS\n"); return 0; } - seq_printf(m, "%3d %08x %s\n", - atomic_read(&vol->usage), vol->vid, - afs_vol_types[vol->type]); - + /* display one cell per line on subsequent lines */ + seq_printf(m, "%pISp\n", &addr->transport); return 0; } -/* - * set up the iterator to start reading from the cells list and return the - * first item - */ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { @@ -516,14 +289,11 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) return alist->addrs + pos; } -/* - * move to next cell in cells list - */ -static void 
*afs_proc_cell_vlservers_next(struct seq_file *p, void *v, +static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v, loff_t *_pos) { struct afs_addr_list *alist; - struct afs_cell *cell = PDE_DATA(file_inode(p->file)); + struct afs_cell *cell = PDE_DATA(file_inode(m->file)); loff_t pos; alist = rcu_dereference(cell->vl_addrs); @@ -536,161 +306,145 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, return alist->addrs + pos; } -/* - * clean up after reading from the cells list - */ -static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v) +static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v) __releases(rcu) { rcu_read_unlock(); } +static const struct seq_operations afs_proc_cell_vlservers_ops = { + .start = afs_proc_cell_vlservers_start, + .next = afs_proc_cell_vlservers_next, + .stop = afs_proc_cell_vlservers_stop, + .show = afs_proc_cell_vlservers_show, +}; + /* - * display a header line followed by a load of volume lines + * Display the list of fileservers we're using within a namespace. */ -static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) +static int afs_proc_servers_show(struct seq_file *m, void *v) { - struct sockaddr_rxrpc *addr = v; + struct afs_server *server; + struct afs_addr_list *alist; + int i; - /* display header on line 1 */ - if (v == (void *)1) { - seq_puts(m, "ADDRESS\n"); + if (v == SEQ_START_TOKEN) { + seq_puts(m, "UUID USE ADDR\n"); return 0; } - /* display one cell per line on subsequent lines */ - seq_printf(m, "%pISp\n", &addr->transport); + server = list_entry(v, struct afs_server, proc_link); + alist = rcu_dereference(server->addresses); + seq_printf(m, "%pU %3d %pISpc%s\n", + &server->uuid, + atomic_read(&server->usage), + &alist->addrs[0].transport, + alist->index == 0 ? "*" : ""); + for (i = 1; i < alist->nr_addrs; i++) + seq_printf(m, " %pISpc%s\n", + &alist->addrs[i].transport, + alist->index == i ? 
"*" : ""); return 0; } -/* - * Set up the iterator to start reading from the server list and return the - * first item. - */ static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos) __acquires(rcu) { - struct afs_net *net = afs_seq2net(m); - rcu_read_lock(); - return seq_hlist_start_head_rcu(&net->fs_proc, *_pos); + return seq_hlist_start_head_rcu(&afs_seq2net(m)->fs_proc, *_pos); } -/* - * move to next cell in cells list - */ static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_net *net = afs_seq2net(m); - - return seq_hlist_next_rcu(v, &net->fs_proc, _pos); + return seq_hlist_next_rcu(v, &afs_seq2net(m)->fs_proc, _pos); } -/* - * clean up after reading from the cells list - */ -static void afs_proc_servers_stop(struct seq_file *p, void *v) +static void afs_proc_servers_stop(struct seq_file *m, void *v) __releases(rcu) { rcu_read_unlock(); } +static const struct seq_operations afs_proc_servers_ops = { + .start = afs_proc_servers_start, + .next = afs_proc_servers_next, + .stop = afs_proc_servers_stop, + .show = afs_proc_servers_show, +}; + /* - * display a header line followed by a load of volume lines + * Display the list of strings that may be substituted for the @sys pathname + * macro. 
*/ -static int afs_proc_servers_show(struct seq_file *m, void *v) +static int afs_proc_sysname_show(struct seq_file *m, void *v) { - struct afs_server *server; - struct afs_addr_list *alist; - - if (v == SEQ_START_TOKEN) { - seq_puts(m, "UUID USE ADDR\n"); - return 0; - } + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *sysnames = net->sysnames; + unsigned int i = (unsigned long)v - 1; - server = list_entry(v, struct afs_server, proc_link); - alist = rcu_dereference(server->addresses); - seq_printf(m, "%pU %3d %pISp\n", - &server->uuid, - atomic_read(&server->usage), - &alist->addrs[alist->index].transport); + if (i < sysnames->nr) + seq_printf(m, "%s\n", sysnames->subs[i]); return 0; } -void afs_put_sysnames(struct afs_sysnames *sysnames) +static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos) + __acquires(&net->sysnames_lock) { - int i; + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names; - if (sysnames && refcount_dec_and_test(&sysnames->usage)) { - for (i = 0; i < sysnames->nr; i++) - if (sysnames->subs[i] != afs_init_sysname && - sysnames->subs[i] != sysnames->blank) - kfree(sysnames->subs[i]); - } + read_lock(&net->sysnames_lock); + + names = net->sysnames; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); } -/* - * Handle opening of /proc/fs/afs/sysname. If it is opened for writing, we - * assume the caller wants to change the substitution list and we allocate a - * buffer to hold the list. 
- */ -static int afs_proc_sysname_open(struct inode *inode, struct file *file) +static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos) { - struct afs_sysnames *sysnames; - struct seq_file *m; - int ret; - - ret = seq_open(file, &afs_proc_sysname_ops); - if (ret < 0) - return ret; + struct afs_net *net = afs_seq2net(m); + struct afs_sysnames *names = net->sysnames; - if (file->f_mode & FMODE_WRITE) { - sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); - if (!sysnames) { - seq_release(inode, file); - return -ENOMEM; - } + *pos += 1; + if (*pos >= names->nr) + return NULL; + return (void *)(unsigned long)(*pos + 1); +} - refcount_set(&sysnames->usage, 1); - m = file->private_data; - m->private = sysnames; - } +static void afs_proc_sysname_stop(struct seq_file *m, void *v) + __releases(&net->sysnames_lock) +{ + struct afs_net *net = afs_seq2net(m); - return 0; + read_unlock(&net->sysnames_lock); } +static const struct seq_operations afs_proc_sysname_ops = { + .start = afs_proc_sysname_start, + .next = afs_proc_sysname_next, + .stop = afs_proc_sysname_stop, + .show = afs_proc_sysname_show, +}; + /* - * Handle writes to /proc/fs/afs/sysname to set the @sys substitution. + * Allow the @sys substitution to be configured. 
*/ -static ssize_t afs_proc_sysname_write(struct file *file, - const char __user *buf, - size_t size, loff_t *_pos) +static int afs_proc_sysname_write(struct file *file, char *buf, size_t size) { - struct afs_sysnames *sysnames; + struct afs_sysnames *sysnames, *kill; struct seq_file *m = file->private_data; - char *kbuf = NULL, *s, *p, *sub; + struct afs_net *net = afs_seq2net(m); + char *s, *p, *sub; int ret, len; - sysnames = m->private; + sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); if (!sysnames) - return -EINVAL; - if (sysnames->error) - return sysnames->error; - - if (size >= PAGE_SIZE - 1) { - sysnames->error = -EINVAL; - return -EINVAL; - } - if (size == 0) - return 0; - - kbuf = memdup_user_nul(buf, size); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - - inode_lock(file_inode(file)); + return -ENOMEM; + refcount_set(&sysnames->usage, 1); + kill = sysnames; - p = kbuf; + p = buf; while ((s = strsep(&p, " \t\n"))) { len = strlen(s); if (len == 0) @@ -731,85 +485,36 @@ static ssize_t afs_proc_sysname_write(struct file *file, sysnames->nr++; } - ret = size; /* consume everything, always */ + if (sysnames->nr == 0) { + sysnames->subs[0] = sysnames->blank; + sysnames->nr++; + } + + write_lock(&net->sysnames_lock); + kill = net->sysnames; + net->sysnames = sysnames; + write_unlock(&net->sysnames_lock); + ret = 0; out: - inode_unlock(file_inode(file)); - kfree(kbuf); + afs_put_sysnames(kill); return ret; invalid: ret = -EINVAL; error: - sysnames->error = ret; goto out; } -static int afs_proc_sysname_release(struct inode *inode, struct file *file) +void afs_put_sysnames(struct afs_sysnames *sysnames) { - struct afs_sysnames *sysnames, *kill = NULL; - struct seq_file *m = file->private_data; - struct afs_net *net = afs_seq2net(m); + int i; - sysnames = m->private; - if (sysnames) { - if (!sysnames->error) { - kill = sysnames; - if (sysnames->nr == 0) { - sysnames->subs[0] = sysnames->blank; - sysnames->nr++; - } - write_lock(&net->sysnames_lock); - kill = 
net->sysnames; - net->sysnames = sysnames; - write_unlock(&net->sysnames_lock); - } - afs_put_sysnames(kill); + if (sysnames && refcount_dec_and_test(&sysnames->usage)) { + for (i = 0; i < sysnames->nr; i++) + if (sysnames->subs[i] != afs_init_sysname && + sysnames->subs[i] != sysnames->blank) + kfree(sysnames->subs[i]); } - - return seq_release(inode, file); -} - -static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos) - __acquires(&net->sysnames_lock) -{ - struct afs_net *net = afs_seq2net(m); - struct afs_sysnames *names = net->sysnames; - - read_lock(&net->sysnames_lock); - - if (*pos >= names->nr) - return NULL; - return (void *)(unsigned long)(*pos + 1); -} - -static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct afs_net *net = afs_seq2net(m); - struct afs_sysnames *names = net->sysnames; - - *pos += 1; - if (*pos >= names->nr) - return NULL; - return (void *)(unsigned long)(*pos + 1); -} - -static void afs_proc_sysname_stop(struct seq_file *m, void *v) - __releases(&net->sysnames_lock) -{ - struct afs_net *net = afs_seq2net(m); - - read_unlock(&net->sysnames_lock); -} - -static int afs_proc_sysname_show(struct seq_file *m, void *v) -{ - struct afs_net *net = afs_seq2net(m); - struct afs_sysnames *sysnames = net->sysnames; - unsigned int i = (unsigned long)v - 1; - - if (i < sysnames->nr) - seq_printf(m, "%s\n", sysnames->subs[i]); - return 0; } /* @@ -817,7 +522,7 @@ static int afs_proc_sysname_show(struct seq_file *m, void *v) */ static int afs_proc_stats_show(struct seq_file *m, void *v) { - struct afs_net *net = afs_seq2net(m); + struct afs_net *net = afs_seq2net_single(m); seq_puts(m, "kAFS statistics\n"); @@ -842,3 +547,101 @@ static int afs_proc_stats_show(struct seq_file *m, void *v) atomic_long_read(&net->n_store_bytes)); return 0; } + +/* + * initialise /proc/fs/afs/<cell>/ + */ +int afs_proc_cell_setup(struct afs_cell *cell) +{ + struct proc_dir_entry *dir; + struct afs_net *net = cell->net; + + 
_enter("%p{%s},%p", cell, cell->name, net->proc_afs); + + dir = proc_net_mkdir(net->net, cell->name, net->proc_afs); + if (!dir) + goto error_dir; + + if (!proc_create_net_data("vlservers", 0444, dir, + &afs_proc_cell_vlservers_ops, + sizeof(struct seq_net_private), + cell) || + !proc_create_net_data("volumes", 0444, dir, + &afs_proc_cell_volumes_ops, + sizeof(struct seq_net_private), + cell)) + goto error_tree; + + _leave(" = 0"); + return 0; + +error_tree: + remove_proc_subtree(cell->name, net->proc_afs); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * remove /proc/fs/afs/<cell>/ + */ +void afs_proc_cell_remove(struct afs_cell *cell) +{ + struct afs_net *net = cell->net; + + _enter(""); + remove_proc_subtree(cell->name, net->proc_afs); + _leave(""); +} + +/* + * initialise the /proc/fs/afs/ directory + */ +int afs_proc_init(struct afs_net *net) +{ + struct proc_dir_entry *p; + + _enter(""); + + p = proc_net_mkdir(net->net, "afs", net->net->proc_net); + if (!p) + goto error_dir; + + if (!proc_create_net_data_write("cells", 0644, p, + &afs_proc_cells_ops, + afs_proc_cells_write, + sizeof(struct seq_net_private), + NULL) || + !proc_create_net_single_write("rootcell", 0644, p, + afs_proc_rootcell_show, + afs_proc_rootcell_write, + NULL) || + !proc_create_net("servers", 0444, p, &afs_proc_servers_ops, + sizeof(struct seq_net_private)) || + !proc_create_net_single("stats", 0444, p, afs_proc_stats_show, NULL) || + !proc_create_net_data_write("sysname", 0644, p, + &afs_proc_sysname_ops, + afs_proc_sysname_write, + sizeof(struct seq_net_private), + NULL)) + goto error_tree; + + net->proc_afs = p; + _leave(" = 0"); + return 0; + +error_tree: + proc_remove(p); +error_dir: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * clean up the /proc/fs/afs/ directory + */ +void afs_proc_cleanup(struct afs_net *net) +{ + proc_remove(net->proc_afs); + net->proc_afs = NULL; +} diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 08735948f15d..183cc5418722 100644 
--- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -46,7 +46,7 @@ int afs_open_socket(struct afs_net *net) _enter(""); - ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); + ret = sock_create_kern(net->net, AF_RXRPC, SOCK_DGRAM, PF_INET6, &socket); if (ret < 0) goto error_1; @@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, trace_afs_notify_call(rxcall, call); call->need_attention = true; - u = __atomic_add_unless(&call->usage, 1, 0); + u = atomic_fetch_add_unless(&call->usage, 1, 0); if (u != 0) { trace_afs_call(call, afs_call_trace_wake, u, atomic_read(&call->net->nr_outstanding_calls), diff --git a/fs/afs/server.c b/fs/afs/server.c index 3af4625e2f8c..1d329e6981d5 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -228,7 +228,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, server->flags = (1UL << AFS_SERVER_FL_NEW); server->update_at = ktime_get_real_seconds() + afs_server_update_delay; rwlock_init(&server->fs_lock); - INIT_LIST_HEAD(&server->cb_interests); + INIT_HLIST_HEAD(&server->cb_volumes); rwlock_init(&server->cb_break_lock); afs_inc_servers_outstanding(net); diff --git a/fs/afs/super.c b/fs/afs/super.c index 9e5d7966621c..4d3e274207fb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -48,6 +48,8 @@ struct file_system_type afs_fs_type = { }; MODULE_ALIAS_FS("afs"); +int afs_net_id; + static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, @@ -117,7 +119,7 @@ int __init afs_fs_init(void) /* * clean up the filesystem */ -void __exit afs_fs_exit(void) +void afs_fs_exit(void) { _enter(""); @@ -351,14 +353,19 @@ static int afs_test_super(struct super_block *sb, void *data) struct afs_super_info *as1 = data; struct afs_super_info *as = AFS_FS_S(sb); - return (as->net == as1->net && + return (as->net_ns == as1->net_ns && as->volume && - as->volume->vid == as1->volume->vid); + as->volume->vid == as1->volume->vid && + 
!as->dyn_root); } static int afs_dynroot_test_super(struct super_block *sb, void *data) { - return false; + struct afs_super_info *as1 = data; + struct afs_super_info *as = AFS_FS_S(sb); + + return (as->net_ns == as1->net_ns && + as->dyn_root); } static int afs_set_super(struct super_block *sb, void *data) @@ -418,10 +425,14 @@ static int afs_fill_super(struct super_block *sb, if (!sb->s_root) goto error; - if (params->dyn_root) + if (as->dyn_root) { sb->s_d_op = &afs_dynroot_dentry_operations; - else + ret = afs_dynroot_populate(sb); + if (ret < 0) + goto error; + } else { sb->s_d_op = &afs_fs_dentry_operations; + } _leave(" = 0"); return 0; @@ -437,7 +448,7 @@ static struct afs_super_info *afs_alloc_sbi(struct afs_mount_params *params) as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { - as->net = afs_get_net(params->net); + as->net_ns = get_net(params->net_ns); if (params->dyn_root) as->dyn_root = true; else @@ -450,12 +461,31 @@ static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { afs_put_volume(as->cell, as->volume); - afs_put_cell(as->net, as->cell); - afs_put_net(as->net); + afs_put_cell(afs_net(as->net_ns), as->cell); + put_net(as->net_ns); kfree(as); } } +static void afs_kill_super(struct super_block *sb) +{ + struct afs_super_info *as = AFS_FS_S(sb); + struct afs_net *net = afs_net(as->net_ns); + + if (as->dyn_root) + afs_dynroot_depopulate(sb); + + /* Clear the callback interests (which will do ilookup5) before + * deactivating the superblock. 
+ */ + if (as->volume) + afs_clear_callback_interests(net, as->volume->servers); + kill_anon_super(sb); + if (as->volume) + afs_deactivate_volume(as->volume); + afs_destroy_sbi(as); +} + /* * get an AFS superblock */ @@ -472,12 +502,13 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, _enter(",,%s,%p", dev_name, options); memset(&params, 0, sizeof(params)); - params.net = &__afs_net; ret = -EINVAL; if (current->nsproxy->net_ns != &init_net) goto error; - + params.net_ns = current->nsproxy->net_ns; + params.net = afs_net(params.net_ns); + /* parse the options and device name */ if (options) { ret = afs_parse_options(&params, options, &dev_name); @@ -563,21 +594,6 @@ error: return ERR_PTR(ret); } -static void afs_kill_super(struct super_block *sb) -{ - struct afs_super_info *as = AFS_FS_S(sb); - - /* Clear the callback interests (which will do ilookup5) before - * deactivating the superblock. - */ - if (as->volume) - afs_clear_callback_interests(as->net, as->volume->servers); - kill_anon_super(sb); - if (as->volume) - afs_deactivate_volume(as->volume); - afs_destroy_sbi(as); -} - /* * Initialise an inode cache slab element prior to any use. 
Note that * afs_alloc_inode() *must* reset anything that could incorrectly leak from one @@ -19,6 +19,7 @@ #include <linux/export.h> #include <linux/syscalls.h> #include <linux/backing-dev.h> +#include <linux/refcount.h> #include <linux/uio.h> #include <linux/sched/signal.h> @@ -167,13 +168,12 @@ struct fsync_iocb { struct poll_iocb { struct file *file; - __poll_t events; struct wait_queue_head *head; - - union { - struct wait_queue_entry wait; - struct work_struct work; - }; + __poll_t events; + bool woken; + bool cancelled; + struct wait_queue_entry wait; + struct work_struct work; }; struct aio_kiocb { @@ -191,6 +191,7 @@ struct aio_kiocb { struct list_head ki_list; /* the aio core uses this * for cancellation */ + refcount_t ki_refcnt; /* * If the aio_resfd field of the userspace iocb is not zero, @@ -215,9 +216,7 @@ static const struct address_space_operations aio_ctx_aops; static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { - struct qstr this = QSTR_INIT("[aio]", 5); struct file *file; - struct path path; struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -226,31 +225,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) inode->i_mapping->private_data = ctx; inode->i_size = PAGE_SIZE * nr_pages; - path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); - if (!path.dentry) { + file = alloc_file_pseudo(inode, aio_mnt, "[aio]", + O_RDWR, &aio_ring_fops); + if (IS_ERR(file)) iput(inode); - return ERR_PTR(-ENOMEM); - } - path.mnt = mntget(aio_mnt); - - d_instantiate(path.dentry, inode); - file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); - if (IS_ERR(file)) { - path_put(&path); - return file; - } - - file->f_flags = O_RDWR; return file; } static struct dentry *aio_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - static const struct dentry_operations ops = { - .d_dname = simple_dname, - }; - struct dentry 
*root = mount_pseudo(fs_type, "aio:", NULL, &ops, + struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL, AIO_RING_MAGIC); if (!IS_ERR(root)) @@ -1028,6 +1013,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); INIT_LIST_HEAD(&req->ki_list); + refcount_set(&req->ki_refcnt, 0); req->ki_ctx = ctx; return req; out_put: @@ -1062,6 +1048,15 @@ out: return ret; } +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (refcount_read(&iocb->ki_refcnt) == 0 || + refcount_dec_and_test(&iocb->ki_refcnt)) { + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); + } +} + /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1131,8 +1126,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) eventfd_ctx_put(iocb->ki_eventfd); } - kmem_cache_free(kiocb_cachep, iocb); - /* * We have to order our ring_info tail store above and test * of the wait list below outside the wait lock. 
This is @@ -1143,8 +1136,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - - percpu_ref_put(&ctx->reqs); + iocb_put(iocb); } /* aio_read_events_ring @@ -1590,6 +1582,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)) return -EINVAL; + req->file = fget(iocb->aio_fildes); if (unlikely(!req->file)) return -EBADF; @@ -1604,46 +1597,58 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) return 0; } -/* need to use list_del_init so we can check if item was present */ -static inline bool __aio_poll_remove(struct poll_iocb *req) +static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) { - if (list_empty(&req->wait.entry)) - return false; - list_del_init(&req->wait.entry); - return true; -} + struct file *file = iocb->poll.file; -static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) -{ - fput(iocb->poll.file); aio_complete(iocb, mangle_poll(mask), 0); + fput(file); } -static void aio_poll_work(struct work_struct *work) +static void aio_poll_complete_work(struct work_struct *work) { - struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work); + struct poll_iocb *req = container_of(work, struct poll_iocb, work); + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); + struct poll_table_struct pt = { ._key = req->events }; + struct kioctx *ctx = iocb->ki_ctx; + __poll_t mask = 0; - if (!list_empty_careful(&iocb->ki_list)) - aio_remove_iocb(iocb); - __aio_poll_complete(iocb, iocb->poll.events); + if (!READ_ONCE(req->cancelled)) + mask = vfs_poll(req->file, &pt) & req->events; + + /* + * Note that ->ki_cancel callers also delete iocb from active_reqs after + * calling ->ki_cancel. We need the ctx_lock roundtrip here to + * synchronize with them. 
In the cancellation case the list_del_init + * itself is not actually needed, but harmless so we keep it in to + * avoid further branches in the fast path. + */ + spin_lock_irq(&ctx->ctx_lock); + if (!mask && !READ_ONCE(req->cancelled)) { + add_wait_queue(req->head, &req->wait); + spin_unlock_irq(&ctx->ctx_lock); + return; + } + list_del_init(&iocb->ki_list); + spin_unlock_irq(&ctx->ctx_lock); + + aio_poll_complete(iocb, mask); } +/* assumes we are called with irqs disabled */ static int aio_poll_cancel(struct kiocb *iocb) { struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; - struct wait_queue_head *head = req->head; - bool found = false; - - spin_lock(&head->lock); - found = __aio_poll_remove(req); - spin_unlock(&head->lock); - if (found) { - req->events = 0; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); + spin_lock(&req->head->lock); + WRITE_ONCE(req->cancelled, true); + if (!list_empty(&req->wait.entry)) { + list_del_init(&req->wait.entry); + schedule_work(&aiocb->poll.work); } + spin_unlock(&req->head->lock); + return 0; } @@ -1652,44 +1657,59 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, { struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); - struct file *file = req->file; __poll_t mask = key_to_poll(key); - assert_spin_locked(&req->head->lock); + req->woken = true; /* for instances that support it check for an event match first: */ - if (mask && !(mask & req->events)) - return 0; + if (mask) { + if (!(mask & req->events)) + return 0; + + /* try to complete the iocb inline if we can: */ + if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { + list_del(&iocb->ki_list); + spin_unlock(&iocb->ki_ctx->ctx_lock); + + list_del_init(&req->wait.entry); + aio_poll_complete(iocb, mask); + return 1; + } + } - mask = file->f_op->poll_mask(file, req->events); - if (!mask) - return 0; 
+ list_del_init(&req->wait.entry); + schedule_work(&req->work); + return 1; +} - __aio_poll_remove(req); +struct aio_poll_table { + struct poll_table_struct pt; + struct aio_kiocb *iocb; + int error; +}; - /* - * Try completing without a context switch if we can acquire ctx_lock - * without spinning. Otherwise we need to defer to a workqueue to - * avoid a deadlock due to the lock order. - */ - if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { - list_del_init(&iocb->ki_list); - spin_unlock(&iocb->ki_ctx->ctx_lock); +static void +aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, + struct poll_table_struct *p) +{ + struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); - __aio_poll_complete(iocb, mask); - } else { - req->events = mask; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); + /* multiple wait queues per file are not supported */ + if (unlikely(pt->iocb->poll.head)) { + pt->error = -EINVAL; + return; } - return 1; + pt->error = 0; + pt->iocb->poll.head = head; + add_wait_queue(head, &pt->iocb->poll.wait); } static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) { struct kioctx *ctx = aiocb->ki_ctx; struct poll_iocb *req = &aiocb->poll; + struct aio_poll_table apt; __poll_t mask; /* reject any unknown events outside the normal event mask. 
*/ @@ -1699,40 +1719,58 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags) return -EINVAL; + INIT_WORK(&req->work, aio_poll_complete_work); req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->file = fget(iocb->aio_fildes); if (unlikely(!req->file)) return -EBADF; - if (!file_has_poll_mask(req->file)) - goto out_fail; - - req->head = req->file->f_op->get_poll_head(req->file, req->events); - if (!req->head) - goto out_fail; - if (IS_ERR(req->head)) { - mask = EPOLLERR; - goto done; - } + apt.pt._qproc = aio_poll_queue_proc; + apt.pt._key = req->events; + apt.iocb = aiocb; + apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ + + /* initialized the list so that we can do list_empty checks */ + INIT_LIST_HEAD(&req->wait.entry); init_waitqueue_func_entry(&req->wait, aio_poll_wake); - aiocb->ki_cancel = aio_poll_cancel; + + /* one for removal from waitqueue, one for this function */ + refcount_set(&aiocb->ki_refcnt, 2); + + mask = vfs_poll(req->file, &apt.pt) & req->events; + if (unlikely(!req->head)) { + /* we did not manage to set up a waitqueue, done */ + goto out; + } spin_lock_irq(&ctx->ctx_lock); spin_lock(&req->head->lock); - mask = req->file->f_op->poll_mask(req->file, req->events); - if (!mask) { - __add_wait_queue(req->head, &req->wait); + if (req->woken) { + /* wake_up context handles the rest */ + mask = 0; + apt.error = 0; + } else if (mask || apt.error) { + /* if we get an error or a mask we are done */ + WARN_ON_ONCE(list_empty(&req->wait.entry)); + list_del_init(&req->wait.entry); + } else { + /* actually waiting for an event */ list_add_tail(&aiocb->ki_list, &ctx->active_reqs); + aiocb->ki_cancel = aio_poll_cancel; } spin_unlock(&req->head->lock); spin_unlock_irq(&ctx->ctx_lock); -done: + +out: + if (unlikely(apt.error)) { + fput(req->file); + return apt.error; + } + if (mask) - __aio_poll_complete(aiocb, mask); + aio_poll_complete(aiocb, 
mask); + iocb_put(aiocb); return 0; -out_fail: - fput(req->file); - return -EINVAL; /* same as no support for IOCB_CMD_POLL */ } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, @@ -2042,6 +2080,11 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, return ret; } +struct __aio_sigset { + const sigset_t __user *sigmask; + size_t sigsetsize; +}; + SYSCALL_DEFINE6(io_pgetevents, aio_context_t, ctx_id, long, min_nr, diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 3168ee4e77f4..91262c34b797 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -71,8 +71,6 @@ struct file *anon_inode_getfile(const char *name, const struct file_operations *fops, void *priv, int flags) { - struct qstr this; - struct path path; struct file *file; if (IS_ERR(anon_inode_inode)) @@ -82,39 +80,23 @@ struct file *anon_inode_getfile(const char *name, return ERR_PTR(-ENOENT); /* - * Link the inode to a directory entry by creating a unique name - * using the inode sequence number. - */ - file = ERR_PTR(-ENOMEM); - this.name = name; - this.len = strlen(name); - this.hash = 0; - path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); - if (!path.dentry) - goto err_module; - - path.mnt = mntget(anon_inode_mnt); - /* * We know the anon_inode inode count is always greater than zero, * so ihold() is safe. 
*/ ihold(anon_inode_inode); - - d_instantiate(path.dentry, anon_inode_inode); - - file = alloc_file(&path, OPEN_FMODE(flags), fops); + file = alloc_file_pseudo(anon_inode_inode, anon_inode_mnt, name, + flags & (O_ACCMODE | O_NONBLOCK), fops); if (IS_ERR(file)) - goto err_dput; + goto err; + file->f_mapping = anon_inode_inode->i_mapping; - file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; return file; -err_dput: - path_put(&path); -err_module: +err: + iput(anon_inode_inode); module_put(fops->owner); return file; } diff --git a/fs/attr.c b/fs/attr.c index d0b4d34878fb..d22e8187477f 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -120,7 +120,6 @@ EXPORT_SYMBOL(setattr_prepare); * inode_newsize_ok - may this inode be truncated to a given size * @inode: the inode to be truncated * @offset: the new size to assign to the inode - * @Returns: 0 on success, -ve errno on failure * * inode_newsize_ok must be called with i_mutex held. * @@ -130,6 +129,8 @@ EXPORT_SYMBOL(setattr_prepare); * returned. @inode must be a file (not directory), with appropriate * permissions to allow truncate (inode_newsize_ok does NOT check these * conditions). 
+ * + * Return: 0 on success, -ve errno on failure */ int inode_newsize_ok(const struct inode *inode, loff_t offset) { @@ -183,14 +184,14 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; if (ia_valid & ATTR_ATIME) - inode->i_atime = timespec_trunc(attr->ia_atime, - inode->i_sb->s_time_gran); + inode->i_atime = timespec64_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_MTIME) - inode->i_mtime = timespec_trunc(attr->ia_mtime, - inode->i_sb->s_time_gran); + inode->i_mtime = timespec64_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_CTIME) - inode->i_ctime = timespec_trunc(attr->ia_ctime, - inode->i_sb->s_time_gran); + inode->i_ctime = timespec64_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -205,7 +206,7 @@ EXPORT_SYMBOL(setattr_copy); /** * notify_change - modify attributes of a filesytem object * @dentry: object affected - * @iattr: new attributes + * @attr: new attributes * @delegated_inode: returns inode, if the inode is delegated * * The caller must hold the i_mutex on the affected object. @@ -227,7 +228,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; int error; - struct timespec now; + struct timespec64 now; unsigned int ia_valid = attr->ia_valid; WARN_ON_ONCE(!inode_is_locked(inode)); diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile index 43fedde15c26..1f85d35ec8b7 100644 --- a/fs/autofs/Makefile +++ b/fs/autofs/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux autofs-filesystem routines. 
# -obj-$(CONFIG_AUTOFS_FS) += autofs.o +obj-$(CONFIG_AUTOFS_FS) += autofs4.o -autofs-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o +autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index ea4ca1445ab7..86eafda4a652 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -135,6 +135,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) cmd); goto out; } + } else { + unsigned int inr = _IOC_NR(cmd); + + if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD || + inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD || + inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) { + err = -EINVAL; + goto out; + } } err = 0; @@ -271,7 +280,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp, dev_t devid; int err, fd; - /* param->path has already been checked */ + /* param->path has been checked in validate_dev_ioctl() */ + if (!param->openmount.devid) return -EINVAL; @@ -433,10 +443,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, dev_t devid; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ devid = sbi->sb->s_dev; @@ -521,10 +528,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, unsigned int devid, magic; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ name = param->path; type = param->ismountpoint.in.type; diff --git a/fs/autofs/init.c b/fs/autofs/init.c index cc9447e1903f..79ae07d9592f 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -23,7 +23,7 @@ static struct file_system_type autofs_fs_type = { .kill_sb = autofs_kill_sb, }; MODULE_ALIAS_FS("autofs"); -MODULE_ALIAS("autofs4"); +MODULE_ALIAS("autofs"); static int __init init_autofs_fs(void) { diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 
213b51dbbb60..8035d2a44561 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -126,7 +126,7 @@ static int bad_inode_fiemap(struct inode *inode, return -EIO; } -static int bad_inode_update_time(struct inode *inode, struct timespec *time, +static int bad_inode_update_time(struct inode *inode, struct timespec64 *time, int flags) { return -EIO; @@ -134,7 +134,7 @@ static int bad_inode_update_time(struct inode *inode, struct timespec *time, static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry, struct file *file, unsigned int open_flag, - umode_t create_mode, int *opened) + umode_t create_mode) { return -EIO; } diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog index 16f2dfe8c2f7..aff7eec8f327 100644 --- a/fs/befs/ChangeLog +++ b/fs/befs/ChangeLog @@ -389,7 +389,7 @@ Version 0.4 (2001-10-28) (fs/nls/Config.in) * Added Configure.help entries for CONFIG_BEFS_FS and CONFIG_DEBUG_BEFS - (Documentation/Configure.help) + (currently at fs/befs/Kconfig) 2001-08-?? ========== diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 070b6184642d..efae2fb0930a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1259,9 +1259,8 @@ static int load_elf_library(struct file *file) goto out_free_ph; } - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + - ELF_MIN_ALIGN - 1); - bss = eppnt->p_memsz + eppnt->p_vaddr; + len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); + bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); if (bss > len) { error = vm_brk(len, bss - len); if (error) @@ -1621,8 +1620,8 @@ static int fill_files_note(struct memelfnote *note) if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ return -EINVAL; size = round_up(size, PAGE_SIZE); - data = vmalloc(size); - if (!data) + data = kvmalloc(size, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(data)) return -ENOMEM; start_end_ofs = data + 2; @@ -1639,7 +1638,7 @@ static int fill_files_note(struct memelfnote *note) filename = file_path(file, name_curpos, remaining); if (IS_ERR(filename)) { if 
(PTR_ERR(filename) == -ENAMETOOLONG) { - vfree(data); + kvfree(data); size = size * 5 / 4; goto alloc; } @@ -1752,7 +1751,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, const struct user_regset *regset = &view->regsets[i]; do_thread_regset_writeback(t->task, regset); if (regset->core_note_type && regset->get && - (!regset->active || regset->active(t->task, regset))) { + (!regset->active || regset->active(t->task, regset) > 0)) { int ret; size_t size = regset_size(t->task, regset); void *data = kmalloc(size, GFP_KERNEL); @@ -1932,7 +1931,7 @@ static void free_note_info(struct elf_note_info *info) kfree(t); } kfree(info->psinfo.data); - vfree(info->files.data); + kvfree(info->files.data); } #else @@ -2148,7 +2147,7 @@ static void free_note_info(struct elf_note_info *info) /* Free data possibly allocated by fill_files_note(): */ if (info->notes_files) - vfree(info->notes_files->data); + kvfree(info->notes_files->data); kfree(info->prstatus); kfree(info->psinfo); @@ -2294,8 +2293,9 @@ static int elf_core_dump(struct coredump_params *cprm) if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz)) goto end_coredump; - vma_filesz = vmalloc(array_size(sizeof(*vma_filesz), (segs - 1))); - if (!vma_filesz) + vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)), + GFP_KERNEL); + if (ZERO_OR_NULL_PTR(vma_filesz)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; @@ -2402,7 +2402,7 @@ end_coredump: cleanup: free_note_info(&info); kfree(shdr4extnum); - vfree(vma_filesz); + kvfree(vma_filesz); kfree(phdr4note); kfree(elf); out: diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 4de191563261..aa4a7a23ff99 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -4,7 +4,7 @@ * Copyright (C) 1997 Richard Günther * * binfmt_misc detects binaries via a magic or filename extension and invokes - * a specified wrapper. See Documentation/binfmt_misc.txt for more details. + * a specified wrapper. 
See Documentation/admin-guide/binfmt-misc.rst for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm) goto error; if (fmt->flags & MISC_FMT_OPEN_FILE) { - interp_file = filp_clone_open(fmt->interp_file); + interp_file = file_clone_open(fmt->interp_file); if (!IS_ERR(interp_file)) deny_write_access(interp_file); } else { diff --git a/fs/block_dev.c b/fs/block_dev.c index 0dd87aaeb39a..aba25414231a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -221,7 +221,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, ret = bio_iov_iter_get_pages(&bio, iter); if (unlikely(ret)) - return ret; + goto out; ret = bio.bi_iter.bi_size; if (iov_iter_rw(iter) == READ) { @@ -250,12 +250,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, put_page(bvec->bv_page); } - if (vecs != inline_vecs) - kfree(vecs); - if (unlikely(bio.bi_status)) ret = blk_status_to_errno(bio.bi_status); +out: + if (vecs != inline_vecs) + kfree(vecs); + bio_uninit(&bio); return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f4bf7874c24a..118346aceea9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3197,7 +3197,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); void btrfs_set_range_writeback(void *private_data, u64 start, u64 end); -int btrfs_page_mkwrite(struct vm_fault *vmf); +vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 51fc015c7d2c..b3e45714d28f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4238,8 +4238,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) struct extent_map *em; u64 start = 
page_offset(page); u64 end = start + PAGE_SIZE - 1; - struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree; - struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree; + struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host); + struct extent_io_tree *tree = &btrfs_inode->io_tree; + struct extent_map_tree *map = &btrfs_inode->extent_tree; if (gfpflags_allow_blocking(mask) && page->mapping->host->i_size > SZ_16M) { @@ -4262,6 +4263,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) extent_map_end(em) - 1, EXTENT_LOCKED | EXTENT_WRITEBACK, 0, NULL)) { + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &btrfs_inode->runtime_flags); remove_extent_mapping(map, em); /* once for the rb tree */ free_extent_map(em); @@ -4542,8 +4545,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, offset_in_extent = em_start - em->start; em_end = extent_map_end(em); em_len = em_end - em_start; - disko = 0; flags = 0; + if (em->block_start < EXTENT_MAP_LAST_BYTE) + disko = em->block_start + offset_in_extent; + else + disko = 0; /* * bump off for our next call to get_extent @@ -4565,8 +4571,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 bytenr = em->block_start - (em->start - em->orig_start); - disko = em->block_start + offset_in_extent; - /* * As btrfs supports shared space, this information * can be exported to userspace tools via diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f660ba1e5e58..51e77d72068a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1842,16 +1842,16 @@ out: static void update_time_for_write(struct inode *inode) { - struct timespec now; + struct timespec64 now; if (IS_NOCMTIME(inode)) return; now = current_time(inode); - if (!timespec_equal(&inode->i_mtime, &now)) + if (!timespec64_equal(&inode->i_mtime, &now)) inode->i_mtime = now; - if (!timespec_equal(&inode->i_ctime, &now)) + if (!timespec64_equal(&inode->i_ctime, &now)) inode->i_ctime = now; if 
(IS_I_VERSION(inode)) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 89b208201783..def3ada0f0b8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5745,7 +5745,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; - BTRFS_I(inode)->i_otime = inode->i_mtime; + BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); return inode; } @@ -6094,7 +6094,7 @@ static int btrfs_dirty_inode(struct inode *inode) * This is a copy of file_update_time. We need this so we can return error on * ENOSPC for updating the inode in the case of file write and mmap writes. */ -static int btrfs_update_time(struct inode *inode, struct timespec *now, +static int btrfs_update_time(struct inode *inode, struct timespec64 *now, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -6335,8 +6335,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, location->type = BTRFS_INODE_ITEM_KEY; ret = btrfs_insert_inode_locked(inode); - if (ret < 0) + if (ret < 0) { + iput(inode); goto fail; + } path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); @@ -6349,7 +6351,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = current_time(inode); inode->i_atime = inode->i_mtime; inode->i_ctime = inode->i_mtime; - BTRFS_I(inode)->i_otime = inode->i_mtime; + BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime); inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); @@ -6395,12 +6397,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return inode; fail_unlock: - unlock_new_inode(inode); + discard_new_inode(inode); fail: if (dir && name) BTRFS_I(dir)->index_cnt--; btrfs_free_path(path); - iput(inode); return ERR_PTR(ret); } @@ -6505,7 +6506,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry 
*dentry, struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = NULL; int err; - int drop_inode = 0; u64 objectid; u64 index = 0; @@ -6527,6 +6527,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } @@ -6541,31 +6542,24 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); - if (err) { - goto out_unlock_inode; - } else { - btrfs_update_inode(trans, root, inode); - d_instantiate_new(dentry, inode); - } + if (err) + goto out_unlock; + + btrfs_update_inode(trans, root, inode); + d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); - if (drop_inode) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } return err; - -out_unlock_inode: - drop_inode = 1; - unlock_new_inode(inode); - goto out_unlock; - } static int btrfs_create(struct inode *dir, struct dentry *dentry, @@ -6575,7 +6569,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = NULL; - int drop_inode_on_err = 0; int err; u64 objectid; u64 index = 0; @@ -6598,9 +6591,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } - drop_inode_on_err = 1; /* * If the active LSM wants to access the inode during * d_instantiate it needs these. 
Smack checks to see @@ -6613,33 +6606,28 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_update_inode(trans, root, inode); if (err) - goto out_unlock_inode; + goto out_unlock; err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); if (err) - goto out_unlock_inode; + goto out_unlock; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); - if (err && drop_inode_on_err) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_unlock_inode: - unlock_new_inode(inode); - goto out_unlock; - } static int btrfs_link(struct dentry *old_dentry, struct inode *dir, @@ -6748,6 +6736,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) S_IFDIR | mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_fail; } @@ -6758,34 +6747,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_fail_inode; + goto out_fail; btrfs_i_size_write(BTRFS_I(inode), 0); err = btrfs_update_inode(trans, root, inode); if (err) - goto out_fail_inode; + goto out_fail; err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), dentry->d_name.name, dentry->d_name.len, 0, index); if (err) - goto out_fail_inode; + goto out_fail; d_instantiate_new(dentry, inode); drop_on_err = 0; out_fail: btrfs_end_transaction(trans); - if (drop_on_err) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_fail_inode: - unlock_new_inode(inode); - goto out_fail; } static noinline int uncompress_inline(struct 
btrfs_path *path, @@ -8872,7 +8857,7 @@ again: * beyond EOF, then the page is guaranteed safe against truncation until we * unlock the page. */ -int btrfs_page_mkwrite(struct vm_fault *vmf) +vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); @@ -8884,7 +8869,8 @@ int btrfs_page_mkwrite(struct vm_fault *vmf) char *kaddr; unsigned long zero_start; loff_t size; - int ret; + vm_fault_t ret; + int ret2; int reserved = 0; u64 reserved_space; u64 page_start; @@ -8906,17 +8892,14 @@ int btrfs_page_mkwrite(struct vm_fault *vmf) * end up waiting indefinitely to get a lock on the page currently * being processed by btrfs_page_mkwrite() function. */ - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, + ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, reserved_space); - if (!ret) { - ret = file_update_time(vmf->vma->vm_file); + if (!ret2) { + ret2 = file_update_time(vmf->vma->vm_file); reserved = 1; } - if (ret) { - if (ret == -ENOMEM) - ret = VM_FAULT_OOM; - else /* -ENOSPC, -EIO, etc */ - ret = VM_FAULT_SIGBUS; + if (ret2) { + ret = vmf_error(ret2); if (reserved) goto out; goto out_noreserve; @@ -8975,15 +8958,15 @@ again: EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state); - ret = btrfs_set_extent_delalloc(inode, page_start, end, 0, + ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0, &cached_state, 0); - if (ret) { + if (ret2) { unlock_extent_cached(io_tree, page_start, page_end, &cached_state); ret = VM_FAULT_SIGBUS; goto out_unlock; } - ret = 0; + ret2 = 0; /* page is wholly or partially inside EOF */ if (page_start + PAGE_SIZE > size) @@ -9007,13 +8990,14 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state); -out_unlock: - if (!ret) { + if (!ret2) { btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true); sb_end_pagefault(inode->i_sb); extent_changeset_free(data_reserved); return VM_FAULT_LOCKED; 
} + +out_unlock: unlock_page(page); out: btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0)); @@ -9437,7 +9421,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; - struct timespec ctime = current_time(old_inode); + struct timespec64 ctime = current_time(old_inode); struct dentry *parent; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); u64 new_ino = btrfs_ino(BTRFS_I(new_inode)); @@ -9445,6 +9429,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, u64 new_idx = 0; u64 root_objectid; int ret; + int ret2; bool root_log_pinned = false; bool dest_log_pinned = false; @@ -9641,7 +9626,8 @@ out_fail: dest_log_pinned = false; } } - ret = btrfs_end_transaction(trans); + ret2 = btrfs_end_transaction(trans); + ret = ret ? ret : ret2; out_notrans: if (new_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); @@ -10114,7 +10100,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, struct btrfs_key key; struct inode *inode = NULL; int err; - int drop_inode = 0; u64 objectid; u64 index = 0; int name_len; @@ -10147,6 +10132,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, objectid, S_IFLNK|S_IRWXUGO, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); + inode = NULL; goto out_unlock; } @@ -10163,12 +10149,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_unlock_inode; + goto out_unlock; path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; - goto out_unlock_inode; + goto out_unlock; } key.objectid = btrfs_ino(BTRFS_I(inode)); key.offset = 0; @@ -10178,7 +10164,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize); if (err) { btrfs_free_path(path); - goto out_unlock_inode; + goto out_unlock; } leaf = path->nodes[0]; ei 
= btrfs_item_ptr(leaf, path->slots[0], @@ -10210,26 +10196,19 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (!err) err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), 0, index); - if (err) { - drop_inode = 1; - goto out_unlock_inode; - } + if (err) + goto out_unlock; d_instantiate_new(dentry, inode); out_unlock: btrfs_end_transaction(trans); - if (drop_inode) { + if (err && inode) { inode_dec_link_count(inode); - iput(inode); + discard_new_inode(inode); } btrfs_btree_balance_dirty(fs_info); return err; - -out_unlock_inode: - drop_inode = 1; - unlock_new_inode(inode); - goto out_unlock; } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, @@ -10438,14 +10417,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) ret = btrfs_init_inode_security(trans, inode, dir, NULL); if (ret) - goto out_inode; + goto out; ret = btrfs_update_inode(trans, root, inode); if (ret) - goto out_inode; + goto out; ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) - goto out_inode; + goto out; /* * We set number of links to 0 in btrfs_new_inode(), and here we set @@ -10455,21 +10434,15 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() */ set_nlink(inode, 1); - unlock_new_inode(inode); d_tmpfile(dentry, inode); + unlock_new_inode(inode); mark_inode_dirty(inode); - out: btrfs_end_transaction(trans); - if (ret) - iput(inode); + if (ret && inode) + discard_new_inode(inode); btrfs_btree_balance_dirty(fs_info); return ret; - -out_inode: - unlock_new_inode(inode); - goto out; - } __attribute__((const)) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d29992f7dc63..b077544b5232 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -562,7 +562,7 @@ static noinline int create_subvol(struct inode *dir, struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *new_root; struct btrfs_block_rsv 
block_rsv; - struct timespec cur_time = current_time(dir); + struct timespec64 cur_time = current_time(dir); struct inode *inode; int ret; int err; @@ -2438,6 +2438,10 @@ static int btrfs_search_path_in_tree_user(struct inode *inode, } temp_inode = btrfs_iget(sb, &key2, root, NULL); + if (IS_ERR(temp_inode)) { + ret = PTR_ERR(temp_inode); + goto out; + } ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC); iput(temp_inode); if (ret) { @@ -3323,11 +3327,13 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) if (pg) { unlock_page(pg); put_page(pg); + cmp->src_pages[i] = NULL; } pg = cmp->dst_pages[i]; if (pg) { unlock_page(pg); put_page(pg); + cmp->dst_pages[i] = NULL; } } } @@ -3573,7 +3579,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN, dst, dst_loff, &cmp); if (ret) - goto out_unlock; + goto out_free; loff += BTRFS_MAX_DEDUPE_LEN; dst_loff += BTRFS_MAX_DEDUPE_LEN; @@ -3583,16 +3589,16 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, ret = btrfs_extent_same_range(src, loff, tail_len, dst, dst_loff, &cmp); +out_free: + kvfree(cmp.src_pages); + kvfree(cmp.dst_pages); + out_unlock: if (same_inode) inode_unlock(src); else btrfs_double_inode_unlock(src, dst); -out_free: - kvfree(cmp.src_pages); - kvfree(cmp.dst_pages); - return ret; } @@ -5391,7 +5397,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root_item *root_item = &root->root_item; struct btrfs_trans_handle *trans; - struct timespec ct = current_time(inode); + struct timespec64 ct = current_time(inode); int ret = 0; int received_uuid_changed; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1874a6d2e6f5..c25dc47210a3 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2680,8 +2680,10 @@ out: free_extent_buffer(scratch_leaf); } - if (done && !ret) + if (done && !ret) { ret = 1; + 
fs_info->qgroup_rescan_progress.objectid = (u64)-1; + } return ret; } @@ -2784,13 +2786,20 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, if (!init_flags) { /* we're resuming qgroup rescan at mount time */ - if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)) + if (!(fs_info->qgroup_flags & + BTRFS_QGROUP_STATUS_FLAG_RESCAN)) { btrfs_warn(fs_info, "qgroup rescan init failed, qgroup is not enabled"); - else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) + ret = -EINVAL; + } else if (!(fs_info->qgroup_flags & + BTRFS_QGROUP_STATUS_FLAG_ON)) { btrfs_warn(fs_info, "qgroup rescan init failed, qgroup rescan is not queued"); - return -EINVAL; + ret = -EINVAL; + } + + if (ret) + return ret; } mutex_lock(&fs_info->qgroup_rescan_lock); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 6db3bda44aa5..c451285976ac 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -485,9 +485,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_root_item *item = &root->root_item; - struct timespec ct; + struct timespec64 ct; - ktime_get_real_ts(&ct); + ktime_get_real_ts64(&ct); spin_lock(&root->root_item_lock); btrfs_set_root_ctransid(item, trans->transid); btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a59005862010..6702896cdb8f 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1151,11 +1151,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) return ret; } - if (sctx->is_dev_replace && !is_metadata && !have_csum) { - sblocks_for_recheck = NULL; - goto nodatasum_case; - } - /* * read all mirrors one after the other. 
This includes to * re-read the extent or metadata block that failed (that was @@ -1268,13 +1263,19 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) goto out; } - if (!is_metadata && !have_csum) { + /* + * NOTE: Even for nodatasum case, it's still possible that it's a + * compressed data extent, thus scrub_fixup_nodatasum(), which write + * inode page cache onto disk, could cause serious data corruption. + * + * So here we could only read from disk, and hope our recovery could + * reach disk before the newer write. + */ + if (0 && !is_metadata && !have_csum) { struct scrub_fixup_nodatasum *fixup_nodatasum; WARN_ON(sctx->is_dev_replace); -nodatasum_case: - /* * !is_metadata and !have_csum, this means that the data * might not be COWed, that it might be modified @@ -2799,7 +2800,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map, have_csum = scrub_find_csum(sctx, logical, csum); if (have_csum == 0) ++sctx->stat.no_csum; - if (sctx->is_dev_replace && !have_csum) { + if (0 && sctx->is_dev_replace && !have_csum) { ret = copy_nocow_pages(sctx, logical, l, mirror_num, physical_for_dev_replace); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4485eae41e88..ff5f6c719976 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1422,7 +1422,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; - struct timespec cur_time; + struct timespec64 cur_time; int ret = 0; u64 to_reserve = 0; u64 index = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e034ad9e23b4..1da162928d1a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1146,6 +1146,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, { int ret; + mutex_lock(&uuid_mutex); mutex_lock(&fs_devices->device_list_mutex); if (fs_devices->opened) { fs_devices->opened++; @@ -1155,6 +1156,7 @@ int 
btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ret = open_fs_devices(fs_devices, flags, holder); } mutex_unlock(&fs_devices->device_list_mutex); + mutex_unlock(&uuid_mutex); return ret; } diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index d9f001078e08..4a717d400807 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -218,7 +218,8 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) "%s", fsdef->dentry->d_sb->s_id); - fscache_object_init(&fsdef->fscache, NULL, &cache->cache); + fscache_object_init(&fsdef->fscache, &fscache_fsdef_index, + &cache->cache); ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); if (ret < 0) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ab0bbe93b398..af2b17b21b94 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -186,12 +186,12 @@ try_again: * need to wait for it to be destroyed */ wait_for_old_object: trace_cachefiles_wait_active(object, dentry, xobject); + clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); if (fscache_object_is_live(&xobject->fscache)) { pr_err("\n"); pr_err("Error: Unexpected object collision\n"); cachefiles_printk_object(object, xobject); - BUG(); } atomic_inc(&xobject->usage); write_unlock(&cache->active_lock); @@ -248,7 +248,6 @@ wait_for_old_object: goto try_again; requeue: - clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo); _leave(" = -ETIMEDOUT"); return -ETIMEDOUT; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 5082c8a49686..40f7595aad10 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, struct cachefiles_one_read *monitor = container_of(wait, struct cachefiles_one_read, monitor); struct cachefiles_object *object; + struct fscache_retrieval *op = monitor->op; struct wait_bit_key *key = _key; struct page *page 
= wait->private; @@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, list_del(&wait->entry); /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(monitor->op); + ASSERT(op); - object = container_of(monitor->op->op.object, - struct cachefiles_object, fscache); + /* We need to temporarily bump the usage count as we don't own a ref + * here otherwise cachefiles_read_copier() may free the op between the + * monitor being enqueued on the op->to_do list and the op getting + * enqueued on the work queue. + */ + fscache_get_retrieval(op); + object = container_of(op->op.object, struct cachefiles_object, fscache); spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &monitor->op->to_do); + list_add_tail(&monitor->op_link, &op->to_do); spin_unlock(&object->work_lock); - fscache_enqueue_retrieval(monitor->op); + fscache_enqueue_retrieval(op); + fscache_put_retrieval(op); return 0; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c9cb2f33a6d6..292b3d72d725 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -574,6 +574,7 @@ static u64 get_writepages_data_length(struct inode *inode, */ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { + struct timespec ts; struct inode *inode; struct ceph_inode_info *ci; struct ceph_fs_client *fsc; @@ -624,11 +625,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); + ts = timespec64_to_timespec(inode->i_mtime); err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, snapc, page_off, len, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, - &inode->i_mtime, &page, 1); + &ts, &page, 1); if (err < 0) { struct writeback_control tmp_wbc; if (!wbc) @@ -1132,7 +1134,7 @@ new_request: pages = NULL; } - req->r_mtime = inode->i_mtime; + req->r_mtime = timespec64_to_timespec(inode->i_mtime); rc = 
ceph_osdc_start_request(&fsc->client->osdc, req, true); BUG_ON(rc); req = NULL; @@ -1732,7 +1734,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out; } - req->r_mtime = inode->i_mtime; + req->r_mtime = timespec64_to_timespec(inode->i_mtime); err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1774,7 +1776,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out_put; } - req->r_mtime = inode->i_mtime; + req->r_mtime = timespec64_to_timespec(inode->i_mtime); err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1935,8 +1937,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, 0, false, true); err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); - wr_req->r_mtime = ci->vfs_inode.i_mtime; - wr_req->r_abort_on_full = true; + wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime); err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); if (!err) diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index bb524c880b1e..362900e42424 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -130,7 +130,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = inode->i_mtime; + aux.mtime = timespec64_to_timespec(inode->i_mtime); if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; @@ -163,7 +163,7 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) if (!ci->fscache) { memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = inode->i_mtime; + aux.mtime = timespec64_to_timespec(inode->i_mtime); ci->fscache = fscache_acquire_cookie(fsc->fscache, &ceph_fscache_inode_object_def, &ci->i_vino, sizeof(ci->i_vino), diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 23dbfae16156..990258cbd836 100644 --- 
a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -69,6 +69,8 @@ static char *gcap_string(char *s, int c) *s++ = 'w'; if (c & CEPH_CAP_GBUFFER) *s++ = 'b'; + if (c & CEPH_CAP_GWREXTEND) + *s++ = 'a'; if (c & CEPH_CAP_GLAZYIO) *s++ = 'l'; return s; @@ -1358,9 +1360,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, arg.xattr_buf = NULL; } - arg.mtime = inode->i_mtime; - arg.atime = inode->i_atime; - arg.ctime = inode->i_ctime; + arg.mtime = timespec64_to_timespec(inode->i_mtime); + arg.atime = timespec64_to_timespec(inode->i_atime); + arg.ctime = timespec64_to_timespec(inode->i_ctime); arg.op = op; arg.caps = cap->implemented; @@ -3022,30 +3024,41 @@ static void invalidate_aliases(struct inode *inode) dput(prev); } +struct cap_extra_info { + struct ceph_string *pool_ns; + /* inline data */ + u64 inline_version; + void *inline_data; + u32 inline_len; + /* dirstat */ + bool dirstat_valid; + u64 nfiles; + u64 nsubdirs; + /* currently issued */ + int issued; +}; + /* * Handle a cap GRANT message from the MDS. (Note that a GRANT may * actually be a revocation if it specifies a smaller cap set.) * * caller holds s_mutex and i_ceph_lock, we drop both. 
*/ -static void handle_cap_grant(struct ceph_mds_client *mdsc, - struct inode *inode, struct ceph_mds_caps *grant, - struct ceph_string **pns, u64 inline_version, - void *inline_data, u32 inline_len, - struct ceph_buffer *xattr_buf, +static void handle_cap_grant(struct inode *inode, struct ceph_mds_session *session, - struct ceph_cap *cap, int issued) + struct ceph_cap *cap, + struct ceph_mds_caps *grant, + struct ceph_buffer *xattr_buf, + struct cap_extra_info *extra_info) __releases(ci->i_ceph_lock) - __releases(mdsc->snap_rwsem) + __releases(session->s_mdsc->snap_rwsem) { struct ceph_inode_info *ci = ceph_inode(inode); - int mds = session->s_mds; int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); int used, wanted, dirty; u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); - struct timespec mtime, atime, ctime; int check_caps = 0; bool wake = false; bool writeback = false; @@ -3055,7 +3068,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, bool fill_inline = false; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", - inode, cap, mds, seq, ceph_cap_string(newcaps)); + inode, cap, session->s_mds, seq, ceph_cap_string(newcaps)); dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, inode->i_size); @@ -3101,7 +3114,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, __check_cap_issue(ci, cap, newcaps); if ((newcaps & CEPH_CAP_AUTH_SHARED) && - (issued & CEPH_CAP_AUTH_EXCL) == 0) { + (extra_info->issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(grant->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); @@ -3110,15 +3123,16 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, from_kgid(&init_user_ns, inode->i_gid)); } - if ((newcaps & CEPH_CAP_AUTH_SHARED) && - (issued & CEPH_CAP_LINK_EXCL) == 0) { + if ((newcaps & CEPH_CAP_LINK_SHARED) && + (extra_info->issued & 
CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); if (inode->i_nlink == 0 && (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) deleted_inode = true; } - if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) { + if ((extra_info->issued & CEPH_CAP_XATTR_EXCL) == 0 && + grant->xattr_len) { int len = le32_to_cpu(grant->xattr_len); u64 version = le64_to_cpu(grant->xattr_version); @@ -3134,15 +3148,21 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } if (newcaps & CEPH_CAP_ANY_RD) { + struct timespec mtime, atime, ctime; /* ctime/mtime/atime? */ ceph_decode_timespec(&mtime, &grant->mtime); ceph_decode_timespec(&atime, &grant->atime); ceph_decode_timespec(&ctime, &grant->ctime); - ceph_fill_file_time(inode, issued, + ceph_fill_file_time(inode, extra_info->issued, le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, &atime); } + if ((newcaps & CEPH_CAP_FILE_SHARED) && extra_info->dirstat_valid) { + ci->i_files = extra_info->nfiles; + ci->i_subdirs = extra_info->nsubdirs; + } + if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { /* file layout may have changed */ s64 old_pool = ci->i_layout.pool_id; @@ -3151,15 +3171,16 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout); old_ns = rcu_dereference_protected(ci->i_layout.pool_ns, lockdep_is_held(&ci->i_ceph_lock)); - rcu_assign_pointer(ci->i_layout.pool_ns, *pns); + rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns); - if (ci->i_layout.pool_id != old_pool || *pns != old_ns) + if (ci->i_layout.pool_id != old_pool || + extra_info->pool_ns != old_ns) ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; - *pns = old_ns; + extra_info->pool_ns = old_ns; /* size/truncate_seq? 
*/ - queue_trunc = ceph_fill_file_size(inode, issued, + queue_trunc = ceph_fill_file_size(inode, extra_info->issued, le32_to_cpu(grant->truncate_seq), le64_to_cpu(grant->truncate_size), size); @@ -3238,24 +3259,26 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, } BUG_ON(cap->issued & ~cap->implemented); - if (inline_version > 0 && inline_version >= ci->i_inline_version) { - ci->i_inline_version = inline_version; + if (extra_info->inline_version > 0 && + extra_info->inline_version >= ci->i_inline_version) { + ci->i_inline_version = extra_info->inline_version; if (ci->i_inline_version != CEPH_INLINE_NONE && (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO))) fill_inline = true; } if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { - if (newcaps & ~issued) + if (newcaps & ~extra_info->issued) wake = true; - kick_flushing_inode_caps(mdsc, session, inode); - up_read(&mdsc->snap_rwsem); + kick_flushing_inode_caps(session->s_mdsc, session, inode); + up_read(&session->s_mdsc->snap_rwsem); } else { spin_unlock(&ci->i_ceph_lock); } if (fill_inline) - ceph_fill_inline_data(inode, NULL, inline_data, inline_len); + ceph_fill_inline_data(inode, NULL, extra_info->inline_data, + extra_info->inline_len); if (queue_trunc) ceph_queue_vmtruncate(inode); @@ -3720,31 +3743,25 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; - struct super_block *sb = mdsc->fsc->sb; struct inode *inode; struct ceph_inode_info *ci; struct ceph_cap *cap; struct ceph_mds_caps *h; struct ceph_mds_cap_peer *peer = NULL; struct ceph_snap_realm *realm = NULL; - struct ceph_string *pool_ns = NULL; - int mds = session->s_mds; - int op, issued; + int op; + int msg_version = le16_to_cpu(msg->hdr.version); u32 seq, mseq; struct ceph_vino vino; - u64 tid; - u64 inline_version = 0; - void *inline_data = NULL; - u32 inline_len = 0; void *snaptrace; size_t snaptrace_len; void *p, *end; + struct cap_extra_info extra_info = 
{}; - dout("handle_caps from mds%d\n", mds); + dout("handle_caps from mds%d\n", session->s_mds); /* decode */ end = msg->front.iov_base + msg->front.iov_len; - tid = le64_to_cpu(msg->hdr.tid); if (msg->front.iov_len < sizeof(*h)) goto bad; h = msg->front.iov_base; @@ -3758,7 +3775,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, snaptrace_len = le32_to_cpu(h->snap_trace_len); p = snaptrace + snaptrace_len; - if (le16_to_cpu(msg->hdr.version) >= 2) { + if (msg_version >= 2) { u32 flock_len; ceph_decode_32_safe(&p, end, flock_len, bad); if (p + flock_len > end) @@ -3766,7 +3783,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, p += flock_len; } - if (le16_to_cpu(msg->hdr.version) >= 3) { + if (msg_version >= 3) { if (op == CEPH_CAP_OP_IMPORT) { if (p + sizeof(*peer) > end) goto bad; @@ -3778,16 +3795,16 @@ void ceph_handle_caps(struct ceph_mds_session *session, } } - if (le16_to_cpu(msg->hdr.version) >= 4) { - ceph_decode_64_safe(&p, end, inline_version, bad); - ceph_decode_32_safe(&p, end, inline_len, bad); - if (p + inline_len > end) + if (msg_version >= 4) { + ceph_decode_64_safe(&p, end, extra_info.inline_version, bad); + ceph_decode_32_safe(&p, end, extra_info.inline_len, bad); + if (p + extra_info.inline_len > end) goto bad; - inline_data = p; - p += inline_len; + extra_info.inline_data = p; + p += extra_info.inline_len; } - if (le16_to_cpu(msg->hdr.version) >= 5) { + if (msg_version >= 5) { struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; u32 epoch_barrier; @@ -3795,7 +3812,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, ceph_osdc_update_epoch_barrier(osdc, epoch_barrier); } - if (le16_to_cpu(msg->hdr.version) >= 8) { + if (msg_version >= 8) { u64 flush_tid; u32 caller_uid, caller_gid; u32 pool_ns_len; @@ -3809,13 +3826,33 @@ void ceph_handle_caps(struct ceph_mds_session *session, ceph_decode_32_safe(&p, end, pool_ns_len, bad); if (pool_ns_len > 0) { ceph_decode_need(&p, end, pool_ns_len, bad); - pool_ns = 
ceph_find_or_create_string(p, pool_ns_len); + extra_info.pool_ns = + ceph_find_or_create_string(p, pool_ns_len); p += pool_ns_len; } } + if (msg_version >= 11) { + struct ceph_timespec *btime; + u64 change_attr; + u32 flags; + + /* version >= 9 */ + if (p + sizeof(*btime) > end) + goto bad; + btime = p; + p += sizeof(*btime); + ceph_decode_64_safe(&p, end, change_attr, bad); + /* version >= 10 */ + ceph_decode_32_safe(&p, end, flags, bad); + /* version >= 11 */ + extra_info.dirstat_valid = true; + ceph_decode_64_safe(&p, end, extra_info.nfiles, bad); + ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad); + } + /* lookup ino */ - inode = ceph_find_inode(sb, vino); + inode = ceph_find_inode(mdsc->fsc->sb, vino); ci = ceph_inode(inode); dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, vino.snap, inode); @@ -3848,7 +3885,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, /* these will work even if we don't have a cap yet */ switch (op) { case CEPH_CAP_OP_FLUSHSNAP_ACK: - handle_cap_flushsnap_ack(inode, tid, h, session); + handle_cap_flushsnap_ack(inode, le64_to_cpu(msg->hdr.tid), + h, session); goto done; case CEPH_CAP_OP_EXPORT: @@ -3867,10 +3905,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, down_read(&mdsc->snap_rwsem); } handle_cap_import(mdsc, inode, h, peer, session, - &cap, &issued); - handle_cap_grant(mdsc, inode, h, &pool_ns, - inline_version, inline_data, inline_len, - msg->middle, session, cap, issued); + &cap, &extra_info.issued); + handle_cap_grant(inode, session, cap, + h, msg->middle, &extra_info); if (realm) ceph_put_snap_realm(mdsc, realm); goto done_unlocked; @@ -3878,10 +3915,11 @@ void ceph_handle_caps(struct ceph_mds_session *session, /* the rest require a cap */ spin_lock(&ci->i_ceph_lock); - cap = __get_cap_for_mds(ceph_inode(inode), mds); + cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds); if (!cap) { dout(" no cap on %p ino %llx.%llx from mds%d\n", - inode, ceph_ino(inode), 
ceph_snap(inode), mds); + inode, ceph_ino(inode), ceph_snap(inode), + session->s_mds); spin_unlock(&ci->i_ceph_lock); goto flush_cap_releases; } @@ -3890,15 +3928,15 @@ void ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: - __ceph_caps_issued(ci, &issued); - issued |= __ceph_caps_dirty(ci); - handle_cap_grant(mdsc, inode, h, &pool_ns, - inline_version, inline_data, inline_len, - msg->middle, session, cap, issued); + __ceph_caps_issued(ci, &extra_info.issued); + extra_info.issued |= __ceph_caps_dirty(ci); + handle_cap_grant(inode, session, cap, + h, msg->middle, &extra_info); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: - handle_cap_flush_ack(inode, tid, h, session, cap); + handle_cap_flush_ack(inode, le64_to_cpu(msg->hdr.tid), + h, session, cap); break; case CEPH_CAP_OP_TRUNC: @@ -3925,7 +3963,7 @@ done: mutex_unlock(&session->s_mutex); done_unlocked: iput(inode); - ceph_put_string(pool_ns); + ceph_put_string(extra_info.pool_ns); return; bad: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1a78dd6f8bf2..036ac0f3a393 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1486,6 +1486,8 @@ const struct file_operations ceph_dir_fops = { .release = ceph_release, .unlocked_ioctl = ceph_ioctl, .fsync = ceph_fsync, + .lock = ceph_lock, + .flock = ceph_flock, }; const struct file_operations ceph_snapdir_fops = { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index cf0e45b10121..e2679e8a2535 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -429,8 +429,7 @@ out: * file or symlink, return 1 so the VFS can retry. 
*/ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened) + struct file *file, unsigned flags, umode_t mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -507,9 +506,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, dout("atomic_open finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { ceph_init_inode_acls(d_inode(dentry), &acls); - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; } - err = finish_open(file, dentry, ceph_open, opened); + err = finish_open(file, dentry, ceph_open); } out_req: if (!req->r_err && req->r_target_inode) @@ -895,7 +894,6 @@ static void ceph_aio_retry_work(struct work_struct *work) req->r_callback = ceph_aio_complete_req; req->r_inode = inode; req->r_priv = aio_req; - req->r_abort_on_full = true; ret = ceph_osdc_start_request(req->r_osdc, req, false); out: @@ -924,7 +922,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int num_pages = 0; int flags; int ret; - struct timespec mtime = current_time(inode); + struct timespec mtime = timespec64_to_timespec(current_time(inode)); size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; @@ -1132,7 +1130,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, int flags; int ret; bool check_caps = false; - struct timespec mtime = current_time(inode); + struct timespec mtime = timespec64_to_timespec(current_time(inode)); size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) @@ -1664,7 +1662,7 @@ static int ceph_zero_partial_object(struct inode *inode, goto out; } - req->r_mtime = inode->i_mtime; + req->r_mtime = timespec64_to_timespec(inode->i_mtime); ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!ret) { ret = 
ceph_osdc_wait_request(&fsc->client->osdc, req); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ae056927080d..a866be999216 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -662,6 +662,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, struct timespec *mtime, struct timespec *atime) { struct ceph_inode_info *ci = ceph_inode(inode); + struct timespec64 ctime64 = timespec_to_timespec64(*ctime); + struct timespec64 mtime64 = timespec_to_timespec64(*mtime); + struct timespec64 atime64 = timespec_to_timespec64(*atime); int warn = 0; if (issued & (CEPH_CAP_FILE_EXCL| @@ -670,39 +673,39 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || - timespec_compare(ctime, &inode->i_ctime) > 0) { - dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", - inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - ctime->tv_sec, ctime->tv_nsec); - inode->i_ctime = *ctime; + timespec64_compare(&ctime64, &inode->i_ctime) > 0) { + dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", + (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + (long long)ctime->tv_sec, ctime->tv_nsec); + inode->i_ctime = ctime64; } if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ - dout("mtime %ld.%09ld -> %ld.%09ld " + dout("mtime %lld.%09ld -> %lld.%09ld " "tw %d -> %d\n", - inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - mtime->tv_sec, mtime->tv_nsec, + (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + (long long)mtime->tv_sec, mtime->tv_nsec, ci->i_time_warp_seq, (int)time_warp_seq); - inode->i_mtime = *mtime; - inode->i_atime = *atime; + inode->i_mtime = mtime64; + inode->i_atime = atime64; ci->i_time_warp_seq = time_warp_seq; } else if (time_warp_seq == ci->i_time_warp_seq) { /* nobody did utimes(); take the max */ - if (timespec_compare(mtime, &inode->i_mtime) > 0) { - dout("mtime %ld.%09ld -> %ld.%09ld inc\n", - inode->i_mtime.tv_sec, + if 
(timespec64_compare(&mtime64, &inode->i_mtime) > 0) { + dout("mtime %lld.%09ld -> %lld.%09ld inc\n", + (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - mtime->tv_sec, mtime->tv_nsec); - inode->i_mtime = *mtime; + (long long)mtime->tv_sec, mtime->tv_nsec); + inode->i_mtime = mtime64; } - if (timespec_compare(atime, &inode->i_atime) > 0) { - dout("atime %ld.%09ld -> %ld.%09ld inc\n", - inode->i_atime.tv_sec, + if (timespec64_compare(&atime64, &inode->i_atime) > 0) { + dout("atime %lld.%09ld -> %lld.%09ld inc\n", + (long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - atime->tv_sec, atime->tv_nsec); - inode->i_atime = *atime; + (long long)atime->tv_sec, atime->tv_nsec); + inode->i_atime = atime64; } } else if (issued & CEPH_CAP_FILE_EXCL) { /* we did a utimes(); ignore mds values */ @@ -712,9 +715,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, } else { /* we have no write|excl caps; whatever the MDS says is true */ if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { - inode->i_ctime = *ctime; - inode->i_mtime = *mtime; - inode->i_atime = *atime; + inode->i_ctime = ctime64; + inode->i_mtime = mtime64; + inode->i_atime = atime64; ci->i_time_warp_seq = time_warp_seq; } else { warn = 1; @@ -739,7 +742,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); - int issued = 0, implemented, new_issued; + int issued, new_issued, info_caps; struct timespec mtime, atime, ctime; struct ceph_buffer *xattr_blob = NULL; struct ceph_string *pool_ns = NULL; @@ -754,8 +757,10 @@ static int fill_inode(struct inode *inode, struct page *locked_page, inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version); + info_caps = le32_to_cpu(info->cap.caps); + /* prealloc new cap struct */ - if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) + if (info_caps && 
ceph_snap(inode) == CEPH_NOSNAP) new_cap = ceph_get_cap(mdsc, caps_reservation); /* @@ -792,9 +797,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, le64_to_cpu(info->version) > (ci->i_version & ~1))) new_version = true; - issued = __ceph_caps_issued(ci, &implemented); - issued |= implemented | __ceph_caps_dirty(ci); - new_issued = ~issued & le32_to_cpu(info->cap.caps); + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + new_issued = ~issued & info_caps; /* update inode */ inode->i_rdev = le32_to_cpu(info->rdev); @@ -826,6 +831,11 @@ static int fill_inode(struct inode *inode, struct page *locked_page, &ctime, &mtime, &atime); } + if (new_version || (info_caps & CEPH_CAP_FILE_SHARED)) { + ci->i_files = le64_to_cpu(info->files); + ci->i_subdirs = le64_to_cpu(info->subdirs); + } + if (new_version || (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { s64 old_pool = ci->i_layout.pool_id; @@ -854,6 +864,18 @@ static int fill_inode(struct inode *inode, struct page *locked_page, } } + /* layout and rstat are not tracked by capability, update them if + * the inode info is from auth mds */ + if (new_version || (info->cap.flags & CEPH_CAP_FLAG_AUTH)) { + if (S_ISDIR(inode->i_mode)) { + ci->i_dir_layout = iinfo->dir_layout; + ci->i_rbytes = le64_to_cpu(info->rbytes); + ci->i_rfiles = le64_to_cpu(info->rfiles); + ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); + ceph_decode_timespec(&ci->i_rctime, &info->rctime); + } + } + /* xattrs */ /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. 
*/ if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) && @@ -870,7 +892,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page, } /* finally update i_version */ - ci->i_version = le64_to_cpu(info->version); + if (le64_to_cpu(info->version) > ci->i_version) + ci->i_version = le64_to_cpu(info->version); inode->i_mapping->a_ops = &ceph_aops; @@ -918,15 +941,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, case S_IFDIR: inode->i_op = &ceph_dir_iops; inode->i_fop = &ceph_dir_fops; - - ci->i_dir_layout = iinfo->dir_layout; - - ci->i_files = le64_to_cpu(info->files); - ci->i_subdirs = le64_to_cpu(info->subdirs); - ci->i_rbytes = le64_to_cpu(info->rbytes); - ci->i_rfiles = le64_to_cpu(info->rfiles); - ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); - ceph_decode_timespec(&ci->i_rctime, &info->rctime); break; default: pr_err("fill_inode %llx.%llx BAD mode 0%o\n", @@ -934,12 +948,11 @@ static int fill_inode(struct inode *inode, struct page *locked_page, } /* were we issued a capability? */ - if (info->cap.caps) { + if (info_caps) { if (ceph_snap(inode) == CEPH_NOSNAP) { - unsigned caps = le32_to_cpu(info->cap.caps); ceph_add_cap(inode, session, le64_to_cpu(info->cap.cap_id), - cap_fmode, caps, + cap_fmode, info_caps, le32_to_cpu(info->cap.wanted), le32_to_cpu(info->cap.seq), le32_to_cpu(info->cap.mseq), @@ -949,7 +962,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, /* set dir completion flag? 
*/ if (S_ISDIR(inode->i_mode) && ci->i_files == 0 && ci->i_subdirs == 0 && - (caps & CEPH_CAP_FILE_SHARED) && + (info_caps & CEPH_CAP_FILE_SHARED) && (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); @@ -962,8 +975,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page, wake = true; } else { dout(" %p got snap_caps %s\n", inode, - ceph_cap_string(le32_to_cpu(info->cap.caps))); - ci->i_snap_caps |= le32_to_cpu(info->cap.caps); + ceph_cap_string(info_caps)); + ci->i_snap_caps |= info_caps; if (cap_fmode >= 0) __ceph_get_fmode(ci, cap_fmode); } @@ -978,8 +991,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; ci->i_inline_version = iinfo->inline_version; if (ci->i_inline_version != CEPH_INLINE_NONE && - (locked_page || - (le32_to_cpu(info->cap.caps) & cache_caps))) + (locked_page || (info_caps & cache_caps))) fill_inline = true; } @@ -1123,6 +1135,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) if (IS_ERR(realdn)) { pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", PTR_ERR(realdn), dn, in, ceph_vinop(in)); + dput(dn); dn = realdn; /* note realdn contains the error */ goto out; } else if (realdn) { @@ -1941,6 +1954,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) int err = 0; int inode_dirty_flags = 0; bool lock_snap_rwsem = false; + struct timespec ts; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) @@ -2015,44 +2029,44 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) } if (ia_valid & ATTR_ATIME) { - dout("setattr %p atime %ld.%ld -> %ld.%ld\n", inode, - inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); + dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode, + (long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec, + (long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); 
if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_atime = attr->ia_atime; dirtied |= CEPH_CAP_FILE_EXCL; } else if ((issued & CEPH_CAP_FILE_WR) && - timespec_compare(&inode->i_atime, + timespec64_compare(&inode->i_atime, &attr->ia_atime) < 0) { inode->i_atime = attr->ia_atime; dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || - !timespec_equal(&inode->i_atime, &attr->ia_atime)) { - ceph_encode_timespec(&req->r_args.setattr.atime, - &attr->ia_atime); + !timespec64_equal(&inode->i_atime, &attr->ia_atime)) { + ts = timespec64_to_timespec(attr->ia_atime); + ceph_encode_timespec(&req->r_args.setattr.atime, &ts); mask |= CEPH_SETATTR_ATIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } } if (ia_valid & ATTR_MTIME) { - dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode, - inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); + dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, + (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + (long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_mtime = attr->ia_mtime; dirtied |= CEPH_CAP_FILE_EXCL; } else if ((issued & CEPH_CAP_FILE_WR) && - timespec_compare(&inode->i_mtime, + timespec64_compare(&inode->i_mtime, &attr->ia_mtime) < 0) { inode->i_mtime = attr->ia_mtime; dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || - !timespec_equal(&inode->i_mtime, &attr->ia_mtime)) { - ceph_encode_timespec(&req->r_args.setattr.mtime, - &attr->ia_mtime); + !timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) { + ts = timespec64_to_timespec(attr->ia_mtime); + ceph_encode_timespec(&req->r_args.setattr.mtime, &ts); mask |= CEPH_SETATTR_MTIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; @@ -2082,9 +2096,9 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (ia_valid & ATTR_CTIME) { 
bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; - dout("setattr %p ctime %ld.%ld -> %ld.%ld (%s)\n", inode, - inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, + dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode, + (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + (long long)attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, only ? "ctime only" : "ignored"); if (only) { /* @@ -2126,7 +2140,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_inode_drop = release; req->r_args.setattr.mask = cpu_to_le32(mask); req->r_num_caps = 1; - req->r_stamp = attr->ia_ctime; + req->r_stamp = timespec64_to_timespec(attr->ia_ctime); err = ceph_mdsc_do_request(mdsc, NULL, req); } dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, @@ -2178,6 +2192,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; + int mode; int err; if (ceph_snap(inode) == CEPH_SNAPDIR) { @@ -2190,7 +2205,8 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) return 0; - req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); + mode = (mask & CEPH_STAT_RSTAT) ? USE_AUTH_MDS : USE_ANY_MDS; + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); if (IS_ERR(req)) return PTR_ERR(req); req->r_inode = inode; @@ -2261,6 +2277,14 @@ int ceph_getattr(const struct path *path, struct kstat *stat, stat->size = ci->i_files + ci->i_subdirs; stat->blocks = 0; stat->blksize = 65536; + /* + * Some applications rely on the number of st_nlink + * value on directories to be either 0 (if unlinked) + * or 2 + number of subdirectories. + */ + if (stat->nlink == 1) + /* '.' + '..' 
+ subdirs */ + stat->nlink = 1 + 1 + ci->i_subdirs; } } return err; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index cf8d24812cc0..dc8bc664a871 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2958,12 +2958,15 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { + struct timespec ts; rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v1.issued = cpu_to_le32(cap->issued); rec.v1.size = cpu_to_le64(inode->i_size); - ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime); - ceph_encode_timespec(&rec.v1.atime, &inode->i_atime); + ts = timespec64_to_timespec(inode->i_mtime); + ceph_encode_timespec(&rec.v1.mtime, &ts); + ts = timespec64_to_timespec(inode->i_atime); + ceph_encode_timespec(&rec.v1.atime, &ts); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v1.pathbase = cpu_to_le64(pathbase); } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 041c27ea8de1..af81555c14fd 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -594,9 +594,9 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, BUG_ON(capsnap->writing); capsnap->size = inode->i_size; - capsnap->mtime = inode->i_mtime; - capsnap->atime = inode->i_atime; - capsnap->ctime = inode->i_ctime; + capsnap->mtime = timespec64_to_timespec(inode->i_mtime); + capsnap->atime = timespec64_to_timespec(inode->i_atime); + capsnap->ctime = timespec64_to_timespec(inode->i_ctime); capsnap->time_warp_seq = ci->i_time_warp_seq; capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b33082e6878f..95a3b3ac9b6e 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -45,7 +45,7 @@ static void ceph_put_super(struct super_block *s) static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ceph_fs_client *fsc = 
ceph_inode_to_client(d_inode(dentry)); - struct ceph_monmap *monmap = fsc->client->monc.monmap; + struct ceph_mon_client *monc = &fsc->client->monc; struct ceph_statfs st; u64 fsid; int err; @@ -58,7 +58,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) } dout("statfs\n"); - err = ceph_monc_do_statfs(&fsc->client->monc, data_pool, &st); + err = ceph_monc_do_statfs(monc, data_pool, &st); if (err < 0) return err; @@ -94,8 +94,11 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; /* Must convert the fsid, for consistent values across arches */ - fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^ - le64_to_cpu(*((__le64 *)&monmap->fsid + 1)); + mutex_lock(&monc->mutex); + fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ + le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); + mutex_unlock(&monc->mutex); + buf->f_fsid.val[0] = fsid & 0xffffffff; buf->f_fsid.val[1] = fsid >> 32; @@ -256,19 +259,19 @@ static int parse_fsopt_token(char *c, void *private) break; /* misc */ case Opt_wsize: - if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) + if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) return -EINVAL; fsopt->wsize = ALIGN(intval, PAGE_SIZE); break; case Opt_rsize: - if (intval < PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) + if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_READ_SIZE) return -EINVAL; fsopt->rsize = ALIGN(intval, PAGE_SIZE); break; case Opt_rasize: if (intval < 0) return -EINVAL; - fsopt->rasize = ALIGN(intval + PAGE_SIZE - 1, PAGE_SIZE); + fsopt->rasize = ALIGN(intval, PAGE_SIZE); break; case Opt_caps_wanted_delay_min: if (intval < 1) @@ -286,7 +289,7 @@ static int parse_fsopt_token(char *c, void *private) fsopt->max_readdir = intval; break; case Opt_readdir_max_bytes: - if (intval < PAGE_SIZE && intval != 0) + if (intval < (int)PAGE_SIZE && intval != 0) return -EINVAL; fsopt->max_readdir_bytes = intval; break; @@ -534,6 +537,8 @@ static int ceph_show_options(struct 
seq_file *m, struct dentry *root) seq_puts(m, ",noasyncreaddir"); if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) seq_puts(m, ",nodcache"); + if (fsopt->flags & CEPH_MOUNT_OPT_INO32) + seq_puts(m, ",ino32"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { seq_show_option(m, "fsc", fsopt->fscache_uniq); } @@ -551,7 +556,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) if (fsopt->mds_namespace) seq_show_option(m, "mds_namespace", fsopt->mds_namespace); - if (fsopt->wsize) + if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) seq_printf(m, ",wsize=%d", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE) seq_printf(m, ",rsize=%d", fsopt->rsize); @@ -616,7 +621,9 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, err = PTR_ERR(fsc->client); goto fail; } + fsc->client->extra_mon_dispatch = extra_mon_dispatch; + fsc->client->osdc.abort_on_full = true; if (!fsopt->mds_namespace) { ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, @@ -674,6 +681,13 @@ fail: return ERR_PTR(err); } +static void flush_fs_workqueues(struct ceph_fs_client *fsc) +{ + flush_workqueue(fsc->wb_wq); + flush_workqueue(fsc->pg_inv_wq); + flush_workqueue(fsc->trunc_wq); +} + static void destroy_fs_client(struct ceph_fs_client *fsc) { dout("destroy_fs_client %p\n", fsc); @@ -793,6 +807,7 @@ static void ceph_umount_begin(struct super_block *sb) if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); ceph_mdsc_force_umount(fsc->mdsc); return; } @@ -1088,6 +1103,8 @@ static void ceph_kill_sb(struct super_block *s) dout("kill_sb %p\n", s); ceph_mdsc_pre_umount(fsc->mdsc); + flush_fs_workqueues(fsc); + generic_shutdown_super(s); fsc->client->extra_mon_dispatch = NULL; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a7077a0c989f..971328b99ede 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1025,8 +1025,7 @@ extern const struct file_operations ceph_file_fops; extern int 
ceph_renew_caps(struct inode *inode); extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned flags, umode_t mode, - int *opened); + struct file *file, unsigned flags, umode_t mode); extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 315f7e63e7cc..5bc8edb4c2a6 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -50,10 +50,14 @@ struct ceph_vxattr { size_t name_size; /* strlen(name) + 1 (for '\0') */ size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, size_t size); - bool readonly, hidden; bool (*exists_cb)(struct ceph_inode_info *ci); + unsigned int flags; }; +#define VXATTR_FLAG_READONLY (1<<0) +#define VXATTR_FLAG_HIDDEN (1<<1) +#define VXATTR_FLAG_RSTAT (1<<2) + /* layouts */ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) @@ -262,32 +266,31 @@ static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci, #define CEPH_XATTR_NAME2(_type, _name, _name2) \ XATTR_CEPH_PREFIX #_type "." #_name "." 
#_name2 -#define XATTR_NAME_CEPH(_type, _name) \ +#define XATTR_NAME_CEPH(_type, _name, _flags) \ { \ .name = CEPH_XATTR_NAME(_type, _name), \ .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ - .readonly = true, \ - .hidden = false, \ - .exists_cb = NULL, \ + .exists_cb = NULL, \ + .flags = (VXATTR_FLAG_READONLY | _flags), \ } +#define XATTR_RSTAT_FIELD(_type, _name) \ + XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT) #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ { \ .name = CEPH_XATTR_NAME2(_type, _name, _field), \ .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ - .readonly = false, \ - .hidden = true, \ .exists_cb = ceph_vxattrcb_layout_exists, \ + .flags = VXATTR_FLAG_HIDDEN, \ } #define XATTR_QUOTA_FIELD(_type, _name) \ { \ .name = CEPH_XATTR_NAME(_type, _name), \ .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)), \ .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ - .readonly = false, \ - .hidden = true, \ .exists_cb = ceph_vxattrcb_quota_exists, \ + .flags = VXATTR_FLAG_HIDDEN, \ } static struct ceph_vxattr ceph_dir_vxattrs[] = { @@ -295,30 +298,28 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { .name = "ceph.dir.layout", .name_size = sizeof("ceph.dir.layout"), .getxattr_cb = ceph_vxattrcb_layout, - .readonly = false, - .hidden = true, .exists_cb = ceph_vxattrcb_layout_exists, + .flags = VXATTR_FLAG_HIDDEN, }, XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), XATTR_LAYOUT_FIELD(dir, layout, stripe_count), XATTR_LAYOUT_FIELD(dir, layout, object_size), XATTR_LAYOUT_FIELD(dir, layout, pool), XATTR_LAYOUT_FIELD(dir, layout, pool_namespace), - XATTR_NAME_CEPH(dir, entries), - XATTR_NAME_CEPH(dir, files), - XATTR_NAME_CEPH(dir, subdirs), - XATTR_NAME_CEPH(dir, rentries), - XATTR_NAME_CEPH(dir, rfiles), - XATTR_NAME_CEPH(dir, rsubdirs), - XATTR_NAME_CEPH(dir, rbytes), - XATTR_NAME_CEPH(dir, rctime), + 
XATTR_NAME_CEPH(dir, entries, 0), + XATTR_NAME_CEPH(dir, files, 0), + XATTR_NAME_CEPH(dir, subdirs, 0), + XATTR_RSTAT_FIELD(dir, rentries), + XATTR_RSTAT_FIELD(dir, rfiles), + XATTR_RSTAT_FIELD(dir, rsubdirs), + XATTR_RSTAT_FIELD(dir, rbytes), + XATTR_RSTAT_FIELD(dir, rctime), { .name = "ceph.quota", .name_size = sizeof("ceph.quota"), .getxattr_cb = ceph_vxattrcb_quota, - .readonly = false, - .hidden = true, .exists_cb = ceph_vxattrcb_quota_exists, + .flags = VXATTR_FLAG_HIDDEN, }, XATTR_QUOTA_FIELD(quota, max_bytes), XATTR_QUOTA_FIELD(quota, max_files), @@ -333,9 +334,8 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { .name = "ceph.file.layout", .name_size = sizeof("ceph.file.layout"), .getxattr_cb = ceph_vxattrcb_layout, - .readonly = false, - .hidden = true, .exists_cb = ceph_vxattrcb_layout_exists, + .flags = VXATTR_FLAG_HIDDEN, }, XATTR_LAYOUT_FIELD(file, layout, stripe_unit), XATTR_LAYOUT_FIELD(file, layout, stripe_count), @@ -374,9 +374,10 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) struct ceph_vxattr *vxattr; size_t size = 0; - for (vxattr = vxattrs; vxattr->name; vxattr++) - if (!vxattr->hidden) + for (vxattr = vxattrs; vxattr->name; vxattr++) { + if (!(vxattr->flags & VXATTR_FLAG_HIDDEN)) size += vxattr->name_size; + } return size; } @@ -809,7 +810,10 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); if (vxattr) { - err = ceph_do_getattr(inode, 0, true); + int mask = 0; + if (vxattr->flags & VXATTR_FLAG_RSTAT) + mask |= CEPH_STAT_RSTAT; + err = ceph_do_getattr(inode, mask, true); if (err) return err; err = -ENODATA; @@ -919,7 +923,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) err = namelen; if (vxattrs) { for (i = 0; vxattrs[i].name; i++) { - if (!vxattrs[i].hidden && + if (!(vxattrs[i].flags & VXATTR_FLAG_HIDDEN) && !(vxattrs[i].exists_cb && !vxattrs[i].exists_cb(ci))) { len = 
sprintf(names, "%s", vxattrs[i].name); @@ -1024,7 +1028,7 @@ int __ceph_setxattr(struct inode *inode, const char *name, vxattr = ceph_match_vxattr(inode, name); if (vxattr) { - if (vxattr->readonly) + if (vxattr->flags & VXATTR_FLAG_READONLY) return -EOPNOTSUPP; if (value && !strncmp(vxattr->name, "ceph.quota", 10)) check_realm = true; diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index edf5f40898bf..e1553d1e0e50 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -128,8 +128,8 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data, memset(&auxdata, 0, sizeof(auxdata)); auxdata.eof = cifsi->server_eof; - auxdata.last_write_time = cifsi->vfs_inode.i_mtime; - auxdata.last_change_time = cifsi->vfs_inode.i_ctime; + auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime); + auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime); if (memcmp(data, &auxdata, datalen) != 0) return FSCACHE_CHECKAUX_OBSOLETE; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 116146022aa1..bfe999505815 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -126,6 +126,25 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon) seq_putc(m, '\n'); } +static void +cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface) +{ + struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr; + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr; + + seq_printf(m, "\t\tSpeed: %zu bps\n", iface->speed); + seq_puts(m, "\t\tCapabilities: "); + if (iface->rdma_capable) + seq_puts(m, "rdma "); + if (iface->rss_capable) + seq_puts(m, "rss "); + seq_putc(m, '\n'); + if (iface->sockaddr.ss_family == AF_INET) + seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr); + else if (iface->sockaddr.ss_family == AF_INET6) + seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr); +} + static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct list_head *tmp1, *tmp2, *tmp3; @@ 
-312,6 +331,16 @@ skip_rdma: mid_entry->mid); } spin_unlock(&GlobalMid_Lock); + + spin_lock(&ses->iface_lock); + if (ses->iface_count) + seq_printf(m, "\n\tServer interfaces: %zu\n", + ses->iface_count); + for (j = 0; j < ses->iface_count; j++) { + seq_printf(m, "\t%d)\n", j); + cifs_dump_iface(m, &ses->iface_list[j]); + } + spin_unlock(&ses->iface_lock); } } spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 937251cc61c0..ee2a8ec70056 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -37,7 +37,6 @@ #include <crypto/aead.h> int __cifs_calc_signature(struct smb_rqst *rqst, - int start, struct TCP_Server_Info *server, char *signature, struct shash_desc *shash) { @@ -45,16 +44,27 @@ int __cifs_calc_signature(struct smb_rqst *rqst, int rc; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; + int is_smb2 = server->vals->header_preamble_size == 0; - for (i = start; i < n_vec; i++) { + /* iov[0] is actual data and not the rfc1002 length for SMB2+ */ + if (is_smb2) { + if (iov[0].iov_len <= 4) + return -EIO; + i = 0; + } else { + if (n_vec < 2 || iov[0].iov_len != 4) + return -EIO; + i = 1; /* skip rfc1002 length */ + } + + for (; i < n_vec; i++) { if (iov[i].iov_len == 0) continue; if (iov[i].iov_base == NULL) { cifs_dbg(VFS, "null iovec entry\n"); return -EIO; } - if (i == 1 && iov[1].iov_len <= 4) - break; /* nothing to sign or corrupt header */ + rc = crypto_shash_update(shash, iov[i].iov_base, iov[i].iov_len); if (rc) { @@ -118,7 +128,7 @@ static int cifs_calc_signature(struct smb_rqst *rqst, return rc; } - return __cifs_calc_signature(rqst, 1, server, signature, + return __cifs_calc_signature(rqst, server, signature, &server->secmech.sdescmd5->shash); } diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 5f0231803431..f3a78efc3109 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -65,8 +65,7 @@ extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode 
*, struct dentry *, umode_t, bool excl); extern int cifs_atomic_open(struct inode *, struct dentry *, - struct file *, unsigned, umode_t, - int *); + struct file *, unsigned, umode_t); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, unsigned int); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1efa2e65bc1a..c923c7854027 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -33,6 +33,9 @@ #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ +#define CIFS_PORT 445 +#define RFC1001_PORT 139 + /* * The sizes of various internal tables and strings */ @@ -312,6 +315,10 @@ struct smb_version_operations { /* send echo request */ int (*echo)(struct TCP_Server_Info *); /* create directory */ + int (*posix_mkdir)(const unsigned int xid, struct inode *inode, + umode_t mode, struct cifs_tcon *tcon, + const char *full_path, + struct cifs_sb_info *cifs_sb); int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *); /* set info on created directory */ @@ -416,7 +423,7 @@ struct smb_version_operations { void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, bool *); /* create lease context buffer for CREATE request */ - char * (*create_lease_buf)(u8 *, u8); + char * (*create_lease_buf)(u8 *lease_key, u8 oplock); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, @@ -838,6 +845,13 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) #endif +struct cifs_server_iface { + size_t speed; + unsigned int rdma_capable : 1; + unsigned int rss_capable : 1; + struct sockaddr_storage sockaddr; +}; + /* * Session structure. 
One of these for each uid session with a particular host */ @@ -875,6 +889,20 @@ struct cifs_ses { #ifdef CONFIG_CIFS_SMB311 __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; #endif /* 3.1.1 */ + + /* + * Network interfaces available on the server this session is + * connected to. + * + * Other channels can be opened by connecting and binding this + * session to interfaces from this list. + * + * iface_lock should be taken when accessing any of these fields + */ + spinlock_t iface_lock; + struct cifs_server_iface *iface_list; + size_t iface_count; + unsigned long iface_last_update; /* jiffies */ }; static inline bool @@ -883,6 +911,14 @@ cap_unix(struct cifs_ses *ses) return ses->server->vals->cap_unix & ses->capabilities; } +struct cached_fid { + bool is_valid:1; /* Do we have a useable root fid */ + struct cifs_fid *fid; + struct mutex fid_mutex; + struct cifs_tcon *tcon; + struct work_struct lease_break; +}; + /* * there is one of these for each connection to a resource on a particular * session @@ -987,9 +1023,7 @@ struct cifs_tcon { struct fscache_cookie *fscache; /* cookie for share */ #endif struct list_head pending_opens; /* list of incomplete opens */ - bool valid_root_fid:1; /* Do we have a useable root fid */ - struct mutex prfid_mutex; /* prevents reopen race after dead ses*/ - struct cifs_fid *prfid; /* handle to the directory at top of share */ + struct cached_fid crfid; /* Cached root fid */ /* BB add field for back pointer to sb struct(s)? 
*/ }; @@ -1382,6 +1416,7 @@ typedef int (mid_handle_t)(struct TCP_Server_Info *server, /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ + struct kref refcount; struct TCP_Server_Info *server; /* server corresponding to this mid */ __u64 mid; /* multiplex id */ __u32 pid; /* process id */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 4e0d183c3d10..1890f534c88b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -82,6 +82,7 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern void cifs_delete_mid(struct mid_q_entry *mid); +extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); @@ -112,10 +113,6 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */, const int flags, struct kvec * /* resp vec */); -extern int smb2_send_recv(const unsigned int xid, struct cifs_ses *pses, - struct kvec *pkvec, int nvec_to_send, - int *pbuftype, const int flags, - struct kvec *presp); extern int SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *ptcon, struct smb_hdr *in_buf , @@ -544,7 +541,7 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const unsigned char *path, char *pbuf, unsigned int *pbytes_written); -int __cifs_calc_signature(struct smb_rqst *rqst, int start, +int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature, struct shash_desc *shash); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, @@ -552,6 +549,7 @@ enum securityEnum cifs_select_sectype(struct 
TCP_Server_Info *, struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); void cifs_aio_ctx_release(struct kref *refcount); int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw); +void smb2_cached_lease_break(struct work_struct *work); int cifs_alloc_hash(const char *name, struct crypto_shash **shash, struct sdesc **sdesc); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 42329b25877d..93408eab92e7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -107,10 +107,10 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) } spin_unlock(&tcon->open_file_lock); - mutex_lock(&tcon->prfid_mutex); - tcon->valid_root_fid = false; - memset(tcon->prfid, 0, sizeof(struct cifs_fid)); - mutex_unlock(&tcon->prfid_mutex); + mutex_lock(&tcon->crfid.fid_mutex); + tcon->crfid.is_valid = false; + memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid)); + mutex_unlock(&tcon->crfid.fid_mutex); /* * BB Add call to invalidate_inodes(sb) for all superblocks mounted @@ -157,8 +157,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) * greater than cifs socket timeout which is 7 seconds */ while (server->tcpStatus == CifsNeedReconnect) { - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? 
*/ if (server->tcpStatus != CifsNeedReconnect) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 96645a7d8f27..5df2c0698cda 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -57,9 +57,6 @@ #include "smb2proto.h" #include "smbdirect.h" -#define CIFS_PORT 445 -#define RFC1001_PORT 139 - extern mempool_t *cifs_req_poolp; extern bool disable_legacy_dialects; @@ -927,6 +924,7 @@ next_pdu: server->pdu_size = next_offset; } + mid_entry = NULL; if (server->ops->is_transform_hdr && server->ops->receive_transform && server->ops->is_transform_hdr(buf)) { @@ -941,8 +939,11 @@ next_pdu: length = mid_entry->receive(server, mid_entry); } - if (length < 0) + if (length < 0) { + if (mid_entry) + cifs_mid_q_entry_release(mid_entry); continue; + } if (server->large_buf) buf = server->bigbuf; @@ -959,6 +960,8 @@ next_pdu: if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); + + cifs_mid_q_entry_release(mid_entry); } else if (server->ops->is_oplock_break && server->ops->is_oplock_break(buf, server)) { cifs_dbg(FYI, "Received oplock break\n"); @@ -3029,8 +3032,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) #ifdef CONFIG_CIFS_SMB311 if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) { - if (ses->server->vals->protocol_id == SMB311_PROT_ID) + if (ses->server->vals->protocol_id == SMB311_PROT_ID) { tcon->posix_extensions = true; + printk_once(KERN_WARNING + "SMB3.11 POSIX Extensions are experimental\n"); + } } #endif /* 311 */ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ddae52bd1993..3713d22b95a7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -465,8 +465,7 @@ out_err: int cifs_atomic_open(struct inode *inode, struct dentry *direntry, - struct file *file, unsigned oflags, umode_t mode, - int *opened) + struct file *file, unsigned oflags, umode_t mode) { int rc; unsigned int xid; @@ -539,9 +538,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } if ((oflags & (O_CREAT | O_EXCL)) == 
(O_CREAT | O_EXCL)) - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; - rc = finish_open(file, direntry, generic_file_open, opened); + rc = finish_open(file, direntry, generic_file_open); if (rc) { if (server->ops->close) server->ops->close(xid, tcon, &fid); diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 25d3f66b2d50..85145a763021 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -129,8 +129,8 @@ static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi, memset(&auxdata, 0, sizeof(auxdata)); auxdata.eof = cifsi->server_eof; - auxdata.last_write_time = cifsi->vfs_inode.i_mtime; - auxdata.last_change_time = cifsi->vfs_inode.i_ctime; + auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime); + auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime); cifsi->fscache = fscache_acquire_cookie(tcon->fscache, @@ -166,8 +166,8 @@ void cifs_fscache_release_inode_cookie(struct inode *inode) if (cifsi->fscache) { memset(&auxdata, 0, sizeof(auxdata)); auxdata.eof = cifsi->server_eof; - auxdata.last_write_time = cifsi->vfs_inode.i_mtime; - auxdata.last_change_time = cifsi->vfs_inode.i_ctime; + auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime); + auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime); cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); fscache_relinquish_cookie(cifsi->fscache, &auxdata, false); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a94071c7b408..a2cfb33e85c1 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -95,6 +95,7 @@ static void cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) { struct cifsInodeInfo *cifs_i = CIFS_I(inode); + struct timespec ts; cifs_dbg(FYI, "%s: revalidating inode %llu\n", __func__, cifs_i->uniqueid); @@ -113,7 +114,8 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) } /* revalidate if mtime or size have changed */ - if 
(timespec_equal(&inode->i_mtime, &fattr->cf_mtime) && + ts = timespec64_to_timespec(inode->i_mtime); + if (timespec_equal(&ts, &fattr->cf_mtime) && cifs_i->server_eof == fattr->cf_eof) { cifs_dbg(FYI, "%s: inode %llu is unchanged\n", __func__, cifs_i->uniqueid); @@ -162,9 +164,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) cifs_revalidate_cache(inode, fattr); spin_lock(&inode->i_lock); - inode->i_atime = fattr->cf_atime; - inode->i_mtime = fattr->cf_mtime; - inode->i_ctime = fattr->cf_ctime; + inode->i_atime = timespec_to_timespec64(fattr->cf_atime); + inode->i_mtime = timespec_to_timespec64(fattr->cf_mtime); + inode->i_ctime = timespec_to_timespec64(fattr->cf_ctime); inode->i_rdev = fattr->cf_rdev; cifs_nlink_fattr_to_inode(inode, fattr); inode->i_uid = fattr->cf_uid; @@ -1123,14 +1125,14 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, if (attrs->ia_valid & ATTR_ATIME) { set_time = true; info_buf.LastAccessTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); + cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime))); } else info_buf.LastAccessTime = 0; if (attrs->ia_valid & ATTR_MTIME) { set_time = true; info_buf.LastWriteTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); + cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime))); } else info_buf.LastWriteTime = 0; @@ -1143,7 +1145,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, if (set_time && (attrs->ia_valid & ATTR_CTIME)) { cifs_dbg(FYI, "CIFS - CTIME changed\n"); info_buf.ChangeTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); + cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime))); } else info_buf.ChangeTime = 0; @@ -1573,6 +1575,17 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) goto mkdir_out; } + server = tcon->ses->server; + +#ifdef CONFIG_CIFS_SMB311 + if ((server->ops->posix_mkdir) && (tcon->posix_extensions)) { 
+ rc = server->ops->posix_mkdir(xid, inode, mode, tcon, full_path, + cifs_sb); + d_drop(direntry); /* for time being always refresh inode info */ + goto mkdir_out; + } +#endif /* SMB311 */ + if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = cifs_posix_mkdir(inode, direntry, mode, full_path, cifs_sb, @@ -1581,8 +1594,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) goto mkdir_out; } - server = tcon->ses->server; - if (!server->ops->mkdir) { rc = -ENOSYS; goto mkdir_out; @@ -2060,8 +2071,8 @@ int cifs_getattr(const struct path *path, struct kstat *stat, /* old CIFS Unix Extensions doesn't return create time */ if (CIFS_I(inode)->createtime) { stat->result_mask |= STATX_BTIME; - stat->btime = - cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime)); + stat->btime = timespec_to_timespec64( + cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime))); } stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED); @@ -2267,17 +2278,17 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->gid = INVALID_GID; /* no change */ if (attrs->ia_valid & ATTR_ATIME) - args->atime = cifs_UnixTimeToNT(attrs->ia_atime); + args->atime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime)); else args->atime = NO_CHANGE_64; if (attrs->ia_valid & ATTR_MTIME) - args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime); + args->mtime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime)); else args->mtime = NO_CHANGE_64; if (attrs->ia_valid & ATTR_CTIME) - args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime); + args->ctime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime)); else args->ctime = NO_CHANGE_64; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index af29ade195c0..53e8362cbc4a 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -82,6 +82,7 @@ sesInfoAlloc(void) INIT_LIST_HEAD(&ret_buf->smb_ses_list); INIT_LIST_HEAD(&ret_buf->tcon_list); 
mutex_init(&ret_buf->session_mutex); + spin_lock_init(&ret_buf->iface_lock); } return ret_buf; } @@ -102,6 +103,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); kzfree(buf_to_free->auth_key.response); + kfree(buf_to_free->iface_list); kzfree(buf_to_free); } @@ -117,8 +119,9 @@ tconInfoAlloc(void) INIT_LIST_HEAD(&ret_buf->openFileList); INIT_LIST_HEAD(&ret_buf->tcon_list); spin_lock_init(&ret_buf->open_file_lock); - mutex_init(&ret_buf->prfid_mutex); - ret_buf->prfid = kzalloc(sizeof(struct cifs_fid), GFP_KERNEL); + mutex_init(&ret_buf->crfid.fid_mutex); + ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid), + GFP_KERNEL); #ifdef CONFIG_CIFS_STATS spin_lock_init(&ret_buf->stat_lock); #endif @@ -136,7 +139,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free) atomic_dec(&tconInfoAllocCount); kfree(buf_to_free->nativeFileSystem); kzfree(buf_to_free->password); - kfree(buf_to_free->prfid); + kfree(buf_to_free->crfid.fid); kfree(buf_to_free); } diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index aff8ce8ba34d..646dcd149de1 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -107,6 +107,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) if (compare_mid(mid->mid, buf) && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 788412675723..4ed10dd086e6 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -41,7 +41,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, int rc; __le16 *smb2_path; struct smb2_file_all_info *smb2_data = NULL; - __u8 smb2_oplock[17]; + __u8 smb2_oplock; struct cifs_fid *fid = oparms->fid; struct network_resiliency_req nr_ioctl_req; @@ -59,12 +59,9 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, } oparms->desired_access |= FILE_READ_ATTRIBUTES; - 
*smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; + smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; - if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) - memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); - - rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL, + rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, NULL); if (rc) goto out; @@ -101,7 +98,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, move_smb2_info_to_cifs(buf, smb2_data); } - *oplock = *smb2_oplock; + *oplock = smb2_oplock; out: kfree(smb2_data); kfree(smb2_path); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index e2bec47c6845..3ff7cec2da81 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -454,7 +454,8 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb) #ifdef CONFIG_CIFS_SMB311 /* SMB311 POSIX extensions paths do not include leading slash */ else if (cifs_sb_master_tlink(cifs_sb) && - cifs_sb_master_tcon(cifs_sb)->posix_extensions) { + cifs_sb_master_tcon(cifs_sb)->posix_extensions && + (from[0] == '/')) { start_of_path = from + 1; } #endif /* 311 */ @@ -492,10 +493,11 @@ cifs_ses_oplock_break(struct work_struct *work) { struct smb2_lease_break_work *lw = container_of(work, struct smb2_lease_break_work, lease_break); - int rc; + int rc = 0; rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key, lw->lease_state); + cifs_dbg(FYI, "Lease release rc %d\n", rc); cifs_put_tlink(lw->tlink); kfree(lw); @@ -561,6 +563,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, open->oplock = lease_state; } + return found; } @@ -603,6 +606,18 @@ smb2_is_valid_lease_break(char *buffer) return true; } spin_unlock(&tcon->open_file_lock); + + if (tcon->crfid.is_valid && + !memcmp(rsp->LeaseKey, + tcon->crfid.fid->lease_key, + SMB2_LEASE_KEY_SIZE)) { + INIT_WORK(&tcon->crfid.lease_break, + smb2_cached_lease_break); + queue_work(cifsiod_wq, + &tcon->crfid.lease_break); 
+ spin_unlock(&cifs_tcp_ses_lock); + return true; + } } } } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b15f5957d645..ea92a38b2f08 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -203,6 +203,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf) if ((mid->mid == wire_mid) && (mid->mid_state == MID_REQUEST_SUBMITTED) && (mid->command == shdr->Command)) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } @@ -294,34 +295,191 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) return rsize; } -#ifdef CONFIG_CIFS_STATS2 + +static int +parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, + size_t buf_len, + struct cifs_server_iface **iface_list, + size_t *iface_count) +{ + struct network_interface_info_ioctl_rsp *p; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + struct iface_info_ipv4 *p4; + struct iface_info_ipv6 *p6; + struct cifs_server_iface *info; + ssize_t bytes_left; + size_t next = 0; + int nb_iface = 0; + int rc = 0; + + *iface_list = NULL; + *iface_count = 0; + + /* + * Fist pass: count and sanity check + */ + + bytes_left = buf_len; + p = buf; + while (bytes_left >= sizeof(*p)) { + nb_iface++; + next = le32_to_cpu(p->Next); + if (!next) { + bytes_left -= sizeof(*p); + break; + } + p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); + bytes_left -= next; + } + + if (!nb_iface) { + cifs_dbg(VFS, "%s: malformed interface info\n", __func__); + rc = -EINVAL; + goto out; + } + + if (bytes_left || p->Next) + cifs_dbg(VFS, "%s: incomplete interface info\n", __func__); + + + /* + * Second pass: extract info to internal structure + */ + + *iface_list = kcalloc(nb_iface, sizeof(**iface_list), GFP_KERNEL); + if (!*iface_list) { + rc = -ENOMEM; + goto out; + } + + info = *iface_list; + bytes_left = buf_len; + p = buf; + while (bytes_left >= sizeof(*p)) { + info->speed = le64_to_cpu(p->LinkSpeed); + info->rdma_capable = le32_to_cpu(p->Capability & 
RDMA_CAPABLE); + info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE); + + cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count); + cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed); + cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__, + le32_to_cpu(p->Capability)); + + switch (p->Family) { + /* + * The kernel and wire socket structures have the same + * layout and use network byte order but make the + * conversion explicit in case either one changes. + */ + case INTERNETWORK: + addr4 = (struct sockaddr_in *)&info->sockaddr; + p4 = (struct iface_info_ipv4 *)p->Buffer; + addr4->sin_family = AF_INET; + memcpy(&addr4->sin_addr, &p4->IPv4Address, 4); + + /* [MS-SMB2] 2.2.32.5.1.1 Clients MUST ignore these */ + addr4->sin_port = cpu_to_be16(CIFS_PORT); + + cifs_dbg(FYI, "%s: ipv4 %pI4\n", __func__, + &addr4->sin_addr); + break; + case INTERNETWORKV6: + addr6 = (struct sockaddr_in6 *)&info->sockaddr; + p6 = (struct iface_info_ipv6 *)p->Buffer; + addr6->sin6_family = AF_INET6; + memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16); + + /* [MS-SMB2] 2.2.32.5.1.2 Clients MUST ignore these */ + addr6->sin6_flowinfo = 0; + addr6->sin6_scope_id = 0; + addr6->sin6_port = cpu_to_be16(CIFS_PORT); + + cifs_dbg(FYI, "%s: ipv6 %pI6\n", __func__, + &addr6->sin6_addr); + break; + default: + cifs_dbg(VFS, + "%s: skipping unsupported socket family\n", + __func__); + goto next_iface; + } + + (*iface_count)++; + info++; +next_iface: + next = le32_to_cpu(p->Next); + if (!next) + break; + p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next); + bytes_left -= next; + } + + if (!*iface_count) { + rc = -EINVAL; + goto out; + } + +out: + if (rc) { + kfree(*iface_list); + *iface_count = 0; + *iface_list = NULL; + } + return rc; +} + + static int SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) { int rc; unsigned int ret_data_len = 0; - struct network_interface_info_ioctl_rsp *out_buf; + struct network_interface_info_ioctl_rsp *out_buf = 
NULL; + struct cifs_server_iface *iface_list; + size_t iface_count; + struct cifs_ses *ses = tcon->ses; rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, NULL /* no data input */, 0 /* no data input */, (char **)&out_buf, &ret_data_len); - if (rc != 0) + if (rc != 0) { cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc); - else if (ret_data_len < sizeof(struct network_interface_info_ioctl_rsp)) { - cifs_dbg(VFS, "server returned bad net interface info buf\n"); - rc = -EINVAL; - } else { - /* Dump info on first interface */ - cifs_dbg(FYI, "Adapter Capability 0x%x\t", - le32_to_cpu(out_buf->Capability)); - cifs_dbg(FYI, "Link Speed %lld\n", - le64_to_cpu(out_buf->LinkSpeed)); + goto out; } + + rc = parse_server_interfaces(out_buf, ret_data_len, + &iface_list, &iface_count); + if (rc) + goto out; + + spin_lock(&ses->iface_lock); + kfree(ses->iface_list); + ses->iface_list = iface_list; + ses->iface_count = iface_count; + ses->iface_last_update = jiffies; + spin_unlock(&ses->iface_lock); + +out: kfree(out_buf); return rc; } -#endif /* STATS2 */ + +void +smb2_cached_lease_break(struct work_struct *work) +{ + struct cached_fid *cfid = container_of(work, + struct cached_fid, lease_break); + mutex_lock(&cfid->fid_mutex); + if (cfid->is_valid) { + cifs_dbg(FYI, "clear cached root file handle\n"); + SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid, + cfid->fid->volatile_fid); + cfid->is_valid = false; + } + mutex_unlock(&cfid->fid_mutex); +} /* * Open the directory at the root of a share @@ -331,13 +489,13 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) struct cifs_open_parms oparams; int rc; __le16 srch_path = 0; /* Null - since an open of top of share */ - u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + u8 oplock = SMB2_OPLOCK_LEVEL_II; - mutex_lock(&tcon->prfid_mutex); - if (tcon->valid_root_fid) { + mutex_lock(&tcon->crfid.fid_mutex); + if (tcon->crfid.is_valid) { 
cifs_dbg(FYI, "found a cached root file handle\n"); - memcpy(pfid, tcon->prfid, sizeof(struct cifs_fid)); - mutex_unlock(&tcon->prfid_mutex); + memcpy(pfid, tcon->crfid.fid, sizeof(struct cifs_fid)); + mutex_unlock(&tcon->crfid.fid_mutex); return 0; } @@ -350,10 +508,11 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) rc = SMB2_open(xid, &oparams, &srch_path, &oplock, NULL, NULL, NULL); if (rc == 0) { - memcpy(tcon->prfid, pfid, sizeof(struct cifs_fid)); - tcon->valid_root_fid = true; + memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid)); + tcon->crfid.tcon = tcon; + tcon->crfid.is_valid = true; } - mutex_unlock(&tcon->prfid_mutex); + mutex_unlock(&tcon->crfid.fid_mutex); return rc; } @@ -383,9 +542,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) if (rc) return; -#ifdef CONFIG_CIFS_STATS2 SMB3_request_interfaces(xid, tcon); -#endif /* STATS2 */ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, FS_ATTRIBUTE_INFORMATION); @@ -436,7 +593,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; - if ((*full_path == 0) && tcon->valid_root_fid) + if ((*full_path == 0) && tcon->crfid.is_valid) return 0; utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); @@ -699,6 +856,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea, len); + kfree(ea); + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); return rc; @@ -2063,8 +2222,7 @@ smb2_create_lease_buf(u8 *lease_key, u8 oplock) if (!buf) return NULL; - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseState = map_oplock_to_lease(oplock); buf->ccontext.DataOffset = cpu_to_le16(offsetof @@ -2090,8 +2248,7 @@ 
smb3_create_lease_buf(u8 *lease_key, u8 oplock) if (!buf) return NULL; - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseState = map_oplock_to_lease(oplock); buf->ccontext.DataOffset = cpu_to_le16(offsetof @@ -2128,8 +2285,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) return SMB2_OPLOCK_LEVEL_NOCHANGE; if (lease_key) - memcpy(lease_key, &lc->lcontext.LeaseKeyLow, - SMB2_LEASE_KEY_SIZE); + memcpy(lease_key, &lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); return le32_to_cpu(lc->lcontext.LeaseState); } @@ -2151,7 +2307,7 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, struct smb_rqst *old_rq) { struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)old_rq->rq_iov[1].iov_base; + (struct smb2_sync_hdr *)old_rq->rq_iov[0].iov_base; memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr)); tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM; @@ -2171,14 +2327,13 @@ static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, } /* Assumes: - * rqst->rq_iov[0] is rfc1002 length - * rqst->rq_iov[1] is tranform header - * rqst->rq_iov[2+] data to be encrypted/decrypted + * rqst->rq_iov[0] is transform header + * rqst->rq_iov[1+] data to be encrypted/decrypted */ static struct scatterlist * init_sg(struct smb_rqst *rqst, u8 *sign) { - unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages; + unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1; unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20; struct scatterlist *sg; unsigned int i; @@ -2189,10 +2344,10 @@ init_sg(struct smb_rqst *rqst, u8 *sign) return NULL; sg_init_table(sg, sg_len); - smb2_sg_set_buf(&sg[0], rqst->rq_iov[1].iov_base + 20, assoc_data_len); - for (i = 1; i < rqst->rq_nvec - 1; i++) - 
smb2_sg_set_buf(&sg[i], rqst->rq_iov[i+1].iov_base, - rqst->rq_iov[i+1].iov_len); + smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 20, assoc_data_len); + for (i = 1; i < rqst->rq_nvec; i++) + smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base, + rqst->rq_iov[i].iov_len); for (j = 0; i < sg_len - 1; i++, j++) { unsigned int len, offset; @@ -2224,18 +2379,17 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) return 1; } /* - * Encrypt or decrypt @rqst message. @rqst has the following format: - * iov[0] - rfc1002 length - * iov[1] - transform header (associate data), - * iov[2-N] and pages - data to encrypt. - * On success return encrypted data in iov[2-N] and pages, leave iov[0-1] + * Encrypt or decrypt @rqst message. @rqst[0] has the following format: + * iov[0] - transform header (associate data), + * iov[1-N] - SMB2 header and pages - data to encrypt. + * On success return encrypted data in iov[1-N] and pages, leave iov[0] * untouched. */ static int crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) { struct smb2_transform_hdr *tr_hdr = - (struct smb2_transform_hdr *)rqst->rq_iov[1].iov_base; + (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base; unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20; int rc = 0; struct scatterlist *sg; @@ -2323,10 +2477,6 @@ free_req: return rc; } -/* - * This is called from smb_send_rqst. At this point we have the rfc1002 - * header as the first element in the vector. 
- */ static int smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq, struct smb_rqst *old_rq) @@ -2335,7 +2485,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq, struct page **pages; struct smb2_transform_hdr *tr_hdr; unsigned int npages = old_rq->rq_npages; - unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base); + unsigned int orig_len; int i; int rc = -ENOMEM; @@ -2355,18 +2505,14 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq, goto err_free_pages; } - /* Make space for one extra iov to hold the transform header */ iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec), GFP_KERNEL); if (!iov) goto err_free_pages; - /* copy all iovs from the old except the 1st one (rfc1002 length) */ - memcpy(&iov[2], &old_rq->rq_iov[1], - sizeof(struct kvec) * (old_rq->rq_nvec - 1)); - /* copy the rfc1002 iov */ - iov[0].iov_base = old_rq->rq_iov[0].iov_base; - iov[0].iov_len = old_rq->rq_iov[0].iov_len; + /* copy all iovs from the old */ + memcpy(&iov[1], &old_rq->rq_iov[0], + sizeof(struct kvec) * old_rq->rq_nvec); new_rq->rq_iov = iov; new_rq->rq_nvec = old_rq->rq_nvec + 1; @@ -2375,14 +2521,12 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq, if (!tr_hdr) goto err_free_iov; + orig_len = smb_rqst_len(server, old_rq); + /* fill the 2nd iov with a transform header */ fill_transform_hdr(tr_hdr, orig_len, old_rq); - new_rq->rq_iov[1].iov_base = tr_hdr; - new_rq->rq_iov[1].iov_len = sizeof(struct smb2_transform_hdr); - - /* Update rfc1002 header */ - inc_rfc1001_len(new_rq->rq_iov[0].iov_base, - sizeof(struct smb2_transform_hdr)); + new_rq->rq_iov[0].iov_base = tr_hdr; + new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr); /* copy pages form the old */ for (i = 0; i < npages; i++) { @@ -2426,7 +2570,7 @@ smb3_free_transform_rq(struct smb_rqst *rqst) put_page(rqst->rq_pages[i]); kfree(rqst->rq_pages); /* free transform 
header */ - kfree(rqst->rq_iov[1].iov_base); + kfree(rqst->rq_iov[0].iov_base); kfree(rqst->rq_iov); } @@ -2443,19 +2587,17 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, unsigned int buf_data_size, struct page **pages, unsigned int npages, unsigned int page_data_size) { - struct kvec iov[3]; + struct kvec iov[2]; struct smb_rqst rqst = {NULL}; int rc; - iov[0].iov_base = NULL; - iov[0].iov_len = 0; - iov[1].iov_base = buf; - iov[1].iov_len = sizeof(struct smb2_transform_hdr); - iov[2].iov_base = buf + sizeof(struct smb2_transform_hdr); - iov[2].iov_len = buf_data_size; + iov[0].iov_base = buf; + iov[0].iov_len = sizeof(struct smb2_transform_hdr); + iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr); + iov[1].iov_len = buf_data_size; rqst.rq_iov = iov; - rqst.rq_nvec = 3; + rqst.rq_nvec = 2; rqst.rq_pages = pages; rqst.rq_npages = npages; rqst.rq_pagesz = PAGE_SIZE; @@ -2467,7 +2609,7 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, if (rc) return rc; - memmove(buf, iov[2].iov_base, buf_data_size); + memmove(buf, iov[1].iov_base, buf_data_size); server->total_read = buf_data_size + page_data_size; @@ -3170,6 +3312,7 @@ struct smb_version_operations smb311_operations = { .set_compression = smb2_set_compression, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, + .posix_mkdir = smb311_posix_mkdir, .rmdir = smb2_rmdir, .unlink = smb2_unlink, .rename = smb2_rename_path, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index af032e1a3eac..3c92678cb45b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -155,7 +155,7 @@ out: static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) { - int rc = 0; + int rc; struct nls_table *nls_codepage; struct cifs_ses *ses; struct TCP_Server_Info *server; @@ -166,10 +166,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) * for those three - in the calling routine. 
*/ if (tcon == NULL) - return rc; + return 0; if (smb2_command == SMB2_TREE_CONNECT) - return rc; + return 0; if (tcon->tidStatus == CifsExiting) { /* @@ -212,8 +212,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) return -EAGAIN; } - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? */ if (server->tcpStatus != CifsNeedReconnect) @@ -231,7 +237,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } if (!tcon->ses->need_reconnect && !tcon->need_reconnect) - return rc; + return 0; nls_codepage = load_nls_default(); @@ -340,7 +346,10 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; /* BB eventually switch this to SMB2 specific small buf size */ - *request_buf = cifs_small_buf_get(); + if (smb2_command == SMB2_SET_INFO) + *request_buf = cifs_buf_get(); + else + *request_buf = cifs_small_buf_get(); if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? 
*/ return -ENOMEM; @@ -602,6 +611,7 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req, int SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) { + struct smb_rqst rqst; struct smb2_negotiate_req *req; struct smb2_negotiate_rsp *rsp; struct kvec iov[1]; @@ -673,7 +683,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_negotiate_rsp *)rsp_iov.iov_base; /* @@ -990,8 +1004,9 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) req->PreviousSessionId = sess_data->previous_session; req->Flags = 0; /* MBZ */ - /* to enable echos and oplocks */ - req->sync_hdr.CreditRequest = cpu_to_le16(3); + + /* enough to enable echos and oplocks and one max size write */ + req->sync_hdr.CreditRequest = cpu_to_le16(130); /* only one of SMB2 signing flags may be set in SMB2 request */ if (server->sign) @@ -1027,6 +1042,7 @@ static int SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data) { int rc; + struct smb_rqst rqst; struct smb2_sess_setup_req *req = sess_data->iov[0].iov_base; struct kvec rsp_iov = { NULL, 0 }; @@ -1035,10 +1051,13 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data) cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */); req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len); - /* BB add code to build os and lm fields */ + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = sess_data->iov; + rqst.rq_nvec = 2; - rc = smb2_send_recv(sess_data->xid, sess_data->ses, - sess_data->iov, 2, + /* BB add code to build os and lm fields */ + rc = cifs_send_recv(sess_data->xid, sess_data->ses, + &rqst, &sess_data->buf0_type, CIFS_LOG_ERROR | CIFS_NEG_OP, 
&rsp_iov); cifs_small_buf_release(sess_data->iov[0].iov_base); @@ -1376,6 +1395,7 @@ out: int SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) { + struct smb_rqst rqst; struct smb2_logoff_req *req; /* response is also trivial struct */ int rc = 0; struct TCP_Server_Info *server; @@ -1413,7 +1433,11 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); /* * No tcon so can't do @@ -1443,6 +1467,7 @@ int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *cp) { + struct smb_rqst rqst; struct smb2_tree_connect_req *req; struct smb2_tree_connect_rsp *rsp = NULL; struct kvec iov[2]; @@ -1499,7 +1524,11 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, !smb3_encryption_required(tcon)) req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; - rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 2; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base; @@ -1563,6 +1592,7 @@ tcon_error_exit: int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) { + struct smb_rqst rqst; struct smb2_tree_disconnect_req *req; /* response is trivial */ int rc = 0; struct cifs_ses *ses = tcon->ses; @@ -1593,7 +1623,11 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; 
+ rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE); @@ -1682,12 +1716,12 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, static int add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int *num_iovec, __u8 *oplock) + unsigned int *num_iovec, u8 *lease_key, __u8 *oplock) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; - iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock); + iov[num].iov_base = server->ops->create_lease_buf(lease_key, *oplock); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = server->vals->create_lease_size; @@ -1886,11 +1920,165 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, return 0; } +#ifdef CONFIG_CIFS_SMB311 +int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, + umode_t mode, struct cifs_tcon *tcon, + const char *full_path, + struct cifs_sb_info *cifs_sb) +{ + struct smb_rqst rqst; + struct smb2_create_req *req; + struct smb2_create_rsp *rsp; + struct TCP_Server_Info *server; + struct cifs_ses *ses = tcon->ses; + struct kvec iov[3]; /* make sure at least one for each open context */ + struct kvec rsp_iov = {NULL, 0}; + int resp_buftype; + int uni_path_len; + __le16 *copy_path = NULL; + int copy_size; + int rc = 0; + unsigned int n_iov = 2; + __u32 file_attributes = 0; + char *pc_buf = NULL; + int flags = 0; + unsigned int total_len; + __le16 *path = cifs_convert_path_to_utf16(full_path, cifs_sb); + + if (!path) + return -ENOMEM; + + cifs_dbg(FYI, "mkdir\n"); + + if (ses && (ses->server)) + server = ses->server; + else + return -EIO; + + rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len); + + if (rc) + return rc; + + if (smb3_encryption_required(tcon)) + flags |= CIFS_TRANSFORM_REQ; + + + req->ImpersonationLevel = 
IL_IMPERSONATION; + req->DesiredAccess = cpu_to_le32(FILE_WRITE_ATTRIBUTES); + /* File attributes ignored on open (used in create though) */ + req->FileAttributes = cpu_to_le32(file_attributes); + req->ShareAccess = FILE_SHARE_ALL_LE; + req->CreateDisposition = cpu_to_le32(FILE_CREATE); + req->CreateOptions = cpu_to_le32(CREATE_NOT_FILE); + + iov[0].iov_base = (char *)req; + /* -1 since last byte is buf[0] which is sent below (path) */ + iov[0].iov_len = total_len - 1; + + req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req)); + + /* [MS-SMB2] 2.2.13 NameOffset: + * If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of + * the SMB2 header, the file name includes a prefix that will + * be processed during DFS name normalization as specified in + * section 3.3.5.9. Otherwise, the file name is relative to + * the share that is identified by the TreeId in the SMB2 + * header. + */ + if (tcon->share_flags & SHI1005_FLAGS_DFS) { + int name_len; + + req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; + rc = alloc_path_with_tree_prefix(&copy_path, &copy_size, + &name_len, + tcon->treeName, path); + if (rc) { + cifs_small_buf_release(req); + return rc; + } + req->NameLength = cpu_to_le16(name_len * 2); + uni_path_len = copy_size; + path = copy_path; + } else { + uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; + /* MUST set path len (NameLength) to 0 opening root of share */ + req->NameLength = cpu_to_le16(uni_path_len - 2); + if (uni_path_len % 8 != 0) { + copy_size = roundup(uni_path_len, 8); + copy_path = kzalloc(copy_size, GFP_KERNEL); + if (!copy_path) { + cifs_small_buf_release(req); + return -ENOMEM; + } + memcpy((char *)copy_path, (const char *)path, + uni_path_len); + uni_path_len = copy_size; + path = copy_path; + } + } + + iov[1].iov_len = uni_path_len; + iov[1].iov_base = path; + req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE; + + if (tcon->posix_extensions) { + if (n_iov > 2) { + struct create_context *ccontext = + (struct create_context 
*)iov[n_iov-1].iov_base; + ccontext->Next = + cpu_to_le32(iov[n_iov-1].iov_len); + } + + rc = add_posix_context(iov, &n_iov, mode); + if (rc) { + cifs_small_buf_release(req); + kfree(copy_path); + return rc; + } + pc_buf = iov[n_iov-1].iov_base; + } + + + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = n_iov; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, + &rsp_iov); + + cifs_small_buf_release(req); + rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; + + if (rc != 0) { + cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); + trace_smb3_posix_mkdir_err(xid, tcon->tid, ses->Suid, + CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES, rc); + goto smb311_mkdir_exit; + } else + trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid, + ses->Suid, CREATE_NOT_FILE, + FILE_WRITE_ATTRIBUTES); + + SMB2_close(xid, tcon, rsp->PersistentFileId, rsp->VolatileFileId); + + /* Eventually save off posix specific response info and timestaps */ + +smb311_mkdir_exit: + kfree(copy_path); + kfree(pc_buf); + free_rsp_buf(resp_buftype, rsp); + return rc; + +} +#endif /* SMB311 */ + int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, struct smb2_file_all_info *buf, struct kvec *err_iov, int *buftype) { + struct smb_rqst rqst; struct smb2_create_req *req; struct smb2_create_rsp *rsp; struct TCP_Server_Info *server; @@ -1993,7 +2181,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; else { - rc = add_lease_context(server, iov, &n_iov, oplock); + rc = add_lease_context(server, iov, &n_iov, + oparms->fid->lease_key, oplock); if (rc) { cifs_small_buf_release(req); kfree(copy_path); @@ -2043,7 +2232,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } #endif /* SMB311 */ - rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags, + memset(&rqst, 0, sizeof(struct 
smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = n_iov; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; @@ -2099,6 +2292,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, char *in_data, u32 indatalen, char **out_data, u32 *plen /* returned data len */) { + struct smb_rqst rqst; struct smb2_ioctl_req *req; struct smb2_ioctl_rsp *rsp; struct cifs_ses *ses; @@ -2189,7 +2383,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; - rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags, + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = n_iov; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base; @@ -2274,6 +2472,7 @@ int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, int flags) { + struct smb_rqst rqst; struct smb2_close_req *req; struct smb2_close_rsp *rsp; struct cifs_ses *ses = tcon->ses; @@ -2301,7 +2500,11 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon, iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_close_rsp *)rsp_iov.iov_base; @@ -2387,6 +2590,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, u32 additional_info, size_t output_len, size_t min_len, void **data, u32 *dlen) { + struct smb_rqst rqst; struct smb2_query_info_req *req; struct smb2_query_info_rsp *rsp = NULL; struct kvec iov[2]; @@ -2427,7 
+2631,11 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, /* 1 for Buffer */ iov[0].iov_len = total_len - 1; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; @@ -2594,11 +2802,10 @@ SMB2_echo(struct TCP_Server_Info *server) { struct smb2_echo_req *req; int rc = 0; - struct kvec iov[2]; + struct kvec iov[1]; struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; + .rq_nvec = 1 }; unsigned int total_len; - __be32 rfc1002_marker; cifs_dbg(FYI, "In echo request\n"); @@ -2614,11 +2821,8 @@ SMB2_echo(struct TCP_Server_Info *server) req->sync_hdr.CreditRequest = cpu_to_le16(1); - iov[0].iov_len = 4; - rfc1002_marker = cpu_to_be32(total_len); - iov[0].iov_base = &rfc1002_marker; - iov[1].iov_len = total_len; - iov[1].iov_base = (char *)req; + iov[0].iov_len = total_len; + iov[0].iov_base = (char *)req; rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, NULL, server, CIFS_ECHO_OP); @@ -2633,6 +2837,7 @@ int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid) { + struct smb_rqst rqst; struct smb2_flush_req *req; struct cifs_ses *ses = tcon->ses; struct kvec iov[1]; @@ -2660,7 +2865,11 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); if (rc != 0) { @@ -2848,10 +3057,9 @@ smb2_async_readv(struct cifs_readdata *rdata) struct smb2_sync_hdr *shdr; struct cifs_io_parms io_parms; struct smb_rqst rqst = 
{ .rq_iov = rdata->iov, - .rq_nvec = 2 }; + .rq_nvec = 1 }; struct TCP_Server_Info *server; unsigned int total_len; - __be32 req_len; cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", __func__, rdata->offset, rdata->bytes); @@ -2882,12 +3090,8 @@ smb2_async_readv(struct cifs_readdata *rdata) if (smb3_encryption_required(io_parms.tcon)) flags |= CIFS_TRANSFORM_REQ; - req_len = cpu_to_be32(total_len); - - rdata->iov[0].iov_base = &req_len; - rdata->iov[0].iov_len = sizeof(__be32); - rdata->iov[1].iov_base = buf; - rdata->iov[1].iov_len = total_len; + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = total_len; shdr = (struct smb2_sync_hdr *)buf; @@ -2926,6 +3130,7 @@ int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, char **buf, int *buf_type) { + struct smb_rqst rqst; int resp_buftype, rc = -EACCES; struct smb2_read_plain_req *req = NULL; struct smb2_read_rsp *rsp = NULL; @@ -2946,7 +3151,11 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; @@ -3062,10 +3271,9 @@ smb2_async_writev(struct cifs_writedata *wdata, struct smb2_sync_hdr *shdr; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - struct kvec iov[2]; + struct kvec iov[1]; struct smb_rqst rqst = { }; unsigned int total_len; - __be32 rfc1002_marker; rc = smb2_plain_req_init(SMB2_WRITE, tcon, (void **) &req, &total_len); if (rc) { @@ -3137,15 +3345,11 @@ smb2_async_writev(struct cifs_writedata *wdata, v1->length = cpu_to_le32(wdata->mr->mr->length); } #endif - /* 4 for rfc1002 length field and 1 for Buffer */ - iov[0].iov_len = 4; 
- rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes); - iov[0].iov_base = &rfc1002_marker; - iov[1].iov_len = total_len - 1; - iov[1].iov_base = (char *)req; + iov[0].iov_len = total_len - 1; + iov[0].iov_base = (char *)req; rqst.rq_iov = iov; - rqst.rq_nvec = 2; + rqst.rq_nvec = 1; rqst.rq_pages = wdata->pages; rqst.rq_offset = wdata->page_offset; rqst.rq_npages = wdata->nr_pages; @@ -3153,7 +3357,7 @@ smb2_async_writev(struct cifs_writedata *wdata, rqst.rq_tailsz = wdata->tailsz; #ifdef CONFIG_CIFS_SMB_DIRECT if (wdata->mr) { - iov[1].iov_len += sizeof(struct smbd_buffer_descriptor_v1); + iov[0].iov_len += sizeof(struct smbd_buffer_descriptor_v1); rqst.rq_npages = 0; } #endif @@ -3210,6 +3414,7 @@ int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, struct kvec *iov, int n_vec) { + struct smb_rqst rqst; int rc = 0; struct smb2_write_req *req = NULL; struct smb2_write_rsp *rsp = NULL; @@ -3251,7 +3456,11 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, /* 1 for Buffer */ iov[0].iov_len = total_len - 1; - rc = smb2_send_recv(xid, io_parms->tcon->ses, iov, n_vec + 1, + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = n_vec + 1; + + rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_write_rsp *)rsp_iov.iov_base; @@ -3323,6 +3532,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, int index, struct cifs_search_info *srch_inf) { + struct smb_rqst rqst; struct smb2_query_directory_req *req; struct smb2_query_directory_rsp *rsp = NULL; struct kvec iov[2]; @@ -3395,7 +3605,11 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, iov[1].iov_base = (char *)(req->Buffer); iov[1].iov_len = len; - rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + 
rqst.rq_iov = iov; + rqst.rq_nvec = 2; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base; @@ -3454,6 +3668,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, u8 info_type, u32 additional_info, unsigned int num, void **data, unsigned int *size) { + struct smb_rqst rqst; struct smb2_set_info_req *req; struct smb2_set_info_rsp *rsp = NULL; struct kvec *iov; @@ -3509,9 +3724,13 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, iov[i].iov_len = size[i]; } - rc = smb2_send_recv(xid, ses, iov, num, &resp_buftype, flags, + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = num; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); + cifs_buf_release(req); rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base; if (rc != 0) { @@ -3664,6 +3883,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, const u64 persistent_fid, const u64 volatile_fid, __u8 oplock_level) { + struct smb_rqst rqst; int rc; struct smb2_oplock_break *req = NULL; struct cifs_ses *ses = tcon->ses; @@ -3692,7 +3912,11 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) { @@ -3755,6 +3979,7 @@ int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) { + struct smb_rqst rqst; struct smb2_query_info_rsp *rsp = NULL; struct kvec iov; struct kvec rsp_iov; @@ -3773,7 +3998,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, if 
(smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = &iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(iov.iov_base); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); @@ -3798,6 +4027,7 @@ int SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, int level) { + struct smb_rqst rqst; struct smb2_query_info_rsp *rsp = NULL; struct kvec iov; struct kvec rsp_iov; @@ -3829,7 +4059,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = &iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(iov.iov_base); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); @@ -3868,6 +4102,7 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, const __u32 num_lock, struct smb2_lock_element *buf) { + struct smb_rqst rqst; int rc = 0; struct smb2_lock_req *req = NULL; struct kvec iov[2]; @@ -3900,7 +4135,12 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, iov[1].iov_len = count; cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); - rc = smb2_send_recv(xid, tcon->ses, iov, 2, &resp_buf_type, flags, + + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 2; + + rc = cifs_send_recv(xid, tcon->ses, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) { @@ -3934,6 +4174,7 @@ int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, __u8 *lease_key, const __le32 lease_state) { + struct smb_rqst 
rqst; int rc; struct smb2_lease_ack *req = NULL; struct cifs_ses *ses = tcon->ses; @@ -3964,7 +4205,11 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; - rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = iov; + rqst.rq_nvec = 1; + + rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) { diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index a345560001ce..a671adcc44a6 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -678,16 +678,14 @@ struct create_context { #define SMB2_LEASE_KEY_SIZE 16 struct lease_context { - __le64 LeaseKeyLow; - __le64 LeaseKeyHigh; + u8 LeaseKey[SMB2_LEASE_KEY_SIZE]; __le32 LeaseState; __le32 LeaseFlags; __le64 LeaseDuration; } __packed; struct lease_context_v2 { - __le64 LeaseKeyLow; - __le64 LeaseKeyHigh; + u8 LeaseKey[SMB2_LEASE_KEY_SIZE]; __le32 LeaseState; __le32 LeaseFlags; __le64 LeaseDuration; @@ -851,8 +849,11 @@ struct validate_negotiate_info_rsp { __le16 Dialect; /* Dialect in use for the connection */ } __packed; -#define RSS_CAPABLE 0x00000001 -#define RDMA_CAPABLE 0x00000002 +#define RSS_CAPABLE cpu_to_le32(0x00000001) +#define RDMA_CAPABLE cpu_to_le32(0x00000002) + +#define INTERNETWORK cpu_to_le16(0x0002) +#define INTERNETWORKV6 cpu_to_le16(0x0017) struct network_interface_info_ioctl_rsp { __le32 Next; /* next interface. 
zero if this is last one */ @@ -860,7 +861,21 @@ struct network_interface_info_ioctl_rsp { __le32 Capability; /* RSS or RDMA Capable */ __le32 Reserved; __le64 LinkSpeed; - char SockAddr_Storage[128]; + __le16 Family; + __u8 Buffer[126]; +} __packed; + +struct iface_info_ipv4 { + __be16 Port; + __be32 IPv4Address; + __be64 Reserved; +} __packed; + +struct iface_info_ipv6 { + __be16 Port; + __be32 FlowInfo; + __u8 IPv6Address[16]; + __be32 ScopeId; } __packed; #define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index c84020057bd8..6e6a4f2ec890 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -79,6 +79,10 @@ extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, bool set_alloc); extern int smb2_set_file_info(struct inode *inode, const char *full_path, FILE_BASIC_INFO *buf, const unsigned int xid); +extern int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, + umode_t mode, struct cifs_tcon *tcon, + const char *full_path, + struct cifs_sb_info *cifs_sb); extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb); extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, @@ -109,6 +113,8 @@ extern int smb2_unlock_range(struct cifsFileInfo *cfile, extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); extern void smb2_reconnect_server(struct work_struct *work); extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server); +extern unsigned long smb_rqst_len(struct TCP_Server_Info *server, + struct smb_rqst *rqst); /* * SMB2 Worker functions - most of protocol specific implementation details diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 349d5ccf854c..719d55e63d88 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -171,10 +171,10 @@ smb2_calc_signature(struct smb_rqst *rqst, 
struct TCP_Server_Info *server) unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; unsigned char *sigptr = smb2_signature; struct kvec *iov = rqst->rq_iov; - int iov_hdr_index = rqst->rq_nvec > 1 ? 1 : 0; - struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)iov[iov_hdr_index].iov_base; + struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; + struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash; + struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); if (!ses) { @@ -192,21 +192,39 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } rc = crypto_shash_setkey(server->secmech.hmacsha256, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } - rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + rc = crypto_shash_init(shash); if (rc) { cifs_dbg(VFS, "%s: Could not init sha256", __func__); return rc; } - rc = __cifs_calc_signature(rqst, iov_hdr_index, server, sigptr, - &server->secmech.sdeschmacsha256->shash); + /* + * For SMB2+, __cifs_calc_signature() expects to sign only the actual + * data, that is, iov[0] should not contain a rfc1002 length. + * + * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to + * __cifs_calc_signature(). 
+ */ + drqst = *rqst; + if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { + rc = crypto_shash_update(shash, iov[0].iov_base, + iov[0].iov_len); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); + return rc; + } + drqst.rq_iov++; + drqst.rq_nvec--; + } + rc = __cifs_calc_signature(&drqst, server, sigptr, shash); if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); @@ -410,14 +428,14 @@ generate_smb311signingkey(struct cifs_ses *ses) int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int rc = 0; + int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; - int iov_hdr_index = rqst->rq_nvec > 1 ? 1 : 0; - struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)iov[iov_hdr_index].iov_base; + struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; + struct shash_desc *shash = &server->secmech.sdesccmacaes->shash; + struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); if (!ses) { @@ -429,8 +447,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); rc = crypto_shash_setkey(server->secmech.cmacaes, - ses->smb3signingkey, SMB2_CMACAES_SIZE); - + ses->smb3signingkey, SMB2_CMACAES_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); return rc; @@ -441,15 +458,33 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) * so unlike smb2 case we do not have to check here if secmech are * initialized */ - rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash); + rc = crypto_shash_init(shash); if (rc) { cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__); return rc; } - rc = __cifs_calc_signature(rqst, iov_hdr_index, server, sigptr, - &server->secmech.sdesccmacaes->shash); + /* + * For SMB2+, __cifs_calc_signature() expects to sign only the actual + * 
data, that is, iov[0] should not contain a rfc1002 length. + * + * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to + * __cifs_calc_signature(). + */ + drqst = *rqst; + if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { + rc = crypto_shash_update(shash, iov[0].iov_base, + iov[0].iov_len); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); + return rc; + } + drqst.rq_iov++; + drqst.rq_nvec--; + } + rc = __cifs_calc_signature(&drqst, server, sigptr, shash); if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); @@ -462,7 +497,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) { int rc = 0; struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base; + (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base; if (!(shdr->Flags & SMB2_FLAGS_SIGNED) || server->tcpStatus == CifsNeedNegotiate) @@ -552,6 +587,7 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr, temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = le64_to_cpu(shdr->MessageId); temp->pid = current->pid; temp->command = shdr->Command; /* Always LE */ @@ -635,7 +671,7 @@ smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) { int rc; struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base; + (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; smb2_seq_num_into_buf(ses->server, shdr); @@ -656,7 +692,7 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) { int rc; struct smb2_sync_hdr *shdr = - (struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base; + (struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base; struct mid_q_entry *mid; smb2_seq_num_into_buf(server, shdr); diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index e459c97151b3..c55ea4e6201b 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -18,6 +18,7 @@ #include "smbdirect.h" #include 
"cifs_debug.h" #include "cifsproto.h" +#include "smb2proto.h" static struct smbd_response *get_empty_queue_buffer( struct smbd_connection *info); @@ -2082,12 +2083,13 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) * rqst: the data to write * return value: 0 if successfully write, otherwise error code */ -int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) +int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) { + struct smbd_connection *info = server->smbd_conn; struct kvec vec; int nvecs; int size; - unsigned int buflen = 0, remaining_data_length; + unsigned int buflen, remaining_data_length; int start, i, j; int max_iov_size = info->max_send_size - sizeof(struct smbd_data_transfer); @@ -2111,25 +2113,13 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) log_write(ERR, "expected the pdu length in 1st iov, but got %zu\n", rqst->rq_iov[0].iov_len); return -EINVAL; } - iov = &rqst->rq_iov[1]; - - /* total up iov array first */ - for (i = 0; i < rqst->rq_nvec-1; i++) { - buflen += iov[i].iov_len; - } /* * Add in the page array if there is one. 
The caller needs to set * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and * ends at page boundary */ - if (rqst->rq_npages) { - if (rqst->rq_npages == 1) - buflen += rqst->rq_tailsz; - else - buflen += rqst->rq_pagesz * (rqst->rq_npages - 1) - - rqst->rq_offset + rqst->rq_tailsz; - } + buflen = smb_rqst_len(server, rqst); if (buflen + sizeof(struct smbd_data_transfer) > info->max_fragmented_send_size) { @@ -2139,6 +2129,8 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) goto done; } + iov = &rqst->rq_iov[1]; + cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen); for (i = 0; i < rqst->rq_nvec-1; i++) dump_smb(iov[i].iov_base, iov[i].iov_len); diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index 1e419c21dc60..a11096254f29 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -292,7 +292,7 @@ void smbd_destroy(struct smbd_connection *info); /* Interface for carrying upper layer I/O through send/recv */ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); -int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst); +int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst); enum mr_state { MR_READY, @@ -332,7 +332,7 @@ static inline void *smbd_get_connection( static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } static inline void smbd_destroy(struct smbd_connection *info) {} static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; } -static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; } +static inline int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) {return -1; } #endif #endif diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 61e74d455d90..67e413f6ee4d 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -378,7 +378,7 @@ DEFINE_EVENT(smb3_open_err_class, smb3_##name, \ TP_ARGS(xid, tid, sesid, create_options, desired_access, rc)) 
DEFINE_SMB3_OPEN_ERR_EVENT(open_err); - +DEFINE_SMB3_OPEN_ERR_EVENT(posix_mkdir_err); DECLARE_EVENT_CLASS(smb3_open_done_class, TP_PROTO(unsigned int xid, @@ -420,6 +420,7 @@ DEFINE_EVENT(smb3_open_done_class, smb3_##name, \ TP_ARGS(xid, fid, tid, sesid, create_options, desired_access)) DEFINE_SMB3_OPEN_DONE_EVENT(open_done); +DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done); #endif /* _CIFS_TRACE_H */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1f1a68f89110..a341ec839c83 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -61,6 +61,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); @@ -82,6 +83,21 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; } +static void _cifs_mid_q_entry_release(struct kref *refcount) +{ + struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry, + refcount); + + mempool_free(mid, cifs_mid_poolp); +} + +void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) +{ + spin_lock(&GlobalMid_Lock); + kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); + spin_unlock(&GlobalMid_Lock); +} + void DeleteMidQEntry(struct mid_q_entry *midEntry) { @@ -110,7 +126,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) } } #endif - mempool_free(midEntry, cifs_mid_poolp); + cifs_mid_q_entry_release(midEntry); } void @@ -201,15 +217,25 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg, return 0; } -static unsigned long -rqst_len(struct smb_rqst *rqst) +unsigned long +smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) { unsigned int i; - struct kvec *iov = rqst->rq_iov; + struct kvec *iov; + int nvec; unsigned long buflen = 0; + if (server->vals->header_preamble_size == 0 && 
+ rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { + iov = &rqst->rq_iov[1]; + nvec = rqst->rq_nvec - 1; + } else { + iov = rqst->rq_iov; + nvec = rqst->rq_nvec; + } + /* total up iov array first */ - for (i = 0; i < rqst->rq_nvec; i++) + for (i = 0; i < nvec; i++) buflen += iov[i].iov_len; /* @@ -236,70 +262,88 @@ rqst_len(struct smb_rqst *rqst) } static int -__smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) +__smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst) { - int rc; - struct kvec *iov = rqst->rq_iov; - int n_vec = rqst->rq_nvec; - unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); - unsigned long send_length; - unsigned int i; + int rc = 0; + struct kvec *iov; + int n_vec; + unsigned int send_length = 0; + unsigned int i, j; size_t total_len = 0, sent, size; struct socket *ssocket = server->ssocket; struct msghdr smb_msg; int val = 1; + __be32 rfc1002_marker; + if (cifs_rdma_enabled(server) && server->smbd_conn) { - rc = smbd_send(server->smbd_conn, rqst); + rc = smbd_send(server, rqst); goto smbd_done; } if (ssocket == NULL) return -ENOTSOCK; - /* sanity check send length */ - send_length = rqst_len(rqst); - if (send_length != smb_buf_length + 4) { - WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n", - send_length, smb_buf_length); - return -EIO; - } - - if (n_vec < 2) - return -EIO; - - cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); - dump_smb(iov[0].iov_base, iov[0].iov_len); - dump_smb(iov[1].iov_base, iov[1].iov_len); - /* cork the socket */ kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, (char *)&val, sizeof(val)); - size = 0; - for (i = 0; i < n_vec; i++) - size += iov[i].iov_len; + for (j = 0; j < num_rqst; j++) + send_length += smb_rqst_len(server, &rqst[j]); + rfc1002_marker = cpu_to_be32(send_length); - iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, iov, n_vec, size); + /* Generate a rfc1002 marker for SMB2+ */ + if 
(server->vals->header_preamble_size == 0) { + struct kvec hiov = { + .iov_base = &rfc1002_marker, + .iov_len = 4 + }; + iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov, + 1, 4); + rc = smb_send_kvec(server, &smb_msg, &sent); + if (rc < 0) + goto uncork; - rc = smb_send_kvec(server, &smb_msg, &sent); - if (rc < 0) - goto uncork; + total_len += sent; + send_length += 4; + } - total_len += sent; + cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length); - /* now walk the page array and send each page in it */ - for (i = 0; i < rqst->rq_npages; i++) { - struct bio_vec bvec; + for (j = 0; j < num_rqst; j++) { + iov = rqst[j].rq_iov; + n_vec = rqst[j].rq_nvec; + + size = 0; + for (i = 0; i < n_vec; i++) { + dump_smb(iov[i].iov_base, iov[i].iov_len); + size += iov[i].iov_len; + } - bvec.bv_page = rqst->rq_pages[i]; - rqst_page_get_length(rqst, i, &bvec.bv_len, &bvec.bv_offset); + iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, + iov, n_vec, size); - iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC, - &bvec, 1, bvec.bv_len); rc = smb_send_kvec(server, &smb_msg, &sent); if (rc < 0) - break; + goto uncork; total_len += sent; + + /* now walk the page array and send each page in it */ + for (i = 0; i < rqst[j].rq_npages; i++) { + struct bio_vec bvec; + + bvec.bv_page = rqst[j].rq_pages[i]; + rqst_page_get_length(&rqst[j], i, &bvec.bv_len, + &bvec.bv_offset); + + iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC, + &bvec, 1, bvec.bv_len); + rc = smb_send_kvec(server, &smb_msg, &sent); + if (rc < 0) + break; + + total_len += sent; + } } uncork: @@ -308,9 +352,9 @@ uncork: kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK, (char *)&val, sizeof(val)); - if ((total_len > 0) && (total_len != smb_buf_length + 4)) { + if ((total_len > 0) && (total_len != send_length)) { cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n", - smb_buf_length + 4, total_len); + send_length, total_len); /* * If we have only sent part of an SMB then the next SMB could * be 
taken as the remainder of this one. We need to kill the @@ -335,7 +379,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags) int rc; if (!(flags & CIFS_TRANSFORM_REQ)) - return __smb_send_rqst(server, rqst); + return __smb_send_rqst(server, 1, rqst); if (!server->ops->init_transform_rq || !server->ops->free_transform_rq) { @@ -347,7 +391,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags) if (rc) return rc; - rc = __smb_send_rqst(server, &cur_rqst); + rc = __smb_send_rqst(server, 1, &cur_rqst); server->ops->free_transform_rq(&cur_rqst); return rc; } @@ -365,7 +409,7 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, iov[1].iov_base = (char *)smb_buffer + 4; iov[1].iov_len = smb_buf_length; - return __smb_send_rqst(server, &rqst); + return __smb_send_rqst(server, 1, &rqst); } static int @@ -730,7 +774,6 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, * to the same server. We may make this configurable later or * use ses->maxReq. 
*/ - rc = wait_for_free_request(ses->server, timeout, optype); if (rc) return rc; @@ -766,8 +809,8 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, #ifdef CONFIG_CIFS_SMB311 if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) - smb311_update_preauth_hash(ses, rqst->rq_iov+1, - rqst->rq_nvec-1); + smb311_update_preauth_hash(ses, rqst->rq_iov, + rqst->rq_nvec); #endif if (timeout == CIFS_ASYNC_OP) @@ -812,8 +855,8 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, #ifdef CONFIG_CIFS_SMB311 if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) { struct kvec iov = { - .iov_base = buf, - .iov_len = midQ->resp_buf_size + .iov_base = resp_iov->iov_base, + .iov_len = resp_iov->iov_len }; smb311_update_preauth_hash(ses, &iov, 1); } @@ -872,49 +915,6 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, return rc; } -/* Like SendReceive2 but iov[0] does not contain an rfc1002 header */ -int -smb2_send_recv(const unsigned int xid, struct cifs_ses *ses, - struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, - const int flags, struct kvec *resp_iov) -{ - struct smb_rqst rqst; - struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; - int rc; - int i; - __u32 count; - __be32 rfc1002_marker; - - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { - new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec), - GFP_KERNEL); - if (!new_iov) - return -ENOMEM; - } else - new_iov = s_iov; - - /* 1st iov is an RFC1002 Session Message length */ - memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); - - count = 0; - for (i = 1; i < n_vec + 1; i++) - count += new_iov[i].iov_len; - - rfc1002_marker = cpu_to_be32(count); - - new_iov[0].iov_base = &rfc1002_marker; - new_iov[0].iov_len = 4; - - memset(&rqst, 0, sizeof(struct smb_rqst)); - rqst.rq_iov = new_iov; - rqst.rq_nvec = n_vec + 1; - - rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov); - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) - kfree(new_iov); - return rc; -} - int SendReceive(const unsigned int 
xid, struct cifs_ses *ses, struct smb_hdr *in_buf, struct smb_hdr *out_buf, diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index ca599df0dcb1..f3d543dd9a98 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -105,11 +105,11 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) if (attr->va_size != -1) inode->i_blocks = (attr->va_size + 511) >> 9; if (attr->va_atime.tv_sec != -1) - inode->i_atime = attr->va_atime; + inode->i_atime = timespec_to_timespec64(attr->va_atime); if (attr->va_mtime.tv_sec != -1) - inode->i_mtime = attr->va_mtime; + inode->i_mtime = timespec_to_timespec64(attr->va_mtime); if (attr->va_ctime.tv_sec != -1) - inode->i_ctime = attr->va_ctime; + inode->i_ctime = timespec_to_timespec64(attr->va_ctime); } @@ -175,13 +175,13 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) vattr->va_size = iattr->ia_size; } if ( valid & ATTR_ATIME ) { - vattr->va_atime = iattr->ia_atime; + vattr->va_atime = timespec64_to_timespec(iattr->ia_atime); } if ( valid & ATTR_MTIME ) { - vattr->va_mtime = iattr->ia_mtime; + vattr->va_mtime = timespec64_to_timespec(iattr->ia_mtime); } if ( valid & ATTR_CTIME ) { - vattr->va_ctime = iattr->ia_ctime; + vattr->va_ctime = timespec64_to_timespec(iattr->ia_ctime); } } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index ad718e5e37bb..28ef9e528853 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -90,14 +90,14 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) if (ia_valid & ATTR_GID) sd_iattr->ia_gid = iattr->ia_gid; if (ia_valid & ATTR_ATIME) - sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, - inode->i_sb->s_time_gran); + sd_iattr->ia_atime = timespec64_trunc(iattr->ia_atime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_MTIME) - sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, - inode->i_sb->s_time_gran); + sd_iattr->ia_mtime = timespec64_trunc(iattr->ia_mtime, + inode->i_sb->s_time_gran); if (ia_valid & 
ATTR_CTIME) - sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, - inode->i_sb->s_time_gran); + sd_iattr->ia_ctime = timespec64_trunc(iattr->ia_ctime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_MODE) { umode_t mode = iattr->ia_mode; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index c4fb9ad7c808..f408994fc632 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -90,7 +90,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, const struct cramfs_inode *cramfs_inode, unsigned int offset) { struct inode *inode; - static struct timespec zerotime; + static struct timespec64 zerotime; inode = iget_locked(sb, cramino(cramfs_inode, offset)); if (!inode) diff --git a/fs/dcache.c b/fs/dcache.c index 0e8e5de3c48a..8d2ec4898c2b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -358,14 +358,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - bool hashed = !d_unhashed(dentry); - if (hashed) - raw_write_seqcount_begin(&dentry->d_seq); + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - if (hashed) - raw_write_seqcount_end(&dentry->d_seq); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -732,16 +729,16 @@ static inline bool fast_dput(struct dentry *dentry) if (dentry->d_lockref.count > 1) { dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); - return 1; + return true; } - return 0; + return false; } /* * If we weren't the last ref, we're done. */ if (ret) - return 1; + return true; /* * Careful, careful. The reference count went down @@ -770,7 +767,7 @@ static inline bool fast_dput(struct dentry *dentry) /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) - return 1; + return true; /* * Not the fast normal case? Get the lock. 
We've already decremented @@ -787,7 +784,7 @@ static inline bool fast_dput(struct dentry *dentry) */ if (dentry->d_lockref.count) { spin_unlock(&dentry->d_lock); - return 1; + return true; } /* @@ -796,7 +793,7 @@ static inline bool fast_dput(struct dentry *dentry) * set it to 1. */ dentry->d_lockref.count = 1; - return 0; + return false; } @@ -1892,50 +1889,25 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW; + inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_instantiate_new); -/** - * d_instantiate_no_diralias - instantiate a non-aliased dentry - * @entry: dentry to complete - * @inode: inode to attach to this dentry - * - * Fill in inode information in the entry. If a directory alias is found, then - * return an error (and drop inode). Together with d_materialise_unique() this - * guarantees that a directory inode may never have more than one alias. 
- */ -int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) -{ - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); - - security_d_instantiate(entry, inode); - spin_lock(&inode->i_lock); - if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { - spin_unlock(&inode->i_lock); - iput(inode); - return -EBUSY; - } - __d_instantiate(entry, inode); - spin_unlock(&inode->i_lock); - - return 0; -} -EXPORT_SYMBOL(d_instantiate_no_diralias); - struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; if (root_inode) { res = d_alloc_anon(root_inode->i_sb); - if (res) + if (res) { + res->d_flags |= DCACHE_RCUACCESS; d_instantiate(res, root_inode); - else + } else { iput(root_inode); + } } return res; } @@ -2676,33 +2648,6 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) } EXPORT_SYMBOL(d_exact_alias); -/** - * dentry_update_name_case - update case insensitive dentry with a new name - * @dentry: dentry to be updated - * @name: new name - * - * Update a case insensitive dentry with new case of name. - * - * dentry must have been returned by d_lookup with name @name. Old and new - * name lengths must match (ie. no d_compare which allows mismatched name - * lengths). - * - * Parent inode i_mutex must be held over d_lookup and into this call (to - * keep renames and concurrent inserts, and readdir(2) away). 
- */ -void dentry_update_name_case(struct dentry *dentry, const struct qstr *name) -{ - BUG_ON(!inode_is_locked(dentry->d_parent->d_inode)); - BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ - - spin_lock(&dentry->d_lock); - write_seqcount_begin(&dentry->d_seq); - memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); - write_seqcount_end(&dentry->d_seq); - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(dentry_update_name_case); - static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 71fccccf317e..8c6ab6c95727 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -86,7 +86,9 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, /* length of the variable name itself: remove GUID and separator */ namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1; - uuid_le_to_bin(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + err = guid_parse(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + if (err) + goto out; if (efivar_variable_is_removable(var->var.VendorGuid, dentry->d_name.name, namelen)) diff --git a/fs/eventfd.c b/fs/eventfd.c index 61c9514da5e9..08d3bd602f73 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -101,20 +101,14 @@ static int eventfd_release(struct inode *inode, struct file *file) return 0; } -static struct wait_queue_head * -eventfd_get_poll_head(struct file *file, __poll_t events) -{ - struct eventfd_ctx *ctx = file->private_data; - - return &ctx->wqh; -} - -static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) +static __poll_t eventfd_poll(struct file *file, poll_table *wait) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; u64 count; + poll_wait(file, &ctx->wqh, wait); + /* * All writes to ctx->count occur within ctx->wqh.lock. 
This read * can be done outside ctx->wqh.lock because we know that poll_wait @@ -311,8 +305,7 @@ static const struct file_operations eventfd_fops = { .show_fdinfo = eventfd_show_fdinfo, #endif .release = eventfd_release, - .get_poll_head = eventfd_get_poll_head, - .poll_mask = eventfd_poll_mask, + .poll = eventfd_poll, .read = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, diff --git a/fs/exec.c b/fs/exec.c index 2d4e0075bd24..bdd0eacefdf5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -290,15 +290,15 @@ static int __bprm_mm_init(struct linux_binprm *bprm) struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; - bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; + vma_set_anonymous(vma); if (down_write_killable(&mm->mmap_sem)) { err = -EINTR; goto err_free; } - vma->vm_mm = mm; /* * Place the stack at the largest stack address the architecture @@ -311,7 +311,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); if (err) @@ -326,7 +325,7 @@ err: up_write(&mm->mmap_sem); err_free: bprm->vma = NULL; - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return err; } diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index ddbf87246898..1b8b44637e70 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -146,68 +146,82 @@ int _ore_get_io_state(struct ore_layout *layout, struct ore_io_state **pios) { struct ore_io_state *ios; - struct page **pages; - struct osd_sg_entry *sgilist; + size_t size_ios, size_extra, size_total; + void *ios_extra; + + /* + * The desired layout looks like this, with the extra_allocation + * items pointed at from fields within ios or per_dev: + struct __alloc_all_io_state { struct ore_io_state ios; struct 
ore_per_dev_state per_dev[numdevs]; union { struct osd_sg_entry sglist[sgs_per_dev * numdevs]; struct page *pages[num_par_pages]; - }; - } *_aios; - - if (likely(sizeof(*_aios) <= PAGE_SIZE)) { - _aios = kzalloc(sizeof(*_aios), GFP_KERNEL); - if (unlikely(!_aios)) { - ORE_DBGMSG("Failed kzalloc bytes=%zd\n", - sizeof(*_aios)); + } extra_allocation; + } whole_allocation; + + */ + + /* This should never happen, so abort early if it ever does. */ + if (sgs_per_dev && num_par_pages) { + ORE_DBGMSG("Tried to use both pages and sglist\n"); + *pios = NULL; + return -EINVAL; + } + + if (numdevs > (INT_MAX - sizeof(*ios)) / + sizeof(struct ore_per_dev_state)) + return -ENOMEM; + size_ios = sizeof(*ios) + sizeof(struct ore_per_dev_state) * numdevs; + + if (sgs_per_dev * numdevs > INT_MAX / sizeof(struct osd_sg_entry)) + return -ENOMEM; + if (num_par_pages > INT_MAX / sizeof(struct page *)) + return -ENOMEM; + size_extra = max(sizeof(struct osd_sg_entry) * (sgs_per_dev * numdevs), + sizeof(struct page *) * num_par_pages); + + size_total = size_ios + size_extra; + + if (likely(size_total <= PAGE_SIZE)) { + ios = kzalloc(size_total, GFP_KERNEL); + if (unlikely(!ios)) { + ORE_DBGMSG("Failed kzalloc bytes=%zd\n", size_total); *pios = NULL; return -ENOMEM; } - pages = num_par_pages ? _aios->pages : NULL; - sgilist = sgs_per_dev ? 
_aios->sglist : NULL; - ios = &_aios->ios; + ios_extra = (char *)ios + size_ios; } else { - struct __alloc_small_io_state { - struct ore_io_state ios; - struct ore_per_dev_state per_dev[numdevs]; - } *_aio_small; - union __extra_part { - struct osd_sg_entry sglist[sgs_per_dev * numdevs]; - struct page *pages[num_par_pages]; - } *extra_part; - - _aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL); - if (unlikely(!_aio_small)) { + ios = kzalloc(size_ios, GFP_KERNEL); + if (unlikely(!ios)) { ORE_DBGMSG("Failed alloc first part bytes=%zd\n", - sizeof(*_aio_small)); + size_ios); *pios = NULL; return -ENOMEM; } - extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL); - if (unlikely(!extra_part)) { + ios_extra = kzalloc(size_extra, GFP_KERNEL); + if (unlikely(!ios_extra)) { ORE_DBGMSG("Failed alloc second part bytes=%zd\n", - sizeof(*extra_part)); - kfree(_aio_small); + size_extra); + kfree(ios); *pios = NULL; return -ENOMEM; } - pages = num_par_pages ? extra_part->pages : NULL; - sgilist = sgs_per_dev ? 
extra_part->sglist : NULL; /* In this case the per_dev[0].sgilist holds the pointer to * be freed */ - ios = &_aio_small->ios; ios->extra_part_alloc = true; } - if (pages) { - ios->parity_pages = pages; + if (num_par_pages) { + ios->parity_pages = ios_extra; ios->max_par_pages = num_par_pages; } - if (sgilist) { + if (sgs_per_dev) { + struct osd_sg_entry *sgilist = ios_extra; unsigned d; for (d = 0; d < numdevs; ++d) { diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 27cbdb697649..199590f36203 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -71,6 +71,11 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, { struct __stripe_pages_2d *sp2d; unsigned data_devs = group_width - parity; + + /* + * Desired allocation layout is, though when larger than PAGE_SIZE, + * each struct __alloc_1p_arrays is separately allocated: + struct _alloc_all_bytes { struct __alloc_stripe_pages_2d { struct __stripe_pages_2d sp2d; @@ -82,55 +87,85 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, char page_is_read[data_devs]; } __a1pa[pages_in_unit]; } *_aab; + struct __alloc_1p_arrays *__a1pa; struct __alloc_1p_arrays *__a1pa_end; - const unsigned sizeof__a1pa = sizeof(_aab->__a1pa[0]); + + */ + + char *__a1pa; + char *__a1pa_end; + + const size_t sizeof_stripe_pages_2d = + sizeof(struct __stripe_pages_2d) + + sizeof(struct __1_page_stripe) * pages_in_unit; + const size_t sizeof__a1pa = + ALIGN(sizeof(struct page *) * (2 * group_width) + data_devs, + sizeof(void *)); + const size_t sizeof__a1pa_arrays = sizeof__a1pa * pages_in_unit; + const size_t alloc_total = sizeof_stripe_pages_2d + + sizeof__a1pa_arrays; + unsigned num_a1pa, alloc_size, i; /* FIXME: check these numbers in ore_verify_layout */ - BUG_ON(sizeof(_aab->__asp2d) > PAGE_SIZE); + BUG_ON(sizeof_stripe_pages_2d > PAGE_SIZE); BUG_ON(sizeof__a1pa > PAGE_SIZE); - if (sizeof(*_aab) > PAGE_SIZE) { - num_a1pa = (PAGE_SIZE - sizeof(_aab->__asp2d)) / sizeof__a1pa; - 
alloc_size = sizeof(_aab->__asp2d) + sizeof__a1pa * num_a1pa; + /* + * If alloc_total would be larger than PAGE_SIZE, only allocate + * as many a1pa items as would fill the rest of the page, instead + * of the full pages_in_unit count. + */ + if (alloc_total > PAGE_SIZE) { + num_a1pa = (PAGE_SIZE - sizeof_stripe_pages_2d) / sizeof__a1pa; + alloc_size = sizeof_stripe_pages_2d + sizeof__a1pa * num_a1pa; } else { num_a1pa = pages_in_unit; - alloc_size = sizeof(*_aab); + alloc_size = alloc_total; } - _aab = kzalloc(alloc_size, GFP_KERNEL); - if (unlikely(!_aab)) { + *psp2d = sp2d = kzalloc(alloc_size, GFP_KERNEL); + if (unlikely(!sp2d)) { ORE_DBGMSG("!! Failed to alloc sp2d size=%d\n", alloc_size); return -ENOMEM; } + /* From here Just call _sp2d_free */ - sp2d = &_aab->__asp2d.sp2d; - *psp2d = sp2d; /* From here Just call _sp2d_free */ - - __a1pa = _aab->__a1pa; - __a1pa_end = __a1pa + num_a1pa; + /* Find start of a1pa area. */ + __a1pa = (char *)sp2d + sizeof_stripe_pages_2d; + /* Find end of the _allocated_ a1pa area. */ + __a1pa_end = __a1pa + alloc_size; + /* Allocate additionally needed a1pa items in PAGE_SIZE chunks. */ for (i = 0; i < pages_in_unit; ++i) { + struct __1_page_stripe *stripe = &sp2d->_1p_stripes[i]; + if (unlikely(__a1pa >= __a1pa_end)) { num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa, pages_in_unit - i); + alloc_size = sizeof__a1pa * num_a1pa; - __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL); + __a1pa = kzalloc(alloc_size, GFP_KERNEL); if (unlikely(!__a1pa)) { ORE_DBGMSG("!! 
Failed to _alloc_1p_arrays=%d\n", num_a1pa); return -ENOMEM; } - __a1pa_end = __a1pa + num_a1pa; + __a1pa_end = __a1pa + alloc_size; /* First *pages is marked for kfree of the buffer */ - sp2d->_1p_stripes[i].alloc = true; + stripe->alloc = true; } - sp2d->_1p_stripes[i].pages = __a1pa->pages; - sp2d->_1p_stripes[i].scribble = __a1pa->scribble ; - sp2d->_1p_stripes[i].page_is_read = __a1pa->page_is_read; - ++__a1pa; + /* + * Attach all _lp_stripes pointers to the allocation for + * it which was either part of the original PAGE_SIZE + * allocation or the subsequent allocation in this loop. + */ + stripe->pages = (void *)__a1pa; + stripe->scribble = stripe->pages + group_width; + stripe->page_is_read = (char *)stripe->scribble + group_width; + __a1pa += sizeof__a1pa; } sp2d->parity = parity; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 719a3152da80..41cf2fbee50d 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -549,27 +549,26 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, static int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, struct exofs_dev **peds) { - struct __alloc_ore_devs_and_exofs_devs { - /* Twice bigger table: See exofs_init_comps() and comment at - * exofs_read_lookup_dev_table() - */ - struct ore_dev *oreds[numdevs * 2 - 1]; - struct exofs_dev eds[numdevs]; - } *aoded; + /* Twice bigger table: See exofs_init_comps() and comment at + * exofs_read_lookup_dev_table() + */ + const size_t numores = numdevs * 2 - 1; struct exofs_dev *eds; unsigned i; - aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); - if (unlikely(!aoded)) { + sbi->oc.ods = kzalloc(numores * sizeof(struct ore_dev *) + + numdevs * sizeof(struct exofs_dev), GFP_KERNEL); + if (unlikely(!sbi->oc.ods)) { EXOFS_ERR("ERROR: failed allocating Device array[%d]\n", numdevs); return -ENOMEM; } - sbi->oc.ods = aoded->oreds; - *peds = eds = aoded->eds; + /* Start of allocated struct exofs_dev entries */ + *peds = eds = (void *)sbi->oc.ods[numores]; + /* 
Initialize pointers into struct exofs_dev */ for (i = 0; i < numdevs; ++i) - aoded->oreds[i] = &eds[i].ored; + sbi->oc.ods[i] = &eds[i].ored; return 0; } diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index cc40802ddfa8..00e759f05161 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -748,7 +748,6 @@ extern void ext2_free_blocks (struct inode *, unsigned long, unsigned long); extern unsigned long ext2_count_free_blocks (struct super_block *); extern unsigned long ext2_count_dirs (struct super_block *); -extern void ext2_check_blocks_bitmap (struct super_block *); extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh); @@ -771,7 +770,6 @@ extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *); extern void ext2_free_inode (struct inode *); extern unsigned long ext2_count_free_inodes (struct super_block *); -extern void ext2_check_inodes_bitmap (struct super_block *); extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 6484199b35d1..5c3d7b7e4975 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -611,8 +611,7 @@ fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return ERR_PTR(err); fail: diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 152453a91877..0c26dcc5d850 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -45,8 +45,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) return 0; } inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -192,8 +191,7 @@ out: out_fail: inode_dec_link_count(inode); - unlock_new_inode(inode); - iput (inode); + discard_new_inode(inode); goto out; } @@ -261,8 +259,7 @@ 
out: out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); out_dir: inode_dec_link_count(dir); goto out; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 25ab1274090f..8ff53f8da3bc 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -557,6 +557,9 @@ static int parse_options(char *options, struct super_block *sb, set_opt (opts->s_mount_opt, NO_UID32); break; case Opt_nocheck: + ext2_msg(sb, KERN_WARNING, + "Option nocheck/check=none is deprecated and" + " will be removed in June 2020."); clear_opt (opts->s_mount_opt, CHECK); break; case Opt_debug: @@ -1335,9 +1338,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) new_opts.s_resgid = sbi->s_resgid; spin_unlock(&sbi->s_lock); - /* - * Allow the "check" option to be passed as a remount option. - */ if (!parse_options(data, sb, &new_opts)) return -EINVAL; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b00481c475cb..aa52d87985aa 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -184,7 +184,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, unsigned int bit, bit_max; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start, tmp; - int flex_bg = 0; J_ASSERT_BH(bh, buffer_locked(bh)); @@ -207,22 +206,19 @@ static int ext4_init_block_bitmap(struct super_block *sb, start = ext4_group_first_block_no(sb, block_group); - if (ext4_has_feature_flex_bg(sb)) - flex_bg = 1; - /* Set bits for block and inode bitmaps, and inode table */ tmp = ext4_block_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_table(sb, gdp); for (; tmp < 
ext4_inode_table(sb, gdp) + sbi->s_itb_per_group; tmp++) { - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); } @@ -372,6 +368,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, desc, bh))) { ext4_unlock_group(sb, block_group); @@ -390,6 +388,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb, return -EFSCORRUPTED; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } @@ -442,7 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) goto verify; } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Block bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index df95412915ea..7c7123f265c2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -817,12 +817,14 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; } -#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ -do { \ - (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ - if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ - (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&(inode)->xtime); \ +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = 
cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\ + struct timespec ts = timespec64_to_timespec((inode)->xtime); \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&ts); \ + } \ } while (0) #define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ @@ -834,16 +836,20 @@ do { \ ext4_encode_extra_time(&(einode)->xtime); \ } while (0) -#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ -do { \ - (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ - if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ - ext4_decode_extra_time(&(inode)->xtime, \ - raw_inode->xtime ## _extra); \ - else \ - (inode)->xtime.tv_nsec = 0; \ +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \ + struct timespec ts = timespec64_to_timespec((inode)->xtime); \ + ext4_decode_extra_time(&ts, \ + raw_inode->xtime ## _extra); \ + (inode)->xtime = timespec_to_timespec64(ts); \ + } \ + else \ + (inode)->xtime.tv_nsec = 0; \ } while (0) + #define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ do { \ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ @@ -1108,6 +1114,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ @@ -1501,11 +1508,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) static inline int ext4_valid_inum(struct super_block 
*sb, unsigned long ino) { return ino == EXT4_ROOT_INO || - ino == EXT4_USR_QUOTA_INO || - ino == EXT4_GRP_QUOTA_INO || - ino == EXT4_BOOT_LOADER_INO || - ino == EXT4_JOURNAL_INO || - ino == EXT4_RESIZE_INO || (ino >= EXT4_FIRST_INO(sb) && ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } @@ -3012,9 +3014,6 @@ extern int ext4_inline_data_fiemap(struct inode *inode, struct iomap; extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap); -extern int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed); extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline); extern int ext4_convert_inline_data(struct inode *inode); diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 98fb0c119c68..adf6668b596f 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -91,6 +91,7 @@ struct ext4_extent_header { }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) +#define EXT4_MAX_EXTENT_DEPTH 5 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ (sizeof(struct ext4_extent_header) + \ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0057fe3f248d..8ce6fd5b10dd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -869,6 +869,12 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, eh = ext_inode_hdr(inode); depth = ext_depth(inode); + if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) { + EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d", + depth); + ret = -EFSCORRUPTED; + goto err; + } if (path) { ext4_ext_drop_refs(path); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4d6e007f3569..f336cbc6e932 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -90,6 +90,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSCORRUPTED; ext4_lock_group(sb, block_group); + if (buffer_verified(bh)) + goto verified; blk = ext4_inode_bitmap(sb, desc); if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, EXT4_INODES_PER_GROUP(sb) / 8)) { @@ -101,6 +103,7 @@ 
static int ext4_validate_inode_bitmap(struct super_block *sb, return -EFSBADCRC; } set_buffer_verified(bh); +verified: ext4_unlock_group(sb, block_group); return 0; } @@ -150,7 +153,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Inode bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); @@ -994,7 +1006,8 @@ got: /* recheck and clear flag under lock if we still need to */ ext4_lock_group(sb, group); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); @@ -1072,8 +1085,8 @@ got: inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = - current_time(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + ei->i_crtime = timespec64_to_timespec(inode->i_mtime); memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; @@ -1375,7 +1388,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, ext4_itable_unused_count(sb, gdp)), sbi->s_inodes_per_block); - if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { + if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) || + ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) - + 
ext4_itable_unused_count(sb, gdp)) < + EXT4_FIRST_INO(sb)))) { ext4_error(sb, "Something is wrong with group %u: " "used itable blocks: %d; " "itable unused count: %u", diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 285ed1588730..3543fe80a3c4 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -437,6 +437,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); + memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || @@ -681,6 +682,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, goto convert; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out; + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, 0, flags); @@ -709,7 +714,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping, out_up_read: up_read(&EXT4_I(inode)->xattr_sem); out: - if (handle) + if (handle && (ret != 1)) ext4_journal_stop(handle); brelse(iloc.bh); return ret; @@ -751,6 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); + mark_inode_dirty(inode); out: return copied; } @@ -886,18 +892,17 @@ retry_journal: flags |= AOP_FLAG_NOFS; if (ret == -ENOSPC) { + ext4_journal_stop(handle); ret = ext4_da_convert_inline_data_to_extent(mapping, inode, flags, fsdata); - ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; goto out; } - page = grab_cache_page_write_begin(mapping, 0, flags); if (!page) { ret = -ENOMEM; @@ -915,6 +920,9 @@ retry_journal: if (ret < 0) goto out_release_page; } + ret = ext4_journal_get_write_access(handle, iloc.bh); + if (ret) + goto out_release_page; up_read(&EXT4_I(inode)->xattr_sem); *pagep = page; @@ -935,7 +943,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, 
loff_t pos, unsigned len, unsigned copied, struct page *page) { - int i_size_changed = 0; int ret; ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -953,10 +960,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * But it's important to update i_size while still holding page lock: * page writeout could otherwise come in and zero beyond i_size. */ - if (pos+copied > inode->i_size) { + if (pos+copied > inode->i_size) i_size_write(inode, pos+copied); - i_size_changed = 1; - } unlock_page(page); put_page(page); @@ -966,8 +971,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, * ordering of page lock and transaction start for journaling * filesystems. */ - if (i_size_changed) - mark_inode_dirty(inode); + mark_inode_dirty(inode); return copied; } @@ -1890,42 +1894,6 @@ out: return (error < 0 ? error : 0); } -/* - * Called during xattr set, and if we can sparse space 'needed', - * just create the extent tree evict the data to the outer block. - * - * We use jbd2 instead of page cache to move data to the 1st block - * so that the whole transaction can be committed as a whole and - * the data isn't lost because of the delayed page cache write. 
- */ -int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed) -{ - int error; - struct ext4_xattr_entry *entry; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - - raw_inode = ext4_raw_inode(&iloc); - entry = (struct ext4_xattr_entry *)((void *)raw_inode + - EXT4_I(inode)->i_inline_off); - if (EXT4_XATTR_LEN(entry->e_name_len) + - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) { - error = -ENOSPC; - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); -out: - brelse(iloc.bh); - return error; -} - int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2ea07efbe016..4efe77286ecd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -402,9 +402,9 @@ static int __check_block_validity(struct inode *inode, const char *func, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, - "lblock %lu mapped to illegal pblock " + "lblock %lu mapped to illegal pblock %llu " "(length %d)", (unsigned long) map->m_lblk, - map->m_len); + map->m_pblk, map->m_len); return -EFSCORRUPTED; } return 0; @@ -1389,9 +1389,10 @@ static int ext4_write_end(struct file *file, loff_t old_size = inode->i_size; int ret = 0, ret2; int i_size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_write_end(inode, pos, len, copied); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1419,7 +1420,7 @@ static int ext4_write_end(struct file *file, * ordering of page lock and transaction start for journaling * filesystems. 
*/ - if (i_size_changed) + if (i_size_changed || inline_data) ext4_mark_inode_dirty(handle, inode); if (pos + len > inode->i_size && ext4_can_truncate(inode)) @@ -1493,6 +1494,7 @@ static int ext4_journalled_write_end(struct file *file, int partial = 0; unsigned from, to; int size_changed = 0; + int inline_data = ext4_has_inline_data(inode); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_SIZE - 1); @@ -1500,7 +1502,7 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) { + if (inline_data) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -1531,7 +1533,7 @@ static int ext4_journalled_write_end(struct file *file, if (old_size < pos) pagecache_isize_extended(inode, old_size, pos); - if (size_changed) { + if (size_changed || inline_data) { ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -2028,11 +2030,7 @@ static int __ext4_journalled_writepage(struct page *page, } if (inline_data) { - BUFFER_TRACE(inode_bh, "get write access"); - ret = ext4_journal_get_write_access(handle, inode_bh); - - err = ext4_handle_dirty_metadata(handle, inode, inode_bh); - + ret = ext4_mark_inode_dirty(handle, inode); } else { ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); @@ -4506,7 +4504,8 @@ static int __ext4_get_inode_loc(struct inode *inode, int inodes_per_block, inode_offset; iloc->bh = NULL; - if (!ext4_valid_inum(sb, inode->i_ino)) + if (inode->i_ino < EXT4_ROOT_INO || + inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) return -EFSCORRUPTED; iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6eae2b91aafa..f7ab34088162 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2423,7 +2423,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, * initialize bb_free to be able 
to skip * empty groups without initialization */ - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { meta_group_info[i]->bb_free = ext4_free_clusters_after_init(sb, group, desc); } else { @@ -2989,7 +2990,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, #endif ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 27b9a76a0dfa..638ad4743477 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -186,11 +186,8 @@ static int kmmpd(void *data) goto exit_thread; } - if (sb_rdonly(sb)) { - ext4_warning(sb, "kmmpd being stopped since filesystem " - "has been remounted as readonly."); - goto exit_thread; - } + if (sb_rdonly(sb)) + break; diff = jiffies - last_update_time; if (diff < mmp_update_interval * HZ) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4a09063ce1d2..2a4c25c4681d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3673,7 +3673,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, }; u8 new_file_type; int retval; - struct timespec ctime; + struct timespec64 ctime; if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) && !projid_eq(EXT4_I(new_dir)->i_projid, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0c4c2201b3aa..b7f7922061be 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -405,6 +405,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) static void ext4_handle_error(struct super_block *sb) { + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + if (sb_rdonly(sb)) return; @@ -740,6 +743,9 @@ 
__acquires(bitlock) va_end(args); } + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); return; @@ -1371,7 +1377,8 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, - Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, + Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, + Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, @@ -1438,6 +1445,8 @@ static const match_table_t tokens = { {Opt_dax, "dax"}, {Opt_stripe, "stripe=%u"}, {Opt_delalloc, "delalloc"}, + {Opt_warn_on_error, "warn_on_error"}, + {Opt_nowarn_on_error, "nowarn_on_error"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"}, @@ -1602,6 +1611,8 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_EXT4_ONLY | MOPT_CLEAR}, + {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, + {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, @@ -2331,6 +2342,7 @@ static int ext4_check_descriptors(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; + ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0); ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; @@ -2363,6 +2375,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (block_bitmap >= sb_block + 1 && + block_bitmap <= 
last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Block bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " @@ -2377,6 +2397,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_bitmap >= sb_block + 1 && + inode_bitmap <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " @@ -2391,6 +2419,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_table >= sb_block + 1 && + inode_table <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode table for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -3097,6 +3133,9 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *gdp = NULL; + if (!ext4_has_group_desc_csum(sb)) + return ngroups; + for (group = 0; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) @@ -3742,6 +3781,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) le32_to_cpu(es->s_log_block_size)); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + 
le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, @@ -3806,6 +3852,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } else { sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + goto failed_mount; + } if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { @@ -3882,13 +3933,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = @@ -3909,10 +3953,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } else { if (clustersize != blocksize) { - ext4_warning(sb, "fragment/cluster size (%d) != " - "block size (%d)", clustersize, - blocksize); - clustersize = blocksize; + ext4_msg(sb, KERN_ERR, + "fragment/cluster size (%d) != " + "block size (%d)", clustersize, blocksize); + goto failed_mount; } if (sbi->s_blocks_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, @@ -3966,6 +4010,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_blocks_count(es)); goto failed_mount; } + if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && + (sbi->s_cluster_ratio == 1)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block is 0 with a 1k block and cluster size"); + goto failed_mount; + } 
+ blocks_count = (ext4_blocks_count(es) - le32_to_cpu(es->s_first_data_block) + EXT4_BLOCKS_PER_GROUP(sb) - 1); @@ -4001,6 +4052,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } bgl_lock_init(sbi->s_blockgroup_lock); @@ -4020,14 +4079,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount2; } } + sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ret = -EFSCORRUPTED; goto failed_mount2; } - sbi->s_gdb_count = db_count; - timer_setup(&sbi->s_err_report, print_daily_error_info, 0); /* Register extent status tree shrinker */ @@ -4736,6 +4794,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!sbh || block_device_ejected(sb)) return error; + + /* + * The superblock bh should be mapped, but it might not be if the + * device was hot-removed. Not much we can do but fail the I/O. + */ + if (!buffer_mapped(sbh)) + return error; + /* * If the file system is mounted read-only, don't update the * superblock write time. 
This avoids updating the superblock @@ -5140,6 +5206,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); + if (sbi->s_mmp_tsk) + kthread_stop(sbi->s_mmp_tsk); } else { /* Make sure we can mount this feature set readwrite */ if (ext4_has_feature_readonly(sb) || diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fc4ced59c565..723df14f4084 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -230,12 +230,12 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, { int error = -EFSCORRUPTED; - if (buffer_verified(bh)) - return 0; - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) goto errout; + if (buffer_verified(bh)) + return 0; + error = -EFSBADCRC; if (!ext4_xattr_block_csum_verify(inode, bh)) goto errout; @@ -1560,7 +1560,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, handle_t *handle, struct inode *inode, bool is_block) { - struct ext4_xattr_entry *last; + struct ext4_xattr_entry *last, *next; struct ext4_xattr_entry *here = s->here; size_t min_offs = s->end - s->base, name_len = strlen(i->name); int in_inode = i->in_inode; @@ -1595,7 +1595,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Compute min_offs and last. 
*/ last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + for (; !IS_LAST_ENTRY(last); last = next) { + next = EXT4_XATTR_NEXT(last); + if ((void *)next >= s->end) { + EXT4_ERROR_INODE(inode, "corrupted xattr entries"); + ret = -EFSCORRUPTED; + goto out; + } if (!last->e_value_inum && last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) @@ -2206,23 +2212,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); - if (error) { - if (error == -ENOSPC && - ext4_has_inline_data(inode)) { - error = ext4_try_to_evict_inline_data(handle, inode, - EXT4_XATTR_LEN(strlen(i->name) + - EXT4_XATTR_SIZE(i->value_len))); - if (error) - return error; - error = ext4_xattr_ibody_find(inode, i, is); - if (error) - return error; - error = ext4_xattr_set_entry(i, s, handle, inode, - false /* is_block */); - } - if (error) - return error; - } + if (error) + return error; header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); @@ -2651,6 +2642,11 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode, last = IFIRST(header); /* Find the entry best suited to be pushed into EA block */ for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + /* never move system.data out of the inode */ + if ((last->e_name_len == 4) && + (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) && + !memcmp(last->e_name, "data", 4)) + continue; total_size = EXT4_XATTR_LEN(last->e_name_len); if (!last->e_value_inum) total_size += EXT4_XATTR_SIZE( diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4c09e770a0a3..4d8b1de83143 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2518,6 +2518,7 @@ static inline void clear_file(struct inode *inode, int type) static inline bool f2fs_skip_inode_update(struct inode 
*inode, int dsync) { + struct timespec ts; bool ret; if (dsync) { @@ -2533,11 +2534,14 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + ts = timespec64_to_timespec(inode->i_atime); + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &ts)) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + ts = timespec64_to_timespec(inode->i_ctime); + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &ts)) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + ts = timespec64_to_timespec(inode->i_mtime); + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &ts)) return false; if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, &F2FS_I(inode)->i_crtime)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cadb425c02d7..6880c6f78d58 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -730,14 +730,14 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; if (ia_valid & ATTR_ATIME) - inode->i_atime = timespec_trunc(attr->ia_atime, - inode->i_sb->s_time_gran); + inode->i_atime = timespec64_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_MTIME) - inode->i_mtime = timespec_trunc(attr->ia_mtime, - inode->i_sb->s_time_gran); + inode->i_mtime = timespec64_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_CTIME) - inode->i_ctime = timespec_trunc(attr->ia_ctime, - inode->i_sb->s_time_gran); + inode->i_ctime = timespec64_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 30a777369d2b..f121c864f4c0 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -297,9 +297,9 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = 
le32_to_cpu(ri->i_crtime_nsec); } - F2FS_I(inode)->i_disk_time[0] = inode->i_atime; - F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; - F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime); + F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime); + F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime); F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; f2fs_put_page(node_page, 1); @@ -470,9 +470,9 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (inode->i_nlink == 0) clear_inline_node(node_page); - F2FS_I(inode)->i_disk_time[0] = inode->i_atime; - F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; - F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime); + F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime); + F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime); F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 64050c84d353..231b7f3ea7d3 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -50,8 +50,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = - F2FS_I(inode)->i_crtime = current_time(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + F2FS_I(inode)->i_crtime = timespec64_to_timespec(inode->i_mtime); inode->i_generation = sbi->s_next_generation++; if (S_ISDIR(inode->i_mode)) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ffbbf0520d9e..bfd589ea74c0 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -158,8 +158,14 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false); if (err) return err; + if (!phys) { + fat_fs_error(sb, + "invalid 
FAT chain (i_pos %lld, last_block %llu)", + MSDOS_I(inode)->i_pos, + (unsigned long long)last_block); + return -EIO; + } - BUG_ON(!phys); BUG_ON(*max_blocks != mapped_blocks); set_buffer_new(bh_result); map_bh(bh_result, sb, phys); @@ -502,6 +508,7 @@ static int fat_validate_dir(struct inode *dir) /* doesn't deal with root inode */ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) { + struct timespec ts; struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); int error; @@ -552,11 +559,14 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; - fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0); + fat_time_fat2unix(sbi, &ts, de->time, de->date, 0); + inode->i_mtime = timespec_to_timespec64(ts); if (sbi->options.isvfat) { - fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime, + fat_time_fat2unix(sbi, &ts, de->ctime, de->cdate, de->ctime_cs); - fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0); + inode->i_ctime = timespec_to_timespec64(ts); + fat_time_fat2unix(sbi, &ts, 0, de->adate, 0); + inode->i_atime = timespec_to_timespec64(ts); } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -697,13 +707,21 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void fat_reset_iocharset(struct fat_mount_options *opts) +{ + if (opts->iocharset != fat_default_iocharset) { + /* Note: opts->iocharset can be NULL here */ + kfree(opts->iocharset); + opts->iocharset = fat_default_iocharset; + } +} + static void delayed_free(struct rcu_head *p) { struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); unload_nls(sbi->nls_disk); unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); kfree(sbi); } @@ -825,6 +843,7 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) static int 
__fat_write_inode(struct inode *inode, int wait) { + struct timespec ts; struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh; @@ -862,13 +881,16 @@ retry: raw_entry->size = cpu_to_le32(inode->i_size); raw_entry->attr = fat_make_attrs(inode); fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart); - fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, + ts = timespec64_to_timespec(inode->i_mtime); + fat_time_unix2fat(sbi, &ts, &raw_entry->time, &raw_entry->date, NULL); if (sbi->options.isvfat) { __le16 atime; - fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime, + ts = timespec64_to_timespec(inode->i_ctime); + fat_time_unix2fat(sbi, &ts, &raw_entry->ctime, &raw_entry->cdate, &raw_entry->ctime_cs); - fat_time_unix2fat(sbi, &inode->i_atime, &atime, + ts = timespec64_to_timespec(inode->i_atime); + fat_time_unix2fat(sbi, &ts, &atime, &raw_entry->adate, NULL); } spin_unlock(&sbi->inode_hash_lock); @@ -1118,7 +1140,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; - opts->iocharset = fat_default_iocharset; + fat_reset_iocharset(opts); if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; @@ -1275,8 +1297,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, /* vfat specific */ case Opt_charset: - if (opts->iocharset != fat_default_iocharset) - kfree(opts->iocharset); + fat_reset_iocharset(opts); iocharset = match_strdup(&args[0]); if (!iocharset) return -ENOMEM; @@ -1867,8 +1888,7 @@ out_fail: iput(fat_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); sb->s_fs_info = NULL; kfree(sbi); return error; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 
484ce674e0cd..16a832c37d66 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -250,7 +250,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (err) return err; - dir->i_ctime = dir->i_mtime = *ts; + dir->i_ctime = dir->i_mtime = timespec_to_timespec64(*ts); if (IS_DIRSYNC(dir)) (void)fat_sync_inode(dir); else @@ -266,7 +266,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct super_block *sb = dir->i_sb; struct inode *inode = NULL; struct fat_slot_info sinfo; - struct timespec ts; + struct timespec64 ts; + struct timespec t; unsigned char msdos_name[MSDOS_NAME]; int err, is_hid; @@ -285,7 +286,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode, } ts = current_time(dir); - err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &ts, &sinfo); + t = timespec64_to_timespec(ts); + err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &t, &sinfo); if (err) goto out; inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); @@ -344,7 +346,8 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct fat_slot_info sinfo; struct inode *inode; unsigned char msdos_name[MSDOS_NAME]; - struct timespec ts; + struct timespec64 ts; + struct timespec t; int err, is_hid, cluster; mutex_lock(&MSDOS_SB(sb)->s_lock); @@ -362,12 +365,13 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } ts = current_time(dir); - cluster = fat_alloc_new_dir(dir, &ts); + t = timespec64_to_timespec(ts); + cluster = fat_alloc_new_dir(dir, &t); if (cluster < 0) { err = cluster; goto out; } - err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo); + err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &t, &sinfo); if (err) goto out_free; inc_nlink(dir); @@ -432,7 +436,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, struct msdos_dir_entry *dotdot_de; struct inode *old_inode, *new_inode; struct 
fat_slot_info old_sinfo, sinfo; - struct timespec ts; + struct timespec64 ts; loff_t new_i_pos; int err, old_attrs, is_dir, update_dotdot, corrupt = 0; @@ -499,8 +503,9 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, new_i_pos = MSDOS_I(new_inode)->i_pos; fat_detach(new_inode); } else { + struct timespec t = timespec64_to_timespec(ts); err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0, - &ts, &sinfo); + &t, &sinfo); if (err) goto out; new_i_pos = sinfo.i_pos; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index d4e23f8ddcf6..9a5469120caa 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -678,7 +678,7 @@ static int vfat_add_entry(struct inode *dir, const struct qstr *qname, goto cleanup; /* update timestamp */ - dir->i_ctime = dir->i_mtime = dir->i_atime = *ts; + dir->i_ctime = dir->i_mtime = dir->i_atime = timespec_to_timespec64(*ts); if (IS_DIRSYNC(dir)) (void)fat_sync_inode(dir); else @@ -761,13 +761,15 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct super_block *sb = dir->i_sb; struct inode *inode; struct fat_slot_info sinfo; - struct timespec ts; + struct timespec64 ts; + struct timespec t; int err; mutex_lock(&MSDOS_SB(sb)->s_lock); ts = current_time(dir); - err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); + t = timespec64_to_timespec(ts); + err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &t, &sinfo); if (err) goto out; inode_inc_iversion(dir); @@ -850,18 +852,20 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct super_block *sb = dir->i_sb; struct inode *inode; struct fat_slot_info sinfo; - struct timespec ts; + struct timespec64 ts; + struct timespec t; int err, cluster; mutex_lock(&MSDOS_SB(sb)->s_lock); ts = current_time(dir); - cluster = fat_alloc_new_dir(dir, &ts); + t = timespec64_to_timespec(ts); + cluster = fat_alloc_new_dir(dir, &t); if (cluster < 0) { err = cluster; goto out; } - err = 
vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo); + err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &t, &sinfo); if (err) goto out_free; inode_inc_iversion(dir); @@ -899,7 +903,8 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, struct msdos_dir_entry *dotdot_de; struct inode *old_inode, *new_inode; struct fat_slot_info old_sinfo, sinfo; - struct timespec ts; + struct timespec64 ts; + struct timespec t; loff_t new_i_pos; int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; @@ -934,8 +939,9 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, new_i_pos = MSDOS_I(new_inode)->i_pos; fat_detach(new_inode); } else { + t = timespec64_to_timespec(ts); err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0, - &ts, &sinfo); + &t, &sinfo); if (err) goto out; new_i_pos = sinfo.i_pos; diff --git a/fs/file_table.c b/fs/file_table.c index 7ec0b3e5f05d..d6eccd04d703 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -51,6 +51,7 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { + security_file_free(f); percpu_counter_dec(&nr_files); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -100,9 +101,8 @@ int proc_nr_files(struct ctl_table *table, int write, * done, you will imbalance int the mount's writer count * and a warning at __fput() time. 
*/ -struct file *get_empty_filp(void) +struct file *alloc_empty_file(int flags, const struct cred *cred) { - const struct cred *cred = current_cred(); static long old_max; struct file *f; int error; @@ -123,11 +123,10 @@ struct file *get_empty_filp(void) if (unlikely(!f)) return ERR_PTR(-ENOMEM); - percpu_counter_inc(&nr_files); f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { - file_free(f); + file_free_rcu(&f->f_u.fu_rcuhead); return ERR_PTR(error); } @@ -136,7 +135,10 @@ struct file *get_empty_filp(void) spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); eventpoll_init_file(f); + f->f_flags = flags; + f->f_mode = OPEN_FMODE(flags); /* f->f_version: 0 */ + percpu_counter_inc(&nr_files); return f; over: @@ -152,15 +154,15 @@ over: * alloc_file - allocate and initialize a 'struct file' * * @path: the (dentry, vfsmount) pair for the new file - * @mode: the mode with which the new file will be opened + * @flags: O_... flags with which the new file will be opened * @fop: the 'struct file_operations' for the new file */ -struct file *alloc_file(const struct path *path, fmode_t mode, +static struct file *alloc_file(const struct path *path, int flags, const struct file_operations *fop) { struct file *file; - file = get_empty_filp(); + file = alloc_empty_file(flags, current_cred()); if (IS_ERR(file)) return file; @@ -168,19 +170,56 @@ struct file *alloc_file(const struct path *path, fmode_t mode, file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_wb_err = filemap_sample_wb_err(file->f_mapping); - if ((mode & FMODE_READ) && + if ((file->f_mode & FMODE_READ) && likely(fop->read || fop->read_iter)) - mode |= FMODE_CAN_READ; - if ((mode & FMODE_WRITE) && + file->f_mode |= FMODE_CAN_READ; + if ((file->f_mode & FMODE_WRITE) && likely(fop->write || fop->write_iter)) - mode |= FMODE_CAN_WRITE; - file->f_mode = mode; + file->f_mode |= FMODE_CAN_WRITE; + file->f_mode |= FMODE_OPENED; file->f_op = 
fop; - if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); return file; } -EXPORT_SYMBOL(alloc_file); + +struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, + const char *name, int flags, + const struct file_operations *fops) +{ + static const struct dentry_operations anon_ops = { + .d_dname = simple_dname + }; + struct qstr this = QSTR_INIT(name, strlen(name)); + struct path path; + struct file *file; + + path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this); + if (!path.dentry) + return ERR_PTR(-ENOMEM); + if (!mnt->mnt_sb->s_d_op) + d_set_d_op(path.dentry, &anon_ops); + path.mnt = mntget(mnt); + d_instantiate(path.dentry, inode); + file = alloc_file(&path, flags, fops); + if (IS_ERR(file)) { + ihold(inode); + path_put(&path); + } + return file; +} +EXPORT_SYMBOL(alloc_file_pseudo); + +struct file *alloc_file_clone(struct file *base, int flags, + const struct file_operations *fops) +{ + struct file *f = alloc_file(&base->f_path, flags, fops); + if (!IS_ERR(f)) { + path_get(&f->f_path); + f->f_mapping = base->f_mapping; + } + return f; +} /* the real guts of fput() - releasing the last reference to file */ @@ -190,6 +229,9 @@ static void __fput(struct file *file) struct vfsmount *mnt = file->f_path.mnt; struct inode *inode = file->f_inode; + if (unlikely(!(file->f_mode & FMODE_OPENED))) + goto out; + might_sleep(); fsnotify_close(file); @@ -207,7 +249,6 @@ static void __fput(struct file *file) } if (file->f_op->release) file->f_op->release(inode, file); - security_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(file->f_mode & FMODE_PATH))) { cdev_put(inode->i_cdev); @@ -220,12 +261,10 @@ static void __fput(struct file *file) put_write_access(inode); __mnt_drop_write(mnt); } - file->f_path.dentry = NULL; - file->f_path.mnt = NULL; - file->f_inode = NULL; - file_free(file); dput(dentry); mntput(mnt); +out: + 
file_free(file); } static LLIST_HEAD(delayed_fput_list); @@ -300,14 +339,6 @@ void __fput_sync(struct file *file) EXPORT_SYMBOL(fput); -void put_filp(struct file *file) -{ - if (atomic_long_dec_and_test(&file->f_count)) { - security_file_free(file); - file_free(file); - } -} - void __init files_init(void) { filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index c184c5a356ff..cdcb376ef8df 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -220,6 +220,7 @@ int fscache_add_cache(struct fscache_cache *cache, { struct fscache_cache_tag *tag; + ASSERTCMP(ifsdef->cookie, ==, &fscache_fsdef_index); BUG_ON(!cache->ops); BUG_ON(!ifsdef); @@ -248,7 +249,6 @@ int fscache_add_cache(struct fscache_cache *cache, if (!cache->kobj) goto error; - ifsdef->cookie = &fscache_fsdef_index; ifsdef->cache = cache; cache->fsdef = ifsdef; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 97137d7ec5ee..83bfe04456b6 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -516,6 +516,7 @@ static int fscache_alloc_object(struct fscache_cache *cache, goto error; } + ASSERTCMP(object->cookie, ==, cookie); fscache_stat(&fscache_n_object_alloc); object->debug_id = atomic_inc_return(&fscache_object_debug_id); @@ -571,6 +572,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie, _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); + ASSERTCMP(object->cookie, ==, cookie); + spin_lock(&cookie->lock); /* there may be multiple initial creations of this object, but we only @@ -610,9 +613,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, spin_unlock(&cache->object_list_lock); } - /* attach to the cookie */ - object->cookie = cookie; - fscache_cookie_get(cookie, fscache_cookie_get_attach_object); + /* Attach to the cookie. The object already has a ref on it. 
*/ hlist_add_head(&object->cookie_link, &cookie->backing_objects); fscache_objlist_add(object); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 20e0d0a4dc8c..9edc920f651f 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -327,6 +327,7 @@ void fscache_object_init(struct fscache_object *object, object->store_limit_l = 0; object->cache = cache; object->cookie = cookie; + fscache_cookie_get(cookie, fscache_cookie_get_attach_object); object->parent = NULL; #ifdef CONFIG_FSCACHE_OBJECT_LIST RB_CLEAR_NODE(&object->objlist_link); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index e30c5975ea58..8d265790374c 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -70,7 +70,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERT(fscache_object_is_available(op->object)); ASSERTCMP(atomic_read(&op->usage), >, 0); - ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, + op->state, ==, FSCACHE_OP_ST_CANCELLED); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -499,7 +500,8 @@ void fscache_put_operation(struct fscache_operation *op) struct fscache_cache *cache; _enter("{OBJ%x OP%x,%d}", - op->object->debug_id, op->debug_id, atomic_read(&op->usage)); + op->object ? 
op->object->debug_id : 0, + op->debug_id, atomic_read(&op->usage)); ASSERTCMP(atomic_read(&op->usage), >, 0); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 56231b31f806..d80aab0d5982 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -399,7 +399,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, */ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, - umode_t mode, int *opened) + umode_t mode) { int err; struct inode *inode; @@ -469,7 +469,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - err = finish_open(file, entry, generic_file_open, opened); + err = finish_open(file, entry, generic_file_open); if (err) { fuse_sync_release(ff, flags); } else { @@ -489,7 +489,7 @@ out_err: static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, - umode_t mode, int *opened) + umode_t mode) { int err; struct fuse_conn *fc = get_fuse_conn(dir); @@ -508,12 +508,12 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, goto no_open; /* Only creates */ - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; if (fc->no_create) goto mknod; - err = fuse_create_open(dir, entry, file, flags, mode, opened); + err = fuse_create_open(dir, entry, file, flags, mode); if (err == -ENOSYS) { fc->no_create = 1; goto mknod; @@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, { struct fuse_entry_out outarg; struct inode *inode; + struct dentry *d; int err; struct fuse_forget_link *forget; @@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, } kfree(forget); - err = d_instantiate_no_diralias(entry, inode); - if (err) - return err; + d_drop(entry); + d = 
d_splice_alias(inode, entry); + if (IS_ERR(d)) + return PTR_ERR(d); - fuse_change_entry_timeout(entry, &outarg); + if (d) { + fuse_change_entry_timeout(d, &outarg); + dput(d); + } else { + fuse_change_entry_timeout(entry, &outarg); + } fuse_invalidate_attr(dir); return 0; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ffcaf98044b9..a24df8861b40 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -217,7 +217,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, return; } - old_mtime = inode->i_mtime; + old_mtime = timespec64_to_timespec(inode->i_mtime); fuse_change_attributes_common(inode, attr, attr_valid); oldsize = inode->i_size; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 3090c445e8fc..d97ad89955d1 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -871,7 +871,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct buffer_head *bh; struct gfs2_leaf *leaf; struct gfs2_dirent *dent; - struct timespec tv = current_time(inode); + struct timespec64 tv = current_time(inode); error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); if (error) @@ -1802,7 +1802,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh = da->bh; struct gfs2_dirent *dent = da->dent; - struct timespec tv; + struct timespec64 tv; struct gfs2_leaf *leaf; int error; @@ -1880,7 +1880,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) const struct qstr *name = &dentry->d_name; struct gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; - struct timespec tv = current_time(&dip->i_inode); + struct timespec64 tv = current_time(&dip->i_inode); /* Returns _either_ the entry (if its first in block) or the previous entry otherwise */ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index d8782a7a1e7d..c63bee9adb6a 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -338,7 +338,7 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl) 
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { const struct gfs2_dinode *str = buf; - struct timespec atime; + struct timespec64 atime; u16 height, depth; if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) @@ -361,7 +361,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); - if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) + if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0) ip->i_inode.i_atime = atime; ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index feda55f67050..648f0ca1ad57 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -580,7 +580,7 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct file *file, umode_t mode, dev_t dev, const char *symname, - unsigned int size, int excl, int *opened) + unsigned int size, int excl) { const struct qstr *name = &dentry->d_name; struct posix_acl *default_acl, *acl; @@ -626,7 +626,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = 0; if (file) { if (S_ISREG(inode->i_mode)) - error = finish_open(file, dentry, gfs2_open_common, opened); + error = finish_open(file, dentry, gfs2_open_common); else error = finish_no_open(file, NULL); } @@ -767,8 +767,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); d_instantiate(dentry, inode); if (file) { - *opened |= FILE_CREATED; - error = finish_open(file, dentry, gfs2_open_common, opened); + file->f_mode |= FMODE_CREATED; + error = finish_open(file, dentry, gfs2_open_common); } gfs2_glock_dq_uninit(ghs); gfs2_glock_dq_uninit(ghs + 1); @@ -822,7 +822,7 @@ 
fail: static int gfs2_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL); + return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl); } /** @@ -830,14 +830,13 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, * @dir: The directory inode * @dentry: The dentry of the new inode * @file: File to be opened - * @opened: atomic_open flags * * * Returns: errno */ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, - struct file *file, int *opened) + struct file *file) { struct inode *inode; struct dentry *d; @@ -866,7 +865,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, return d; } if (file && S_ISREG(inode->i_mode)) - error = finish_open(file, dentry, gfs2_open_common, opened); + error = finish_open(file, dentry, gfs2_open_common); gfs2_glock_dq_uninit(&gh); if (error) { @@ -879,7 +878,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { - return __gfs2_lookup(dir, dentry, NULL, NULL); + return __gfs2_lookup(dir, dentry, NULL); } /** @@ -1189,7 +1188,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, if (size >= gfs2_max_stuffed_size(GFS2_I(dir))) return -ENAMETOOLONG; - return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL); + return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0); } /** @@ -1204,7 +1203,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir)); - return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL); + return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, 
NULL, dsize, 0); } /** @@ -1219,7 +1218,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL); + return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0); } /** @@ -1229,14 +1228,13 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, * @file: The proposed new struct file * @flags: open flags * @mode: File mode - * @opened: Flag to say whether the file has been opened or not * * Returns: error code or 0 for success */ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, - umode_t mode, int *opened) + umode_t mode) { struct dentry *d; bool excl = !!(flags & O_EXCL); @@ -1244,13 +1242,13 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!d_in_lookup(dentry)) goto skip_lookup; - d = __gfs2_lookup(dir, dentry, file, opened); + d = __gfs2_lookup(dir, dentry, file); if (IS_ERR(d)) return PTR_ERR(d); if (d != NULL) dentry = d; if (d_really_is_positive(dentry)) { - if (!(*opened & FILE_OPENED)) + if (!(file->f_mode & FMODE_OPENED)) return finish_no_open(file, d); dput(d); return 0; @@ -1262,7 +1260,7 @@ skip_lookup: if (!(flags & O_CREAT)) return -ENOENT; - return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened); + return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl); } /* diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index b3309b83371a..a2dfa1b2a89c 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -351,7 +351,7 @@ static int hfs_read_inode(struct inode *inode, void *data) inode->i_mode &= ~hsb->s_file_umask; inode->i_mode |= S_IFREG; inode->i_ctime = inode->i_atime = inode->i_mtime = - hfs_m_to_utime(rec->file.MdDat); + timespec_to_timespec64(hfs_m_to_utime(rec->file.MdDat)); inode->i_op = 
&hfs_file_inode_operations; inode->i_fop = &hfs_file_operations; inode->i_mapping->a_ops = &hfs_aops; @@ -362,7 +362,7 @@ static int hfs_read_inode(struct inode *inode, void *data) HFS_I(inode)->fs_blocks = 0; inode->i_mode = S_IFDIR | (S_IRWXUGO & ~hsb->s_dir_umask); inode->i_ctime = inode->i_atime = inode->i_mtime = - hfs_m_to_utime(rec->dir.MdDat); + timespec_to_timespec64(hfs_m_to_utime(rec->dir.MdDat)); inode->i_op = &hfs_dir_inode_operations; inode->i_fop = &hfs_dir_operations; break; @@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, HFS_I(inode)->rsrc_inode = dir; HFS_I(dir)->rsrc_inode = inode; igrab(dir); - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); mark_inode_dirty(inode); dont_mount(dentry); out: diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index c0c8d433864f..c824f702feec 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -493,9 +493,9 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) hfsplus_get_perms(inode, &folder->permissions, 1); set_nlink(inode, 1); inode->i_size = 2 + be32_to_cpu(folder->valence); - inode->i_atime = hfsp_mt2ut(folder->access_date); - inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); - inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); + inode->i_atime = timespec_to_timespec64(hfsp_mt2ut(folder->access_date)); + inode->i_mtime = timespec_to_timespec64(hfsp_mt2ut(folder->content_mod_date)); + inode->i_ctime = timespec_to_timespec64(hfsp_mt2ut(folder->attribute_mod_date)); HFSPLUS_I(inode)->create_date = folder->create_date; HFSPLUS_I(inode)->fs_blocks = 0; if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { @@ -531,9 +531,9 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) init_special_inode(inode, inode->i_mode, be32_to_cpu(file->permissions.dev)); } - inode->i_atime = hfsp_mt2ut(file->access_date); - inode->i_mtime = hfsp_mt2ut(file->content_mod_date); - inode->i_ctime = 
hfsp_mt2ut(file->attribute_mod_date); + inode->i_atime = timespec_to_timespec64(hfsp_mt2ut(file->access_date)); + inode->i_mtime = timespec_to_timespec64(hfsp_mt2ut(file->content_mod_date)); + inode->i_ctime = timespec_to_timespec64(hfsp_mt2ut(file->attribute_mod_date)); HFSPLUS_I(inode)->create_date = file->create_date; } else { pr_err("bad catalog entry used to create inode\n"); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 3cd85eb5bbb1..444c7b170359 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -555,9 +555,9 @@ static int read_name(struct inode *ino, char *name) set_nlink(ino, st.nlink); i_uid_write(ino, st.uid); i_gid_write(ino, st.gid); - ino->i_atime = st.atime; - ino->i_mtime = st.mtime; - ino->i_ctime = st.ctime; + ino->i_atime = timespec_to_timespec64(st.atime); + ino->i_mtime = timespec_to_timespec64(st.mtime); + ino->i_ctime = timespec_to_timespec64(st.ctime); ino->i_size = st.size; ino->i_blocks = st.blocks; return 0; @@ -610,33 +610,21 @@ static struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, int err; inode = hostfs_iget(ino->i_sb); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); + if (IS_ERR(inode)) goto out; - } err = -ENOMEM; name = dentry_name(dentry); - if (name == NULL) - goto out_put; - - err = read_name(inode, name); - - __putname(name); - if (err == -ENOENT) { + if (name) { + err = read_name(inode, name); + __putname(name); + } + if (err) { iput(inode); - inode = NULL; + inode = (err == -ENOENT) ? 
NULL : ERR_PTR(err); } - else if (err) - goto out_put; - - d_add(dentry, inode); - return NULL; - - out_put: - iput(inode); out: - return ERR_PTR(err); + return d_splice_alias(inode, dentry); } static int hostfs_link(struct dentry *to, struct inode *ino, @@ -838,15 +826,15 @@ static int hostfs_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_ATIME) { attrs.ia_valid |= HOSTFS_ATTR_ATIME; - attrs.ia_atime = attr->ia_atime; + attrs.ia_atime = timespec64_to_timespec(attr->ia_atime); } if (attr->ia_valid & ATTR_MTIME) { attrs.ia_valid |= HOSTFS_ATTR_MTIME; - attrs.ia_mtime = attr->ia_mtime; + attrs.ia_mtime = timespec64_to_timespec(attr->ia_mtime); } if (attr->ia_valid & ATTR_CTIME) { attrs.ia_valid |= HOSTFS_ATTR_CTIME; - attrs.ia_ctime = attr->ia_ctime; + attrs.ia_ctime = timespec64_to_timespec(attr->ia_ctime); } if (attr->ia_valid & ATTR_ATIME_SET) { attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index c83ece7facc5..d85230c84ef2 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -244,6 +244,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in result = iget_locked(dir->i_sb, ino); if (!result) { hpfs_error(dir->i_sb, "hpfs_lookup: can't get inode"); + result = ERR_PTR(-ENOMEM); goto bail1; } if (result->i_state & I_NEW) { @@ -266,6 +267,8 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in if (de->has_acl || de->has_xtd_perm) if (!sb_rdonly(dir->i_sb)) { hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures"); + iput(result); + result = ERR_PTR(-EINVAL); goto bail1; } @@ -301,29 +304,17 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in } } +bail1: hpfs_brelse4(&qbh); /* * Made it. */ - end: - end_add: +end: +end_add: hpfs_unlock(dir->i_sb); - d_add(dentry, result); - return NULL; - - /* - * Didn't. 
- */ - bail1: - - hpfs_brelse4(&qbh); - - /*bail:*/ - - hpfs_unlock(dir->i_sb); - return ERR_PTR(-ENOENT); + return d_splice_alias(result, dentry); } const struct file_operations hpfs_dir_ops = diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d508c7844681..346a146c7617 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -411,6 +411,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, bool truncate_op = (lend == LLONG_MAX); memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, current->mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pagevec_init(&pvec); next = start; @@ -595,6 +596,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, * as input to create an allocation policy. */ memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pseudo_vma.vm_file = file; @@ -1308,10 +1310,6 @@ static int get_hstate_idx(int page_size_log) return h - hstates; } -static const struct dentry_operations anon_ops = { - .d_dname = simple_dname -}; - /* * Note that size should be aligned to proper hugepage size in caller side, * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. 
@@ -1320,19 +1318,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags, int page_size_log) { - struct file *file = ERR_PTR(-ENOMEM); struct inode *inode; - struct path path; - struct super_block *sb; - struct qstr quick_string; + struct vfsmount *mnt; int hstate_idx; + struct file *file; hstate_idx = get_hstate_idx(page_size_log); if (hstate_idx < 0) return ERR_PTR(-ENODEV); *user = NULL; - if (!hugetlbfs_vfsmount[hstate_idx]) + mnt = hugetlbfs_vfsmount[hstate_idx]; + if (!mnt) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { @@ -1348,45 +1345,28 @@ struct file *hugetlb_file_setup(const char *name, size_t size, } } - sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb; - quick_string.name = name; - quick_string.len = strlen(quick_string.name); - quick_string.hash = 0; - path.dentry = d_alloc_pseudo(sb, &quick_string); - if (!path.dentry) - goto out_shm_unlock; - - d_set_d_op(path.dentry, &anon_ops); - path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); file = ERR_PTR(-ENOSPC); - inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0); + inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0); if (!inode) - goto out_dentry; + goto out; if (creat_flags == HUGETLB_SHMFS_INODE) inode->i_flags |= S_PRIVATE; - file = ERR_PTR(-ENOMEM); - if (hugetlb_reserve_pages(inode, 0, - size >> huge_page_shift(hstate_inode(inode)), NULL, - acctflag)) - goto out_inode; - - d_instantiate(path.dentry, inode); inode->i_size = size; clear_nlink(inode); - file = alloc_file(&path, FMODE_WRITE | FMODE_READ, - &hugetlbfs_file_operations); - if (IS_ERR(file)) - goto out_dentry; /* inode is already attached */ - - return file; + if (hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL, + acctflag)) + file = ERR_PTR(-ENOMEM); + else + file = alloc_file_pseudo(inode, mnt, name, O_RDWR, + &hugetlbfs_file_operations); + if 
(!IS_ERR(file)) + return file; -out_inode: iput(inode); -out_dentry: - path_put(&path); -out_shm_unlock: +out: if (*user) { user_shm_unlock(size, *user); *user = NULL; diff --git a/fs/inode.c b/fs/inode.c index 0df41bb77e0f..a06de4454232 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -804,6 +804,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } + if (unlikely(inode->i_state & I_CREATING)) { + spin_unlock(&inode->i_lock); + return ERR_PTR(-ESTALE); + } __iget(inode); spin_unlock(&inode->i_lock); return inode; @@ -831,6 +835,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } + if (unlikely(inode->i_state & I_CREATING)) { + spin_unlock(&inode->i_lock); + return ERR_PTR(-ESTALE); + } __iget(inode); spin_unlock(&inode->i_lock); return inode; @@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode) lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); - inode->i_state &= ~I_NEW; + inode->i_state &= ~I_NEW & ~I_CREATING; smp_mb(); wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); +void discard_new_inode(struct inode *inode) +{ + lockdep_annotate_inode_mutex_key(inode); + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_NEW)); + inode->i_state &= ~I_NEW; + smp_mb(); + wake_up_bit(&inode->i_state, __I_NEW); + spin_unlock(&inode->i_lock); + iput(inode); +} +EXPORT_SYMBOL(discard_new_inode); + /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * @@ -1029,6 +1050,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); struct inode *old; + bool creating = inode->i_state & I_CREATING; again: spin_lock(&inode_hash_lock); @@ -1039,6 +1061,8 @@ again: * Use the old inode instead of the preallocated one. 
*/ spin_unlock(&inode_hash_lock); + if (IS_ERR(old)) + return NULL; wait_on_inode(old); if (unlikely(inode_unhashed(old))) { iput(old); @@ -1060,6 +1084,8 @@ again: inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); + if (!creating) + inode_sb_list_add(inode); unlock: spin_unlock(&inode_hash_lock); @@ -1094,12 +1120,13 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, struct inode *inode = ilookup5(sb, hashval, test, data); if (!inode) { - struct inode *new = new_inode(sb); + struct inode *new = alloc_inode(sb); if (new) { + new->i_state = 0; inode = inode_insert5(new, hashval, test, set, data); if (unlikely(inode != new)) - iput(new); + destroy_inode(new); } } return inode; @@ -1128,6 +1155,8 @@ again: inode = find_inode_fast(sb, head, ino); spin_unlock(&inode_hash_lock); if (inode) { + if (IS_ERR(inode)) + return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); @@ -1165,6 +1194,8 @@ again: */ spin_unlock(&inode_hash_lock); destroy_inode(inode); + if (IS_ERR(old)) + return NULL; inode = old; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { @@ -1282,7 +1313,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, inode = find_inode(sb, head, test, data); spin_unlock(&inode_hash_lock); - return inode; + return IS_ERR(inode) ? 
NULL : inode; } EXPORT_SYMBOL(ilookup5_nowait); @@ -1338,6 +1369,8 @@ again: spin_unlock(&inode_hash_lock); if (inode) { + if (IS_ERR(inode)) + return NULL; wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { iput(inode); @@ -1421,12 +1454,17 @@ int insert_inode_locked(struct inode *inode) } if (likely(!old)) { spin_lock(&inode->i_lock); - inode->i_state |= I_NEW; + inode->i_state |= I_NEW | I_CREATING; hlist_add_head(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0; } + if (unlikely(old->i_state & I_CREATING)) { + spin_unlock(&old->i_lock); + spin_unlock(&inode_hash_lock); + return -EBUSY; + } __iget(old); spin_unlock(&old->i_lock); spin_unlock(&inode_hash_lock); @@ -1443,7 +1481,10 @@ EXPORT_SYMBOL(insert_inode_locked); int insert_inode_locked4(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct inode *old = inode_insert5(inode, hashval, test, NULL, data); + struct inode *old; + + inode->i_state |= I_CREATING; + old = inode_insert5(inode, hashval, test, NULL, data); if (old != inode) { iput(old); @@ -1577,8 +1618,8 @@ static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode, if (upperdentry) { struct inode *realinode = d_inode(upperdentry); - if ((!timespec_equal(&inode->i_mtime, &realinode->i_mtime) || - !timespec_equal(&inode->i_ctime, &realinode->i_ctime))) { + if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) || + !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) { inode->i_mtime = realinode->i_mtime; inode->i_ctime = realinode->i_ctime; } @@ -1601,12 +1642,12 @@ static int relatime_need_update(const struct path *path, struct inode *inode, /* * Is mtime younger than atime? If yes, update atime: */ - if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0) + if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0) return 1; /* * Is ctime younger than atime? 
If yes, update atime: */ - if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0) + if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0) return 1; /* @@ -1621,7 +1662,7 @@ static int relatime_need_update(const struct path *path, struct inode *inode, return 0; } -int generic_update_time(struct inode *inode, struct timespec *time, int flags) +int generic_update_time(struct inode *inode, struct timespec64 *time, int flags) { int iflags = I_DIRTY_TIME; bool dirty = false; @@ -1649,9 +1690,9 @@ EXPORT_SYMBOL(generic_update_time); * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ -static int update_time(struct inode *inode, struct timespec *time, int flags) +static int update_time(struct inode *inode, struct timespec64 *time, int flags) { - int (*update_time)(struct inode *, struct timespec *, int); + int (*update_time)(struct inode *, struct timespec64 *, int); update_time = inode->i_op->update_time ? 
inode->i_op->update_time : generic_update_time; @@ -1672,7 +1713,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode, bool rcu) { struct vfsmount *mnt = path->mnt; - struct timespec now; + struct timespec64 now; if (inode->i_flags & S_NOATIME) return false; @@ -1695,10 +1736,10 @@ bool __atime_needs_update(const struct path *path, struct inode *inode, now = current_time(inode); - if (!relatime_need_update(path, inode, now, rcu)) + if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu)) return false; - if (timespec_equal(&inode->i_atime, &now)) + if (timespec64_equal(&inode->i_atime, &now)) return false; return true; @@ -1708,7 +1749,7 @@ void touch_atime(const struct path *path) { struct vfsmount *mnt = path->mnt; struct inode *inode = d_inode(path->dentry); - struct timespec now; + struct timespec64 now; if (!__atime_needs_update(path, inode, false)) return; @@ -1842,7 +1883,7 @@ EXPORT_SYMBOL(file_remove_privs); int file_update_time(struct file *file) { struct inode *inode = file_inode(file); - struct timespec now; + struct timespec64 now; int sync_it = 0; int ret; @@ -1851,10 +1892,10 @@ int file_update_time(struct file *file) return 0; now = current_time(inode); - if (!timespec_equal(&inode->i_mtime, &now)) + if (!timespec64_equal(&inode->i_mtime, &now)) sync_it = S_MTIME; - if (!timespec_equal(&inode->i_ctime, &now)) + if (!timespec64_equal(&inode->i_ctime, &now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) @@ -1999,8 +2040,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_uid = current_fsuid(); if (dir && dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; + + /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(dir, CAP_FSETID)) + mode &= ~S_ISGID; } else inode->i_gid = current_fsgid(); 
inode->i_mode = mode; @@ -2098,6 +2145,30 @@ void inode_nohighmem(struct inode *inode) EXPORT_SYMBOL(inode_nohighmem); /** + * timespec64_trunc - Truncate timespec64 to a granularity + * @t: Timespec64 + * @gran: Granularity in ns. + * + * Truncate a timespec64 to a granularity. Always rounds down. gran must + * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns). + */ +struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran) +{ + /* Avoid division in the common cases 1 ns and 1 s. */ + if (gran == 1) { + /* nothing */ + } else if (gran == NSEC_PER_SEC) { + t.tv_nsec = 0; + } else if (gran > 1 && gran < NSEC_PER_SEC) { + t.tv_nsec -= t.tv_nsec % gran; + } else { + WARN(1, "illegal file time granularity: %u", gran); + } + return t; +} +EXPORT_SYMBOL(timespec64_trunc); + +/** * current_time - Return FS time * @inode: inode. * @@ -2107,15 +2178,15 @@ EXPORT_SYMBOL(inode_nohighmem); * Note that inode and inode->sb cannot be NULL. * Otherwise, the function warns and returns time without truncation. 
*/ -struct timespec current_time(struct inode *inode) +struct timespec64 current_time(struct inode *inode) { - struct timespec now = current_kernel_time(); + struct timespec64 now = current_kernel_time64(); if (unlikely(!inode->i_sb)) { WARN(1, "current_time() called with uninitialized super_block in the inode"); return now; } - return timespec_trunc(now, inode->i_sb->s_time_gran); + return timespec64_trunc(now, inode->i_sb->s_time_gran); } EXPORT_SYMBOL(current_time); diff --git a/fs/internal.h b/fs/internal.h index 980d005b21b4..52a346903748 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -93,7 +93,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern struct file *get_empty_filp(void); +extern struct file *alloc_empty_file(int, const struct cred *); /* * super.c @@ -125,9 +125,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); -extern int open_check_o_direct(struct file *f); -extern int vfs_open(const struct path *, struct file *, const struct cred *); -extern struct file *filp_clone_open(struct file *); +extern int vfs_open(const struct path *, struct file *); /* * inode.c diff --git a/fs/iomap.c b/fs/iomap.c index 77397b5a96ef..0d0bd8845586 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1443,7 +1443,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops) { struct inode *inode = mapping->host; - loff_t pos = bno >> inode->i_blkbits; + loff_t pos = bno << inode->i_blkbits; unsigned blocksize = i_blocksize(inode); if (filemap_write_and_wait(mapping)) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 51dd68e67b0f..c0b66a7a795b 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1361,6 +1361,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == transaction && jh->b_jlist != BJ_Metadata) { 
jbd_lock_bh_state(bh); + if (jh->b_transaction == transaction && + jh->b_jlist != BJ_Metadata) + pr_err("JBD2: assertion failure: h_type=%u " + "h_line_no=%u block_no=%llu jlist=%u\n", + handle->h_type, handle->h_line_no, + (unsigned long long) bh->b_blocknr, + jh->b_jlist); J_ASSERT_JH(jh, jh->b_transaction != transaction || jh->b_jlist == BJ_Metadata); jbd_unlock_bh_state(bh); @@ -1380,11 +1387,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * of the transaction. This needs to be done * once a transaction -bzzz */ - jh->b_modified = 1; if (handle->h_buffer_credits <= 0) { ret = -ENOSPC; goto out_unlock_bh; } + jh->b_modified = 1; handle->h_buffer_credits--; } diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index e5a6deb38e1e..b2944f9218f7 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -201,7 +201,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, if (ret) goto fail; - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime)); + dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime))); jffs2_free_raw_inode(ri); @@ -234,7 +234,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry) if (dead_f->inocache) set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink); if (!ret) - dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); return ret; } /***********************************************************************/ @@ -268,7 +268,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink); mutex_unlock(&f->sem); d_instantiate(dentry, d_inode(old_dentry)); - dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); ihold(d_inode(old_dentry)); } return ret; @@ -418,7 +418,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char 
goto fail; } - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); + dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime))); jffs2_free_raw_dirent(rd); @@ -561,7 +561,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode goto fail; } - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); + dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime))); inc_nlink(dir_i); jffs2_free_raw_dirent(rd); @@ -598,7 +598,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, dentry->d_name.len, f, now); if (!ret) { - dir_i->i_mtime = dir_i->i_ctime = ITIME(now); + dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); clear_nlink(d_inode(dentry)); drop_nlink(dir_i); } @@ -733,7 +733,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode goto fail; } - dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime)); + dir_i->i_mtime = dir_i->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(rd->mctime))); jffs2_free_raw_dirent(rd); @@ -853,14 +853,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, * caller won't do it on its own since we are returning an error. 
*/ d_invalidate(new_dentry); - new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); + new_dir_i->i_mtime = new_dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); return ret; } if (d_is_dir(old_dentry)) drop_nlink(old_dir_i); - new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now); + new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = timespec_to_timespec64(ITIME(now)); return 0; } diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index bd0428bebe9b..481afd4c2e1a 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -308,7 +308,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping, inode->i_size = pos + writtenlen; inode->i_blocks = (inode->i_size + 511) >> 9; - inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime)); + inode->i_ctime = inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime))); } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index eab04eca95a3..0ecfb8ea38cd 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -146,9 +146,9 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) return PTR_ERR(new_metadata); } /* It worked. 
Update the inode */ - inode->i_atime = ITIME(je32_to_cpu(ri->atime)); - inode->i_ctime = ITIME(je32_to_cpu(ri->ctime)); - inode->i_mtime = ITIME(je32_to_cpu(ri->mtime)); + inode->i_atime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->atime))); + inode->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->ctime))); + inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(ri->mtime))); inode->i_mode = jemode_to_cpu(ri->mode); i_uid_write(inode, je16_to_cpu(ri->uid)); i_gid_write(inode, je16_to_cpu(ri->gid)); @@ -280,9 +280,9 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) i_uid_write(inode, je16_to_cpu(latest_node.uid)); i_gid_write(inode, je16_to_cpu(latest_node.gid)); inode->i_size = je32_to_cpu(latest_node.isize); - inode->i_atime = ITIME(je32_to_cpu(latest_node.atime)); - inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); - inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); + inode->i_atime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.atime))); + inode->i_mtime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.mtime))); + inode->i_ctime = timespec_to_timespec64(ITIME(je32_to_cpu(latest_node.ctime))); set_nlink(inode, f->inocache->pino_nlink); diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 395c4c0d0f06..1682a87c00b2 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -115,6 +115,13 @@ struct dinode { dxd_t _dxd; /* 16: */ union { __le32 _rdev; /* 4: */ + /* + * The fast symlink area + * is expected to overflow + * into _inlineea when + * needed (which will clear + * INLINEEA). + */ u8 _fastsymlink[128]; } _u; u8 _inlineea[128]; diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f36ef68905a7..93e8c590ff5c 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* release the page */ release_metapage(mp); - /* - * __mark_inode_dirty expects inodes to be hashed. 
Since we don't - * want special inodes in the fileset inode space, we make them - * appear hashed, but do not put on any lists. hlist_del() - * will work fine and require no locking. - */ - hlist_add_fake(&ip->i_hash); + inode_fake_hash(ip); return (ip); } diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 1f26d1910409..9940a1e04cbf 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -87,6 +87,7 @@ struct jfs_inode_info { struct { unchar _unused[16]; /* 16: */ dxd_t _dxd; /* 16: */ + /* _inline may overflow into _inline_ea when needed */ unchar _inline[128]; /* 128: inline symlink */ /* _inline_ea may overlay the last part of * file._xtroot if maxentry = XTROOTINITSLOT diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 5e9b7bb3aabf..4572b7cf183d 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -61,8 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); - rc = -ENOMEM; - goto fail; + return ERR_PTR(-ENOMEM); } jfs_inode = JFS_IP(inode); @@ -70,8 +69,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) rc = diAlloc(parent, S_ISDIR(mode), inode); if (rc) { jfs_warn("ialloc: diAlloc returned %d!", rc); - if (rc == -EIO) - make_bad_inode(inode); goto fail_put; } @@ -141,9 +138,10 @@ fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); - unlock_new_inode(inode); + discard_new_inode(inode); + return ERR_PTR(rc); + fail_put: iput(inode); -fail: return ERR_PTR(rc); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 56c3fcbfe80e..14528c0ffe63 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -175,8 +175,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -309,8 +308,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry 
*dentry, umode_t mode) if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -1054,8 +1052,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } @@ -1441,8 +1438,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, if (rc) { free_ea_wmap(ip); clear_nlink(ip); - unlock_new_inode(ip); - iput(ip); + discard_new_inode(ip); } else { d_instantiate_new(dentry, ip); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 1b9264fd54b6..09da5cf14e27 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) inode->i_ino = 0; inode->i_size = i_size_read(sb->s_bdev->bd_inode); inode->i_mapping->a_ops = &jfs_metapage_aops; - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->direct_inode = inode; @@ -967,8 +967,7 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, - offsetof(struct jfs_inode_info, i_inline), - sizeof_field(struct jfs_inode_info, i_inline), + offsetof(struct jfs_inode_info, i_inline), IDATASIZE, init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index c60f3d32ee91..a6797986b625 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -491,15 +491,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) if (size > PSIZE) { /* * To keep the rest of the code simple. Allocate a - * contiguous buffer to work with + * contiguous buffer to work with. Make the buffer large + * enough to make use of the whole extent. 
*/ - ea_buf->xattr = kmalloc(size, GFP_KERNEL); + ea_buf->max_size = (size + sb->s_blocksize - 1) & + ~(sb->s_blocksize - 1); + + ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL); if (ea_buf->xattr == NULL) return -ENOMEM; ea_buf->flag = EA_MALLOC; - ea_buf->max_size = (size + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); if (ea_size == 0) return 0; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 89d1dc19340b..d66cc0777303 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -779,7 +779,7 @@ int kernfs_add_one(struct kernfs_node *kn) ps_iattr = parent->iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; - ktime_get_real_ts(&ps_iattrs->ia_ctime); + ktime_get_real_ts64(&ps_iattrs->ia_ctime); ps_iattrs->ia_mtime = ps_iattrs->ia_ctime; } @@ -1306,7 +1306,7 @@ static void __kernfs_remove(struct kernfs_node *kn) /* update timestamps on the parent */ if (ps_iattr) { - ktime_get_real_ts(&ps_iattr->ia_iattr.ia_ctime); + ktime_get_real_ts64(&ps_iattr->ia_iattr.ia_ctime); ps_iattr->ia_iattr.ia_mtime = ps_iattr->ia_iattr.ia_ctime; } diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index a34303981deb..3d73fe9d56e2 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -52,7 +52,7 @@ static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn) iattrs->ia_uid = GLOBAL_ROOT_UID; iattrs->ia_gid = GLOBAL_ROOT_GID; - ktime_get_real_ts(&iattrs->ia_atime); + ktime_get_real_ts64(&iattrs->ia_atime); iattrs->ia_mtime = iattrs->ia_atime; iattrs->ia_ctime = iattrs->ia_atime; @@ -176,9 +176,9 @@ static inline void set_inode_attr(struct inode *inode, struct iattr *iattr) struct super_block *sb = inode->i_sb; inode->i_uid = iattr->ia_uid; inode->i_gid = iattr->ia_gid; - inode->i_atime = timespec_trunc(iattr->ia_atime, sb->s_time_gran); - inode->i_mtime = timespec_trunc(iattr->ia_mtime, sb->s_time_gran); - inode->i_ctime = timespec_trunc(iattr->ia_ctime, sb->s_time_gran); + inode->i_atime = timespec64_trunc(iattr->ia_atime, sb->s_time_gran); + 
inode->i_mtime = timespec64_trunc(iattr->ia_mtime, sb->s_time_gran); + inode->i_ctime = timespec64_trunc(iattr->ia_ctime, sb->s_time_gran); } static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) diff --git a/fs/locks.c b/fs/locks.c index 11a4d698aba8..bc047a7edc47 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1559,7 +1559,7 @@ EXPORT_SYMBOL(__break_lease); * exclusive leases. The justification is that if someone has an * exclusive lease, then they could be modifying it. */ -void lease_get_mtime(struct inode *inode, struct timespec *time) +void lease_get_mtime(struct inode *inode, struct timespec64 *time) { bool has_lease = false; struct file_lock_context *ctx; diff --git a/fs/namei.c b/fs/namei.c index 2490ddb8bc90..3cd396277cd3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2028,6 +2028,8 @@ static int link_path_walk(const char *name, struct nameidata *nd) { int err; + if (IS_ERR(name)) + return PTR_ERR(name); while (*name=='/') name++; if (!*name) @@ -2125,12 +2127,15 @@ OK: } } +/* must be paired with terminate_walk() */ static const char *path_init(struct nameidata *nd, unsigned flags) { const char *s = nd->name->name; if (!*s) flags &= ~LOOKUP_RCU; + if (flags & LOOKUP_RCU) + rcu_read_lock(); nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; @@ -2143,7 +2148,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); nd->root_seq = nd->seq; nd->m_seq = read_seqbegin(&mount_lock); @@ -2159,21 +2163,15 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->m_seq = read_seqbegin(&mount_lock); if (*s == '/') { - if (flags & LOOKUP_RCU) - rcu_read_lock(); set_root(nd); if (likely(!nd_jump_root(nd))) return s; - nd->root.mnt = NULL; - rcu_read_unlock(); return ERR_PTR(-ECHILD); } else if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; unsigned seq; - rcu_read_lock(); - do { seq = read_seqcount_begin(&fs->seq); nd->path = fs->pwd; @@ -2195,16 +2193,13 @@ static const char *path_init(struct nameidata *nd, unsigned flags) dentry = f.file->f_path.dentry; - if (*s) { - if (!d_can_lookup(dentry)) { - fdput(f); - return ERR_PTR(-ENOTDIR); - } + if (*s && unlikely(!d_can_lookup(dentry))) { + fdput(f); + return ERR_PTR(-ENOTDIR); } nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - rcu_read_lock(); nd->inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -2272,24 +2267,15 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path const char *s = path_init(nd, flags); int err; - if (IS_ERR(s)) - return PTR_ERR(s); - - if (unlikely(flags & LOOKUP_DOWN)) { + if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) { err = handle_lookup_down(nd); - if (unlikely(err < 0)) { - terminate_walk(nd); - return err; - } + if (unlikely(err < 0)) + s = ERR_PTR(err); } while (!(err = link_path_walk(s, nd)) && ((err = lookup_last(nd)) > 0)) { s = trailing_symlink(nd); - if (IS_ERR(s)) { - err = PTR_ERR(s); - break; - } } if (!err) err = complete_walk(nd); @@ -2336,10 +2322,7 @@ static int 
path_parentat(struct nameidata *nd, unsigned flags, struct path *parent) { const char *s = path_init(nd, flags); - int err; - if (IS_ERR(s)) - return PTR_ERR(s); - err = link_path_walk(s, nd); + int err = link_path_walk(s, nd); if (!err) err = complete_walk(nd); if (!err) { @@ -2464,6 +2447,35 @@ static int lookup_one_len_common(const char *name, struct dentry *base, } /** + * try_lookup_one_len - filesystem helper to lookup single pathname component + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to + * + * Look up a dentry by name in the dcache, returning NULL if it does not + * currently exist. The function does not try to create a dentry. + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * The caller must hold base->i_mutex. + */ +struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len) +{ + struct qstr this; + int err; + + WARN_ON_ONCE(!inode_is_locked(base->d_inode)); + + err = lookup_one_len_common(name, base, len, &this); + if (err) + return ERR_PTR(err); + + return lookup_dcache(&this, base, 0); +} +EXPORT_SYMBOL(try_lookup_one_len); + +/** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from @@ -2637,15 +2649,10 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path) { const char *s = path_init(nd, flags); int err; - if (IS_ERR(s)) - return PTR_ERR(s); + while (!(err = link_path_walk(s, nd)) && (err = mountpoint_last(nd)) > 0) { s = trailing_symlink(nd); - if (IS_ERR(s)) { - err = PTR_ERR(s); - break; - } } if (!err) { *path = nd->path; @@ -2998,17 +3005,16 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m * Returns 0 if successful. The file will have been created and attached to * @file by the filesystem calling finish_open(). 
* - * Returns 1 if the file was looked up only or didn't need creating. The - * caller will need to perform the open themselves. @path will have been - * updated to point to the new dentry. This may be negative. + * If the file was looked up only or didn't need creating, FMODE_OPENED won't + * be set. The caller will need to perform the open themselves. @path will + * have been updated to point to the new dentry. This may be negative. * * Returns an error code otherwise. */ static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, - int open_flag, umode_t mode, - int *opened) + int open_flag, umode_t mode) { struct dentry *const DENTRY_NOT_SET = (void *) -1UL; struct inode *dir = nd->path.dentry->d_inode; @@ -3023,39 +3029,38 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, file->f_path.dentry = DENTRY_NOT_SET; file->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, file, - open_to_namei_flags(open_flag), - mode, opened); + open_to_namei_flags(open_flag), mode); d_lookup_done(dentry); if (!error) { - /* - * We didn't have the inode before the open, so check open - * permission here. - */ - int acc_mode = op->acc_mode; - if (*opened & FILE_CREATED) { - WARN_ON(!(open_flag & O_CREAT)); - fsnotify_create(dir, dentry); - acc_mode = 0; - } - error = may_open(&file->f_path, acc_mode, open_flag); - if (WARN_ON(error > 0)) - error = -EINVAL; - } else if (error > 0) { - if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { + if (file->f_mode & FMODE_OPENED) { + /* + * We didn't have the inode before the open, so check open + * permission here. 
+ */ + int acc_mode = op->acc_mode; + if (file->f_mode & FMODE_CREATED) { + WARN_ON(!(open_flag & O_CREAT)); + fsnotify_create(dir, dentry); + acc_mode = 0; + } + error = may_open(&file->f_path, acc_mode, open_flag); + if (WARN_ON(error > 0)) + error = -EINVAL; + } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; } else { if (file->f_path.dentry) { dput(dentry); dentry = file->f_path.dentry; } - if (*opened & FILE_CREATED) + if (file->f_mode & FMODE_CREATED) fsnotify_create(dir, dentry); if (unlikely(d_is_negative(dentry))) { error = -ENOENT; } else { path->dentry = dentry; path->mnt = nd->path.mnt; - return 1; + return 0; } } } @@ -3066,25 +3071,22 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, /* * Look up and maybe create and open the last component. * - * Must be called with i_mutex held on parent. - * - * Returns 0 if the file was successfully atomically created (if necessary) and - * opened. In this case the file will be returned attached to @file. + * Must be called with parent locked (exclusive in O_CREAT case). * - * Returns 1 if the file was not completely opened at this time, though lookups - * and creations will have been performed and the dentry returned in @path will - * be positive upon return if O_CREAT was specified. If O_CREAT wasn't - * specified then a negative dentry may be returned. + * Returns 0 on success, that is, if + * the file was successfully atomically created (if necessary) and opened, or + * the file was not completely opened at this time, though lookups and + * creations were performed. + * These case are distinguished by presence of FMODE_OPENED on file->f_mode. + * In the latter case dentry returned in @path might be negative if O_CREAT + * hadn't been specified. * - * An error code is returned otherwise. - * - * FILE_CREATE will be set in @*opened if the dentry was created and will be - * cleared otherwise prior to returning. + * An error code is returned on failure. 
*/ static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, const struct open_flags *op, - bool got_write, int *opened) + bool got_write) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -3097,7 +3099,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (unlikely(IS_DEADDIR(dir_inode))) return -ENOENT; - *opened &= ~FILE_CREATED; + file->f_mode &= ~FMODE_CREATED; dentry = d_lookup(dir, &nd->last); for (;;) { if (!dentry) { @@ -3159,7 +3161,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (dir_inode->i_op->atomic_open) { error = atomic_open(nd, dentry, path, file, op, open_flag, - mode, opened); + mode); if (unlikely(error == -ENOENT) && create_error) error = create_error; return error; @@ -3182,7 +3184,7 @@ no_open: /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { - *opened |= FILE_CREATED; + file->f_mode |= FMODE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); if (!dir_inode->i_op->create) { error = -EACCES; @@ -3201,7 +3203,7 @@ no_open: out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; - return 1; + return 0; out_dput: dput(dentry); @@ -3212,8 +3214,7 @@ out_dput: * Handle the last step of open() */ static int do_last(struct nameidata *nd, - struct file *file, const struct open_flags *op, - int *opened) + struct file *file, const struct open_flags *op) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -3279,17 +3280,17 @@ static int do_last(struct nameidata *nd, inode_lock(dir->d_inode); else inode_lock_shared(dir->d_inode); - error = lookup_open(nd, &path, file, op, got_write, opened); + error = lookup_open(nd, &path, file, op, got_write); if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else inode_unlock_shared(dir->d_inode); - if (error <= 0) { - if (error) - goto out; + if (error) + goto out; - if ((*opened & FILE_CREATED) || + if (file->f_mode & 
FMODE_OPENED) { + if ((file->f_mode & FMODE_CREATED) || !S_ISREG(file_inode(file)->i_mode)) will_truncate = false; @@ -3297,7 +3298,7 @@ static int do_last(struct nameidata *nd, goto opened; } - if (*opened & FILE_CREATED) { + if (file->f_mode & FMODE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; @@ -3366,20 +3367,15 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto out; - BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - error = vfs_open(&nd->path, file, current_cred()); + BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ + error = vfs_open(&nd->path, file); if (error) goto out; - *opened |= FILE_OPENED; opened: - error = open_check_o_direct(file); - if (!error) - error = ima_file_check(file, op->acc_mode, *opened); + error = ima_file_check(file, op->acc_mode); if (!error && will_truncate) error = handle_truncate(file); out: - if (unlikely(error) && (*opened & FILE_OPENED)) - fput(file); if (unlikely(error > 0)) { WARN_ON(1); error = -EINVAL; @@ -3429,7 +3425,7 @@ EXPORT_SYMBOL(vfs_tmpfile); static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, - struct file *file, int *opened) + struct file *file) { struct dentry *child; struct path path; @@ -3451,12 +3447,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (error) goto out2; file->f_path.mnt = path.mnt; - error = finish_open(file, child, NULL, opened); - if (error) - goto out2; - error = open_check_o_direct(file); - if (error) - fput(file); + error = finish_open(file, child, NULL); out2: mnt_drop_write(path.mnt); out: @@ -3470,7 +3461,7 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) int error = path_lookupat(nd, flags, &path); if (!error) { audit_inode(nd->name, path.dentry, 0); - error = vfs_open(&path, file, current_cred()); + error = vfs_open(&path, file); path_put(&path); } 
return error; @@ -3479,59 +3470,40 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) static struct file *path_openat(struct nameidata *nd, const struct open_flags *op, unsigned flags) { - const char *s; struct file *file; - int opened = 0; int error; - file = get_empty_filp(); + file = alloc_empty_file(op->open_flag, current_cred()); if (IS_ERR(file)) return file; - file->f_flags = op->open_flag; - if (unlikely(file->f_flags & __O_TMPFILE)) { - error = do_tmpfile(nd, flags, op, file, &opened); - goto out2; - } - - if (unlikely(file->f_flags & O_PATH)) { + error = do_tmpfile(nd, flags, op, file); + } else if (unlikely(file->f_flags & O_PATH)) { error = do_o_path(nd, flags, file); - if (!error) - opened |= FILE_OPENED; - goto out2; - } - - s = path_init(nd, flags); - if (IS_ERR(s)) { - put_filp(file); - return ERR_CAST(s); - } - while (!(error = link_path_walk(s, nd)) && - (error = do_last(nd, file, op, &opened)) > 0) { - nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); - s = trailing_symlink(nd); - if (IS_ERR(s)) { - error = PTR_ERR(s); - break; + } else { + const char *s = path_init(nd, flags); + while (!(error = link_path_walk(s, nd)) && + (error = do_last(nd, file, op)) > 0) { + nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); + s = trailing_symlink(nd); } + terminate_walk(nd); } - terminate_walk(nd); -out2: - if (!(opened & FILE_OPENED)) { - BUG_ON(!error); - put_filp(file); + if (likely(!error)) { + if (likely(file->f_mode & FMODE_OPENED)) + return file; + WARN_ON(1); + error = -EINVAL; } - if (unlikely(error)) { - if (error == -EOPENSTALE) { - if (flags & LOOKUP_RCU) - error = -ECHILD; - else - error = -ESTALE; - } - file = ERR_PTR(error); + fput(file); + if (error == -EOPENSTALE) { + if (flags & LOOKUP_RCU) + error = -ECHILD; + else + error = -ESTALE; } - return file; + return ERR_PTR(error); } struct file *do_filp_open(int dfd, struct filename *pathname, @@ -4683,29 +4655,6 @@ out: return len; } -/* - * A helper for 
->readlink(). This should be used *ONLY* for symlinks that - * have ->get_link() not calling nd_jump_link(). Using (or not using) it - * for any given inode is up to filesystem. - */ -static int generic_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - DEFINE_DELAYED_CALL(done); - struct inode *inode = d_inode(dentry); - const char *link = inode->i_link; - int res; - - if (!link) { - link = inode->i_op->get_link(dentry, inode, &done); - if (IS_ERR(link)) - return PTR_ERR(link); - } - res = readlink_copy(buffer, buflen, link); - do_delayed_call(&done); - return res; -} - /** * vfs_readlink - copy symlink body into userspace buffer * @dentry: dentry on which to get symbolic link @@ -4719,6 +4668,9 @@ static int generic_readlink(struct dentry *dentry, char __user *buffer, int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = d_inode(dentry); + DEFINE_DELAYED_CALL(done); + const char *link; + int res; if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { if (unlikely(inode->i_op->readlink)) @@ -4732,7 +4684,15 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) spin_unlock(&inode->i_lock); } - return generic_readlink(dentry, buffer, buflen); + link = inode->i_link; + if (!link) { + link = inode->i_op->get_link(dentry, inode, &done); + if (IS_ERR(link)) + return PTR_ERR(link); + } + res = readlink_copy(buffer, buflen, link); + do_delayed_call(&done); + return res; } EXPORT_SYMBOL(vfs_readlink); diff --git a/fs/namespace.c b/fs/namespace.c index 8ddd14806799..bd2f4c68506a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); + smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); return 1; } + lock_mount_hash(); + if 
(unlikely(bastard->mnt_flags & MNT_DOOMED)) { + mnt_add_count(mnt, -1); + unlock_mount_hash(); + return 1; + } + unlock_mount_hash(); + /* caller will mntput() */ return -1; } @@ -1195,12 +1204,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ + if (likely(READ_ONCE(mnt->mnt_ns))) { + /* + * Since we don't do lock_mount_hash() here, + * ->mnt_ns can change under us. However, if it's + * non-NULL, then there's a reference that won't + * be dropped until after an RCU delay done after + * turning ->mnt_ns NULL. So if we observe it + * non-NULL under rcu_read_lock(), the reference + * we are dropping is not the final one. + */ + mnt_add_count(mnt, -1); rcu_read_unlock(); return; } lock_mount_hash(); + /* + * make sure that if __legitimize_mnt() has not seen us grab + * mount_lock, we'll see their refcount increment here. 
+ */ + smp_mb(); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { rcu_read_unlock(); unlock_mount_hash(); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ee81031cab29..64c214fb9da6 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -56,8 +56,8 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, res->change_attr = delegation->change_attr; if (nfs_have_writebacks(inode)) res->change_attr++; - res->ctime = inode->i_ctime; - res->mtime = inode->i_mtime; + res->ctime = timespec64_to_timespec(inode->i_ctime); + res->mtime = timespec64_to_timespec(inode->i_mtime); res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & args->bitmap[0]; res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bbd0465535eb..f033f3a69a3b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -883,8 +883,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { res = nfs_delegation_find_inode_server(server, fhandle); - if (res != ERR_PTR(-ENOENT)) + if (res != ERR_PTR(-ENOENT)) { + rcu_read_unlock(); return res; + } } rcu_read_unlock(); return ERR_PTR(-ENOENT); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7a9c14426855..d7f158c3efc8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1434,12 +1434,11 @@ static int do_open(struct inode *inode, struct file *filp) static int nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct file *file, unsigned open_flags, - int *opened) + struct file *file, unsigned open_flags) { int err; - err = finish_open(file, dentry, do_open, opened); + err = finish_open(file, dentry, do_open); if (err) goto out; if (S_ISREG(file->f_path.dentry->d_inode->i_mode)) @@ -1452,7 +1451,7 @@ out: int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, - umode_t mode, int *opened) + 
umode_t mode) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct nfs_open_context *ctx; @@ -1461,6 +1460,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct inode *inode; unsigned int lookup_flags = 0; bool switched = false; + int created = 0; int err; /* Expect a negative dentry */ @@ -1521,7 +1521,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, goto out; trace_nfs_atomic_open_enter(dir, ctx, open_flags); - inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created); + if (created) + file->f_mode |= FMODE_CREATED; if (IS_ERR(inode)) { err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); @@ -1546,7 +1548,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, goto out; } - err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened); + err = nfs_finish_open(ctx, ctx->dentry, file, open_flags); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); put_nfs_open_context(ctx); out: @@ -1641,6 +1643,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); struct inode *inode; + struct dentry *d; int error = -EACCES; d_drop(dentry); @@ -1662,10 +1665,12 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, goto out_error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); - error = PTR_ERR(inode); - if (IS_ERR(inode)) + d = d_splice_alias(inode, dentry); + if (IS_ERR(d)) { + error = PTR_ERR(d); goto out_error; - d_add(dentry, inode); + } + dput(d); out: dput(parent); return 0; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index d4a07acad598..8f003792ccde 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1243,17 +1243,18 @@ static int ff_layout_read_done_cb(struct rpc_task *task, 
hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); + clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { case -NFS4ERR_RESET_TO_PNFS: if (ff_layout_choose_best_ds_for_read(hdr->lseg, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) goto out_eagain; - ff_layout_read_record_layoutstats_done(task, hdr); - pnfs_read_resend_pnfs(hdr); + set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: - ff_layout_reset_read(hdr); + set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); return task->tk_status; case -EAGAIN: goto out_eagain; @@ -1403,6 +1404,10 @@ static void ff_layout_read_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_read_record_layoutstats_done(&hdr->task, hdr); + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + pnfs_read_resend_pnfs(hdr); + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + ff_layout_reset_read(hdr); pnfs_generic_rw_release(data); } @@ -1423,12 +1428,14 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); + clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { case -NFS4ERR_RESET_TO_PNFS: - ff_layout_reset_write(hdr, true); + set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: - ff_layout_reset_write(hdr, false); + set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); return task->tk_status; case -EAGAIN: return -EAGAIN; @@ -1575,6 +1582,10 @@ static void ff_layout_write_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_write_record_layoutstats_done(&hdr->task, hdr); + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + ff_layout_reset_write(hdr, true); + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + ff_layout_reset_write(hdr, false); pnfs_generic_rw_release(data); } diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 1c5d8d31fc0a..666415d13d52 
100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c @@ -88,8 +88,8 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, return FSCACHE_CHECKAUX_OBSOLETE; memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = nfsi->vfs_inode.i_mtime; - auxdata.ctime = nfsi->vfs_inode.i_ctime; + auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); + auxdata.ctime = timespec64_to_timespec(nfsi->vfs_inode.i_ctime); if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index b55fc7920c3b..4dc887813c71 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -237,8 +237,8 @@ void nfs_fscache_init_inode(struct inode *inode) return; memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = nfsi->vfs_inode.i_mtime; - auxdata.ctime = nfsi->vfs_inode.i_ctime; + auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); + auxdata.ctime = timespec64_to_timespec(nfsi->vfs_inode.i_ctime); if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); @@ -262,8 +262,8 @@ void nfs_fscache_clear_inode(struct inode *inode) dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = nfsi->vfs_inode.i_mtime; - auxdata.ctime = nfsi->vfs_inode.i_ctime; + auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); + auxdata.ctime = timespec64_to_timespec(nfsi->vfs_inode.i_ctime); fscache_relinquish_cookie(cookie, &auxdata, false); nfsi->fscache = NULL; } @@ -304,8 +304,8 @@ void nfs_fscache_open_file(struct inode *inode, struct file *filp) return; memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime = nfsi->vfs_inode.i_mtime; - auxdata.ctime = nfsi->vfs_inode.i_ctime; + auxdata.mtime = timespec64_to_timespec(nfsi->vfs_inode.i_mtime); + auxdata.ctime = 
timespec64_to_timespec(nfsi->vfs_inode.i_ctime); if (inode_is_open_for_write(inode)) { dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 73473d9bdfa4..b65aee481d13 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -501,15 +501,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode->i_atime = timespec_to_timespec64(fattr->atime); else if (nfs_server_capable(inode, NFS_CAP_ATIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode->i_mtime = timespec_to_timespec64(fattr->mtime); else if (nfs_server_capable(inode, NFS_CAP_MTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode->i_ctime = timespec_to_timespec64(fattr->ctime); else if (nfs_server_capable(inode, NFS_CAP_CTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -694,7 +694,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode->i_ctime = timespec_to_timespec64(fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -705,14 +705,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode->i_atime = timespec_to_timespec64(fattr->atime); else if (attr->ia_valid & ATTR_ATIME_SET) inode->i_atime = attr->ia_atime; else nfs_set_cache_invalid(inode, 
NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode->i_ctime = timespec_to_timespec64(fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -721,14 +721,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode->i_mtime = timespec_to_timespec64(fattr->mtime); else if (attr->ia_valid & ATTR_MTIME_SET) inode->i_mtime = attr->ia_mtime; else nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode->i_ctime = timespec_to_timespec64(fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -1351,6 +1351,8 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { + struct timespec ts; + if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) && inode_eq_iversion_raw(inode, fattr->pre_change_attr)) { @@ -1359,16 +1361,18 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } /* If we have atomic WCC data, we may update some attributes */ + ts = timespec64_to_timespec(inode->i_ctime); if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) - && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + && timespec_equal(&ts, &fattr->pre_ctime)) { + inode->i_ctime = timespec_to_timespec64(fattr->ctime); } + ts = timespec64_to_timespec(inode->i_mtime); if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) - && timespec_equal(&inode->i_mtime, 
&fattr->pre_mtime)) { - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + && timespec_equal(&ts, &fattr->pre_mtime)) { + inode->i_mtime = timespec_to_timespec64(fattr->mtime); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); } @@ -1394,7 +1398,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; unsigned long invalid = 0; - + struct timespec ts; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) return 0; @@ -1411,10 +1415,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_CHANGE | NFS_INO_REVAL_PAGECACHE; - if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) + ts = timespec64_to_timespec(inode->i_mtime); + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; - if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime)) + ts = timespec64_to_timespec(inode->i_ctime); + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime)) invalid |= NFS_INO_INVALID_CTIME; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1444,7 +1450,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) invalid |= NFS_INO_INVALID_OTHER; - if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime)) + ts = timespec64_to_timespec(inode->i_atime); + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) @@ -1716,12 +1723,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { - 
memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); + fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime); fattr->valid |= NFS_ATTR_FATTR_PRECTIME; } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { - memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); + fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime); fattr->valid |= NFS_ATTR_FATTR_PREMTIME; } if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && @@ -1884,7 +1891,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + inode->i_mtime = timespec_to_timespec64(fattr->mtime); } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_MTIME @@ -1893,7 +1900,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + inode->i_ctime = timespec_to_timespec64(fattr->ctime); } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_CTIME @@ -1931,7 +1938,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) - memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + inode->i_atime = timespec_to_timespec64(fattr->atime); else if (server->caps & NFS_CAP_ATIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATIME diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 85e4b4a233f9..350675e3ed47 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -354,6 +354,7 @@ static __be32 *xdr_time_not_set(__be32 *p) static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr) { + struct timespec ts; __be32 *p; p = xdr_reserve_space(xdr, NFS_sattr_sz << 2); @@ -375,17 +376,21 @@ 
static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr) else *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); - if (attr->ia_valid & ATTR_ATIME_SET) - p = xdr_encode_time(p, &attr->ia_atime); - else if (attr->ia_valid & ATTR_ATIME) - p = xdr_encode_current_server_time(p, &attr->ia_atime); - else + if (attr->ia_valid & ATTR_ATIME_SET) { + ts = timespec64_to_timespec(attr->ia_atime); + p = xdr_encode_time(p, &ts); + } else if (attr->ia_valid & ATTR_ATIME) { + ts = timespec64_to_timespec(attr->ia_atime); + p = xdr_encode_current_server_time(p, &ts); + } else p = xdr_time_not_set(p); - if (attr->ia_valid & ATTR_MTIME_SET) - xdr_encode_time(p, &attr->ia_mtime); - else if (attr->ia_valid & ATTR_MTIME) - xdr_encode_current_server_time(p, &attr->ia_mtime); - else + if (attr->ia_valid & ATTR_MTIME_SET) { + ts = timespec64_to_timespec(attr->ia_mtime); + xdr_encode_time(p, &ts); + } else if (attr->ia_valid & ATTR_MTIME) { + ts = timespec64_to_timespec(attr->ia_mtime); + xdr_encode_current_server_time(p, &ts); + } else xdr_time_not_set(p); } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 09ee36dd8426..64e4fa33d89f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -561,6 +561,7 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep) */ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr) { + struct timespec ts; u32 nbytes; __be32 *p; @@ -610,8 +611,10 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr) *p++ = xdr_zero; if (attr->ia_valid & ATTR_ATIME_SET) { + struct timespec ts; *p++ = xdr_two; - p = xdr_encode_nfstime3(p, &attr->ia_atime); + ts = timespec64_to_timespec(attr->ia_atime); + p = xdr_encode_nfstime3(p, &ts); } else if (attr->ia_valid & ATTR_ATIME) { *p++ = xdr_one; } else @@ -619,7 +622,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr) if (attr->ia_valid & ATTR_MTIME_SET) { *p++ = xdr_two; - xdr_encode_nfstime3(p, &attr->ia_mtime); + ts =
timespec64_to_timespec(attr->ia_mtime); + xdr_encode_nfstime3(p, &ts); } else if (attr->ia_valid & ATTR_MTIME) { *p = xdr_one; } else diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 137e18abb7e7..51beb6e38c90 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -258,7 +258,7 @@ extern const struct dentry_operations nfs4_dentry_operations; /* dir.c */ int nfs_atomic_open(struct inode *, struct dentry *, struct file *, - unsigned, umode_t, int *); + unsigned, umode_t); /* super.c */ extern struct file_system_type nfs4_fs_type; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ed45090e4df6..b790976d3913 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2951,7 +2951,7 @@ static int _nfs4_do_open(struct inode *dir, } } if (opened && opendata->file_created) - *opened |= FILE_CREATED; + *opened = 1; if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) { *ctx_th = opendata->f_attr.mdsthreshold; @@ -3294,6 +3294,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct inode *inode = calldata->inode; + struct pnfs_layout_hdr *lo; bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; @@ -3337,6 +3338,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; } + lo = calldata->arg.lr_args ? calldata->arg.lr_args->layout : NULL; + if (lo && !pnfs_layout_is_valid(lo)) { + calldata->arg.lr_args = NULL; + calldata->res.lr_res = NULL; + } + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; @@ -5972,12 +5979,19 @@ static void nfs4_delegreturn_release(void *calldata) static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) { struct nfs4_delegreturndata *d_data; + struct pnfs_layout_hdr *lo; d_data = (struct nfs4_delegreturndata *)data; if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) return; + lo = d_data->args.lr_args ? 
d_data->args.lr_args->layout : NULL; + if (lo && !pnfs_layout_is_valid(lo)) { + d_data->args.lr_args = NULL; + d_data->res.lr_res = NULL; + } + nfs4_setup_sequence(d_data->res.server->nfs_client, &d_data->args.seq_args, &d_data->res.seq_res, @@ -6452,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) - break; + goto out_restart; } - if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - goto out_done; - } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) - goto out_done; - + } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_restart; break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { - if (nfs4_stateid_match(&data->arg.open_stateid, + if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) - goto out_done; - } else if (nfs4_stateid_match(&data->arg.lock_stateid, + goto out_restart; + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) - goto out_done; + goto out_restart; } - if (!data->cancelled) - rpc_restart_call_prepare(task); out_done: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + return; +out_restart: + if (!data->cancelled) + rpc_restart_call_prepare(task); + goto out_done; } static void nfs4_lock_release(void *calldata) @@ -6488,7 +6502,7 @@ static void nfs4_lock_release(void *calldata) dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); - if (data->cancelled) { + if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); @@ -8650,6 +8664,8 @@ 
nfs4_layoutget_handle_exception(struct rpc_task *task, dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); + nfs4_sequence_free_slot(&lgp->res.seq_res); + switch (nfs4err) { case 0: goto out; @@ -8714,7 +8730,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } - nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) @@ -8786,20 +8801,22 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) if (IS_ERR(task)) return ERR_CAST(task); status = rpc_wait_for_completion_task(task); - if (status == 0) { + if (status != 0) + goto out; + + /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ + if (task->tk_status < 0 || lgp->res.layoutp->len == 0) { status = nfs4_layoutget_handle_exception(task, lgp, &exception); *timeout = exception.timeout; - } - + } else + lseg = pnfs_layout_process(lgp); +out: trace_nfs4_layoutget(lgp->args.ctx, &lgp->args.range, &lgp->res.range, &lgp->res.stateid, status); - /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ - if (status == 0 && lgp->res.layoutp->len) - lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) @@ -8817,6 +8834,8 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) &lrp->args.seq_args, &lrp->res.seq_res, task); + if (!pnfs_layout_is_valid(lrp->args.layout)) + rpc_exit(task, 0); } static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 738a7be019d2..cd41d2577a04 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1069,6 +1069,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server, const uint32_t attrmask[]) { + struct timespec ts; char owner_name[IDMAP_NAMESZ]; char owner_group[IDMAP_NAMESZ]; int owner_namelen = 0; @@ -1157,14 +1158,16 @@ static void 
encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { if (iap->ia_valid & ATTR_ATIME_SET) { *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_nfstime4(p, &iap->ia_atime); + ts = timespec64_to_timespec(iap->ia_atime); + p = xdr_encode_nfstime4(p, &ts); } else *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { if (iap->ia_valid & ATTR_MTIME_SET) { *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - p = xdr_encode_nfstime4(p, &iap->ia_mtime); + ts = timespec64_to_timespec(iap->ia_mtime); + p = xdr_encode_nfstime4(p, &ts); } else *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a8f5e6b16749..3fe81424337d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -801,6 +801,11 @@ static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp) { } +static inline bool pnfs_layout_is_valid(const struct pnfs_layout_hdr *lo) +{ + return false; +} + #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 77ccaad1399b..4fb1f72a25fb 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -121,13 +121,15 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, { loff_t new_size = lcp->lc_last_wr + 1; struct iattr iattr = { .ia_valid = 0 }; + struct timespec ts; int error; + ts = timespec64_to_timespec(inode->i_mtime); if (lcp->lc_mtime.tv_nsec == UTIME_NOW || - timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) - lcp->lc_mtime = current_time(inode); + timespec_compare(&lcp->lc_mtime, &ts) < 0) + lcp->lc_mtime = timespec64_to_timespec(current_time(inode)); iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; - iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime); if (new_size > i_size_read(inode)) { iattr.ia_valid |= 
ATTR_SIZE; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 3192b544a441..9b973f4f7d01 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -165,6 +165,7 @@ static __be32 * encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { + struct timespec ts; *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); @@ -180,9 +181,12 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl((u32) MINOR(stat->rdev)); p = encode_fsid(p, fhp); p = xdr_encode_hyper(p, stat->ino); - p = encode_time3(p, &stat->atime); - p = encode_time3(p, &stat->mtime); - p = encode_time3(p, &stat->ctime); + ts = timespec64_to_timespec(stat->atime); + p = encode_time3(p, &ts); + ts = timespec64_to_timespec(stat->mtime); + p = encode_time3(p, &ts); + ts = timespec64_to_timespec(stat->ctime); + p = encode_time3(p, &ts); return p; } @@ -271,8 +275,8 @@ void fill_pre_wcc(struct svc_fh *fhp) stat.size = inode->i_size; } - fhp->fh_pre_mtime = stat.mtime; - fhp->fh_pre_ctime = stat.ctime; + fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime); + fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime); fhp->fh_pre_size = stat.size; fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); fhp->fh_pre_saved = true; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 59d471025949..a96843c59fc1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -320,6 +320,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, struct xdr_netobj *label, int *umask) { + struct timespec ts; int expected_len, len = 0; u32 dummy32; char *buf; @@ -421,7 +422,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &iattr->ia_atime); + status = nfsd4_decode_time(argp, &ts); + iattr->ia_atime = 
timespec_to_timespec64(ts); if (status) return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -440,7 +442,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &iattr->ia_mtime); + status = nfsd4_decode_time(argp, &ts); + iattr->ia_mtime = timespec_to_timespec64(ts); if (status) return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index a43e8260520a..6b2e8b73d36e 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -131,7 +131,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, { struct dentry *dentry = fhp->fh_dentry; int type; - struct timespec time; + struct timespec64 time; u32 f; type = (stat->mode & S_IFMT); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b0555d7d8200..55a099e47ba2 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -763,7 +763,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, goto out_nfserr; } - host_err = ima_file_check(file, may_flags, 0); + host_err = ima_file_check(file, may_flags); if (host_err) { fput(file); goto out_nfserr; diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 63a1ca4b9dee..e2bea2ac5dfb 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -79,12 +79,11 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) */ static int dnotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct dnotify_mark *dn_mark; struct dnotify_struct *dn; struct dnotify_struct **prev; @@ -95,7 +94,8 @@ static int dnotify_handle_event(struct fsnotify_group *group, 
if (!S_ISDIR(inode->i_mode)) return 0; - BUG_ON(vfsmount_mark); + if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info))) + return 0; dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); @@ -319,7 +319,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - error = fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); + error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0); if (error) { mutex_unlock(&dnotify_group->mark_mutex); goto out_err; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d94e8031fe5f..f90842efea13 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -87,17 +87,17 @@ static int fanotify_get_response(struct fsnotify_group *group, return ret; } -static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmnt_mark, - u32 event_mask, - const void *data, int data_type) +static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, + u32 event_mask, const void *data, + int data_type) { __u32 marks_mask = 0, marks_ignored_mask = 0; const struct path *path = data; + struct fsnotify_mark *mark; + int type; - pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" - " data_type=%d\n", __func__, inode_mark, vfsmnt_mark, - event_mask, data, data_type); + pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", + __func__, iter_info->report_mask, event_mask, data, data_type); /* if we don't have enough info to send an event to userspace say no */ if (data_type != FSNOTIFY_EVENT_PATH) @@ -108,20 +108,21 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !d_can_lookup(path->dentry)) return false; - /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! 
- */ - if (inode_mark && - (!(event_mask & FS_EVENT_ON_CHILD) || - (inode_mark->mask & FS_EVENT_ON_CHILD))) { - marks_mask |= inode_mark->mask; - marks_ignored_mask |= inode_mark->ignored_mask; - } + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + /* + * if the event is for a child and this inode doesn't care about + * events on the child, don't send it! + */ + if (type == FSNOTIFY_OBJ_TYPE_INODE && + (event_mask & FS_EVENT_ON_CHILD) && + !(mark->mask & FS_EVENT_ON_CHILD)) + continue; - if (vfsmnt_mark) { - marks_mask |= vfsmnt_mark->mask; - marks_ignored_mask |= vfsmnt_mark->ignored_mask; + marks_mask |= mark->mask; + marks_ignored_mask |= mark->ignored_mask; } if (d_is_dir(path->dentry) && @@ -178,8 +179,6 @@ init: __maybe_unused static int fanotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *fanotify_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) @@ -199,8 +198,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); - if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data, - data_type)) + if (!fanotify_should_send_event(iter_info, mask, data, data_type)) return 0; pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index d478629c728b..10aac1942c9f 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -77,7 +77,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct inotify_inode_mark *inode_mark; struct inode *inode; - if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE)) + if (mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE) return; inode_mark = container_of(mark, struct inotify_inode_mark, 
fsn_mark); @@ -116,7 +116,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE) { + if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = igrab(mark->connector->inode); if (!inode) return; @@ -126,7 +126,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); - } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { struct mount *mnt = real_mount(mark->connector->mnt); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 613ec7e5a465..f174397b63a0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -184,8 +184,6 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask EXPORT_SYMBOL_GPL(__fsnotify_parent); static int send_to_group(struct inode *to_tell, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, __u32 mask, const void *data, int data_is, u32 cookie, const unsigned char *file_name, @@ -195,48 +193,45 @@ static int send_to_group(struct inode *to_tell, __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0; + struct fsnotify_mark *mark; + int type; - if (unlikely(!inode_mark && !vfsmount_mark)) { - BUG(); + if (WARN_ON(!iter_info->report_mask)) return 0; - } /* clear ignored on inode modification */ if (mask & FS_MODIFY) { - if (inode_mark && - !(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) - inode_mark->ignored_mask = 0; - if (vfsmount_mark && - !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) - vfsmount_mark->ignored_mask = 0; - } - - /* does the inode mark tell us to do something? 
*/ - if (inode_mark) { - group = inode_mark->group; - marks_mask |= inode_mark->mask; - marks_ignored_mask |= inode_mark->ignored_mask; + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + if (mark && + !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) + mark->ignored_mask = 0; + } } - /* does the vfsmount_mark tell us to do something? */ - if (vfsmount_mark) { - group = vfsmount_mark->group; - marks_mask |= vfsmount_mark->mask; - marks_ignored_mask |= vfsmount_mark->ignored_mask; + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + /* does the object mark tell us to do something? */ + if (mark) { + group = mark->group; + marks_mask |= mark->mask; + marks_ignored_mask |= mark->ignored_mask; + } } - pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" - " vfsmount_mark=%p marks_mask=%x marks_ignored_mask=%x" + pr_debug("%s: group=%p to_tell=%p mask=%x marks_mask=%x marks_ignored_mask=%x" " data=%p data_is=%d cookie=%d\n", - __func__, group, to_tell, mask, inode_mark, vfsmount_mark, - marks_mask, marks_ignored_mask, data, - data_is, cookie); + __func__, group, to_tell, mask, marks_mask, marks_ignored_mask, + data, data_is, cookie); if (!(test_mask & marks_mask & ~marks_ignored_mask)) return 0; - return group->ops->handle_event(group, to_tell, inode_mark, - vfsmount_mark, mask, data, data_is, + return group->ops->handle_event(group, to_tell, mask, data, data_is, file_name, cookie, iter_info); } @@ -264,6 +259,57 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) } /* + * iter_info is a multi head priority queue of marks. + * Pick a subset of marks from queue heads, all with the + * same group and set the report_mask for selected subset. + * Returns the report_mask of the selected subset. 
+ */ +static unsigned int fsnotify_iter_select_report_types( + struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *max_prio_group = NULL; + struct fsnotify_mark *mark; + int type; + + /* Choose max prio group among groups of all queue heads */ + fsnotify_foreach_obj_type(type) { + mark = iter_info->marks[type]; + if (mark && + fsnotify_compare_groups(max_prio_group, mark->group) > 0) + max_prio_group = mark->group; + } + + if (!max_prio_group) + return 0; + + /* Set the report mask for marks from same group as max prio group */ + iter_info->report_mask = 0; + fsnotify_foreach_obj_type(type) { + mark = iter_info->marks[type]; + if (mark && + fsnotify_compare_groups(max_prio_group, mark->group) == 0) + fsnotify_iter_set_report_type(iter_info, type); + } + + return iter_info->report_mask; +} + +/* + * Pop from iter_info multi head queue, the marks that were iterated in the + * current iteration step. + */ +static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) +{ + int type; + + fsnotify_foreach_obj_type(type) { + if (fsnotify_iter_should_report_type(iter_info, type)) + iter_info->marks[type] = + fsnotify_next_mark(iter_info->marks[type]); + } +} + +/* * This is the main call to fsnotify. The VFS calls into hook specific functions * in linux/fsnotify.h. Those functions then in turn call here. Here will call * out to all of the registered fsnotify_group. 
Those groups can then use the @@ -307,15 +353,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, if ((mask & FS_MODIFY) || (test_mask & to_tell->i_fsnotify_mask)) { - iter_info.inode_mark = + iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&to_tell->i_fsnotify_marks); } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - iter_info.inode_mark = + iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&to_tell->i_fsnotify_marks); - iter_info.vfsmount_mark = + iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } @@ -324,32 +370,14 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * ignore masks are properly reflected for mount mark notifications. * That's why this traversal is so complicated... */ - while (iter_info.inode_mark || iter_info.vfsmount_mark) { - struct fsnotify_mark *inode_mark = iter_info.inode_mark; - struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark; - - if (inode_mark && vfsmount_mark) { - int cmp = fsnotify_compare_groups(inode_mark->group, - vfsmount_mark->group); - if (cmp > 0) - inode_mark = NULL; - else if (cmp < 0) - vfsmount_mark = NULL; - } - - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, - data, data_is, cookie, file_name, - &iter_info); + while (fsnotify_iter_select_report_types(&iter_info)) { + ret = send_to_group(to_tell, mask, data, data_is, cookie, + file_name, &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; - if (inode_mark) - iter_info.inode_mark = - fsnotify_next_mark(iter_info.inode_mark); - if (vfsmount_mark) - iter_info.vfsmount_mark = - fsnotify_next_mark(iter_info.vfsmount_mark); + fsnotify_iter_next(&iter_info); } ret = 0; out: diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 60f365dc1408..34515d2c4ba3 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -9,12 +9,6 @@ #include "../mount.h" -struct 
fsnotify_iter_info { - struct fsnotify_mark *inode_mark; - struct fsnotify_mark *vfsmount_mark; - int srcu_idx; -}; - /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); diff --git a/fs/notify/group.c b/fs/notify/group.c index b7a4b6a69efa..aa5468f23e45 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -67,7 +67,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) fsnotify_group_stop_queueing(group); /* Clear all marks for this group and queue them for destruction */ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES_MASK); /* * Some marks can still be pinned when waiting for response from diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index c00d2caca894..7e4578d35b61 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -25,8 +25,6 @@ extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); extern int inotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 40dedb37a1f3..9ab6dde38a14 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -65,12 +65,11 @@ static int inotify_merge(struct list_head *list, int inotify_handle_event(struct fsnotify_group *group, struct inode *inode, - struct fsnotify_mark *inode_mark, - struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, const unsigned char *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { + struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct 
inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; @@ -78,7 +77,8 @@ int inotify_handle_event(struct fsnotify_group *group, int len = 0; int alloc_len = sizeof(struct inotify_event_info); - BUG_ON(vfsmount_mark); + if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info))) + return 0; if ((inode_mark->mask & FS_EXCL_UNLINK) && (data_type == FSNOTIFY_EVENT_PATH)) { diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ef32f3657958..1cf5b779d862 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -485,10 +485,14 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { struct inotify_inode_mark *i_mark; + struct fsnotify_iter_info iter_info = { }; + + fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE, + fsn_mark); /* Queue ignore event for the watch */ - inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL); + inotify_handle_event(group, NULL, FS_IN_IGNORED, NULL, + FSNOTIFY_EVENT_NONE, NULL, 0, &iter_info); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -578,7 +582,7 @@ static int inotify_new_watch(struct fsnotify_group *group, } /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0); + ret = fsnotify_add_inode_mark_locked(&tmp_i_mark->fsn_mark, inode, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index e9191b416434..61f4c5fa34c7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -119,9 +119,9 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) new_mask |= mark->mask; } - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + if (conn->type 
== FSNOTIFY_OBJ_TYPE_INODE) conn->inode->i_fsnotify_mask = new_mask; - else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) + else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask; } @@ -139,7 +139,7 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) spin_lock(&conn->lock); __fsnotify_recalc_mask(conn); spin_unlock(&conn->lock); - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) __fsnotify_update_child_dentry_flags(conn->inode); } @@ -166,18 +166,18 @@ static struct inode *fsnotify_detach_connector_from_object( { struct inode *inode = NULL; - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = conn->inode; rcu_assign_pointer(inode->i_fsnotify_marks, NULL); inode->i_fsnotify_mask = 0; conn->inode = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; - } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, NULL); real_mount(conn->mnt)->mnt_fsnotify_mask = 0; conn->mnt = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; } return inode; @@ -294,12 +294,12 @@ static void fsnotify_put_mark_wake(struct fsnotify_mark *mark) bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) { - /* This can fail if mark is being removed */ - if (!fsnotify_get_mark_safe(iter_info->inode_mark)) - return false; - if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) { - fsnotify_put_mark_wake(iter_info->inode_mark); - return false; + int type; + + fsnotify_foreach_obj_type(type) { + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->marks[type])) + goto fail; } /* @@ -310,13 +310,20 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) srcu_read_unlock(&fsnotify_mark_srcu, 
iter_info->srcu_idx); return true; + +fail: + for (type--; type >= 0; type--) + fsnotify_put_mark_wake(iter_info->marks[type]); + return false; } void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) { + int type; + iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - fsnotify_put_mark_wake(iter_info->inode_mark); - fsnotify_put_mark_wake(iter_info->vfsmount_mark); + fsnotify_foreach_obj_type(type) + fsnotify_put_mark_wake(iter_info->marks[type]); } /* @@ -442,10 +449,10 @@ static int fsnotify_attach_connector_to_object( spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); if (inode) { - conn->flags = FSNOTIFY_OBJ_TYPE_INODE; + conn->type = FSNOTIFY_OBJ_TYPE_INODE; conn->inode = igrab(inode); } else { - conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->type = FSNOTIFY_OBJ_TYPE_VFSMOUNT; conn->mnt = mnt; } /* @@ -479,8 +486,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector( if (!conn) goto out; spin_lock(&conn->lock); - if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE | - FSNOTIFY_OBJ_TYPE_VFSMOUNT))) { + if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) { spin_unlock(&conn->lock); srcu_read_unlock(&fsnotify_mark_srcu, idx); return NULL; @@ -646,16 +652,16 @@ struct fsnotify_mark *fsnotify_find_mark( return NULL; } -/* Clear any marks in a group with given type */ +/* Clear any marks in a group with given type mask */ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, - unsigned int type) + unsigned int type_mask) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); struct list_head *head = &to_free; /* Skip selection step if we want to clear all marks. 
*/ - if (type == FSNOTIFY_OBJ_ALL_TYPES) { + if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) { head = &group->marks_list; goto clear; } @@ -670,7 +676,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->connector->flags & type) + if ((1U << mark->connector->type) & type_mask) list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 1c1ee489284b..decaf75d1cd5 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -667,18 +667,18 @@ static int ntfs_read_locked_inode(struct inode *vi) * mtime is the last change of the data within the file. Not changed * when only metadata is changed, e.g. a rename doesn't affect mtime. */ - vi->i_mtime = ntfs2utc(si->last_data_change_time); + vi->i_mtime = timespec_to_timespec64(ntfs2utc(si->last_data_change_time)); /* * ctime is the last change of the metadata of the file. This obviously * always changes, when mtime is changed. ctime can be changed on its * own, mtime is then not changed, e.g. when a file is renamed. */ - vi->i_ctime = ntfs2utc(si->last_mft_change_time); + vi->i_ctime = timespec_to_timespec64(ntfs2utc(si->last_mft_change_time)); /* * Last access to the data within the file. Not changed during a rename * for example but changed whenever the file is written to. */ - vi->i_atime = ntfs2utc(si->last_access_time); + vi->i_atime = timespec_to_timespec64(ntfs2utc(si->last_access_time)); /* Find the attribute list attribute if present. */ ntfs_attr_reinit_search_ctx(ctx); @@ -2804,11 +2804,11 @@ done: * for real. 
*/ if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) { - struct timespec now = current_time(VFS_I(base_ni)); + struct timespec64 now = current_time(VFS_I(base_ni)); int sync_it = 0; - if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) || - !timespec_equal(&VFS_I(base_ni)->i_ctime, &now)) + if (!timespec64_equal(&VFS_I(base_ni)->i_mtime, &now) || + !timespec64_equal(&VFS_I(base_ni)->i_ctime, &now)) sync_it = 1; VFS_I(base_ni)->i_mtime = now; VFS_I(base_ni)->i_ctime = now; @@ -2923,14 +2923,14 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) } } if (ia_valid & ATTR_ATIME) - vi->i_atime = timespec_trunc(attr->ia_atime, - vi->i_sb->s_time_gran); + vi->i_atime = timespec64_trunc(attr->ia_atime, + vi->i_sb->s_time_gran); if (ia_valid & ATTR_MTIME) - vi->i_mtime = timespec_trunc(attr->ia_mtime, - vi->i_sb->s_time_gran); + vi->i_mtime = timespec64_trunc(attr->ia_mtime, + vi->i_sb->s_time_gran); if (ia_valid & ATTR_CTIME) - vi->i_ctime = timespec_trunc(attr->ia_ctime, - vi->i_sb->s_time_gran); + vi->i_ctime = timespec64_trunc(attr->ia_ctime, + vi->i_sb->s_time_gran); mark_inode_dirty(vi); out: return err; @@ -2997,7 +2997,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si = (STANDARD_INFORMATION*)((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset)); /* Update the access times if they have changed. 
*/ - nt = utc2ntfs(vi->i_mtime); + nt = utc2ntfs(timespec64_to_timespec(vi->i_mtime)); if (si->last_data_change_time != nt) { ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, (long long) @@ -3006,7 +3006,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si->last_data_change_time = nt; modified = true; } - nt = utc2ntfs(vi->i_ctime); + nt = utc2ntfs(timespec64_to_timespec(vi->i_ctime)); if (si->last_mft_change_time != nt) { ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, (long long) @@ -3015,7 +3015,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) si->last_mft_change_time = nt; modified = true; } - nt = utc2ntfs(vi->i_atime); + nt = utc2ntfs(timespec64_to_timespec(vi->i_atime)); if (si->last_access_time != nt) { ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 68728de12864..0ff424c6d17c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2140,6 +2140,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; + struct timespec ts; lvb = ocfs2_dlm_lvb(&lockres->l_lksb); @@ -2160,12 +2161,15 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); lvb->lvb_imode = cpu_to_be16(inode->i_mode); lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); + ts = timespec64_to_timespec(inode->i_atime); lvb->lvb_iatime_packed = - cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); + cpu_to_be64(ocfs2_pack_timespec(&ts)); + ts = timespec64_to_timespec(inode->i_ctime); lvb->lvb_ictime_packed = - cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); + cpu_to_be64(ocfs2_pack_timespec(&ts)); + ts = timespec64_to_timespec(inode->i_mtime); lvb->lvb_imtime_packed = - cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); + 
cpu_to_be64(ocfs2_pack_timespec(&ts)); lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); @@ -2183,6 +2187,7 @@ static void ocfs2_unpack_timespec(struct timespec *spec, static void ocfs2_refresh_inode_from_lvb(struct inode *inode) { + struct timespec ts; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; struct ocfs2_meta_lvb *lvb; @@ -2210,12 +2215,15 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); inode->i_mode = be16_to_cpu(lvb->lvb_imode); set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); - ocfs2_unpack_timespec(&inode->i_atime, + ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_iatime_packed)); - ocfs2_unpack_timespec(&inode->i_mtime, + inode->i_atime = timespec_to_timespec64(ts); + ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_imtime_packed)); - ocfs2_unpack_timespec(&inode->i_ctime, + inode->i_mtime = timespec_to_timespec64(ts); + ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_ictime_packed)); + inode->i_ctime = timespec_to_timespec64(ts); spin_unlock(&oi->ip_lock); } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a2a8603d27e0..255f758af03a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -222,7 +222,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, int ocfs2_should_update_atime(struct inode *inode, struct vfsmount *vfsmnt) { - struct timespec now; + struct timespec64 now; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) @@ -248,8 +248,8 @@ int ocfs2_should_update_atime(struct inode *inode, return 0; if (vfsmnt->mnt_flags & MNT_RELATIME) { - if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) || - (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0)) + if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) || + 
(timespec64_compare(&inode->i_atime, &inode->i_ctime) <= 0)) return 1; return 0; diff --git a/fs/open.c b/fs/open.c index d0e955b558ad..d98e19239bb7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -724,27 +724,13 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) return ksys_fchown(fd, user, group); } -int open_check_o_direct(struct file *f) -{ - /* NB: we're sure to have correct a_ops only after f_op->open */ - if (f->f_flags & O_DIRECT) { - if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) - return -EINVAL; - } - return 0; -} - static int do_dentry_open(struct file *f, struct inode *inode, - int (*open)(struct inode *, struct file *), - const struct cred *cred) + int (*open)(struct inode *, struct file *)) { static const struct file_operations empty_fops = {}; int error; - f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | - FMODE_PREAD | FMODE_PWRITE; - path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; @@ -753,7 +739,7 @@ static int do_dentry_open(struct file *f, f->f_wb_err = filemap_sample_wb_err(f->f_mapping); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH; + f->f_mode = FMODE_PATH | FMODE_OPENED; f->f_op = &empty_fops; return 0; } @@ -780,7 +766,7 @@ static int do_dentry_open(struct file *f, goto cleanup_all; } - error = security_file_open(f, cred); + error = security_file_open(f); if (error) goto cleanup_all; @@ -788,6 +774,8 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; + /* normally all 3 are set; ->open() can clear them if needed */ + f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; if (!open) open = f->f_op->open; if (open) { @@ -795,6 +783,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; } + f->f_mode |= FMODE_OPENED; if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); if ((f->f_mode & FMODE_READ) && @@ -809,9 +798,16 @@ static int do_dentry_open(struct file *f, file_ra_state_init(&f->f_ra, 
f->f_mapping->host->i_mapping); + /* NB: we're sure to have correct a_ops only after f_op->open */ + if (f->f_flags & O_DIRECT) { + if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) + return -EINVAL; + } return 0; cleanup_all: + if (WARN_ON_ONCE(error > 0)) + error = -EINVAL; fops_put(f->f_op); if (f->f_mode & FMODE_WRITER) { put_write_access(inode); @@ -847,19 +843,12 @@ cleanup_file: * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - int *opened) + int (*open)(struct inode *, struct file *)) { - int error; - BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ + BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; - error = do_dentry_open(file, d_backing_inode(dentry), open, - current_cred()); - if (!error) - *opened |= FILE_OPENED; - - return error; + return do_dentry_open(file, d_backing_inode(dentry), open); } EXPORT_SYMBOL(finish_open); @@ -874,13 +863,13 @@ EXPORT_SYMBOL(finish_open); * NB: unlike finish_open() this function does consume the dentry reference and * the caller need not dput() it. * - * Returns "1" which must be the return value of ->atomic_open() after having + * Returns "0" which must be the return value of ->atomic_open() after having * called this function. 
*/ int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; - return 1; + return 0; } EXPORT_SYMBOL(finish_no_open); @@ -896,8 +885,7 @@ EXPORT_SYMBOL(file_path); * @file: newly allocated file with f_flag initialized * @cred: credentials to use */ -int vfs_open(const struct path *path, struct file *file, - const struct cred *cred) +int vfs_open(const struct path *path, struct file *file) { struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0); @@ -905,7 +893,7 @@ int vfs_open(const struct path *path, struct file *file, return PTR_ERR(dentry); file->f_path = *path; - return do_dentry_open(file, d_backing_inode(dentry), NULL, cred); + return do_dentry_open(file, d_backing_inode(dentry), NULL); } struct file *dentry_open(const struct path *path, int flags, @@ -919,19 +907,11 @@ struct file *dentry_open(const struct path *path, int flags, /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); - f = get_empty_filp(); + f = alloc_empty_file(flags, cred); if (!IS_ERR(f)) { - f->f_flags = flags; - error = vfs_open(path, f, cred); - if (!error) { - /* from now on we need fput() to dispose of f */ - error = open_check_o_direct(f); - if (error) { - fput(f); - f = ERR_PTR(error); - } - } else { - put_filp(f); + error = vfs_open(path, f); + if (error) { + fput(f); f = ERR_PTR(error); } } @@ -1063,26 +1043,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(file_open_root); -struct file *filp_clone_open(struct file *oldfile) -{ - struct file *file; - int retval; - - file = get_empty_filp(); - if (IS_ERR(file)) - return file; - - file->f_flags = oldfile->f_flags; - retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred); - if (retval) { - put_filp(file); - return ERR_PTR(retval); - } - - return file; -} -EXPORT_SYMBOL(filp_clone_open); - long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) { struct open_flags op; diff --git 
a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 74b37cbbd5d4..33ee8cb32f83 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -719,37 +719,6 @@ struct ORANGEFS_dev_map_desc32 { __s32 count; }; -static unsigned long translate_dev_map26(unsigned long args, long *error) -{ - struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args; - /* - * Depending on the architecture, allocate some space on the - * user-call-stack based on our expected layout. - */ - struct ORANGEFS_dev_map_desc __user *p = - compat_alloc_user_space(sizeof(*p)); - compat_uptr_t addr; - - *error = 0; - /* get the ptr from the 32 bit user-space */ - if (get_user(addr, &p32->ptr)) - goto err; - /* try to put that into a 64-bit layout */ - if (put_user(compat_ptr(addr), &p->ptr)) - goto err; - /* copy the remaining fields */ - if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32))) - goto err; - if (copy_in_user(&p->size, &p32->size, sizeof(__s32))) - goto err; - if (copy_in_user(&p->count, &p32->count, sizeof(__s32))) - goto err; - return (unsigned long)p; -err: - *error = -EFAULT; - return 0; -} - /* * 32 bit user-space apps' ioctl handlers when kernel modules * is compiled as a 64 bit one @@ -758,25 +727,26 @@ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long args) { long ret; - unsigned long arg = args; /* Check for properly constructed commands */ ret = check_ioctl_command(cmd); if (ret < 0) return ret; if (cmd == ORANGEFS_DEV_MAP) { - /* - * convert the arguments to what we expect internally - * in kernel space - */ - arg = translate_dev_map26(args, &ret); - if (ret < 0) { - gossip_err("Could not translate dev map\n"); - return ret; - } + struct ORANGEFS_dev_map_desc desc; + struct ORANGEFS_dev_map_desc32 d32; + + if (copy_from_user(&d32, (void __user *)args, sizeof(d32))) + return -EFAULT; + + desc.ptr = compat_ptr(d32.ptr); + desc.total_size = d32.total_size; + desc.size = d32.size; 
+ desc.count = d32.count; + return orangefs_bufmap_initialize(&desc); } /* no other ioctl requires translation */ - return dispatch_ioctl_command(cmd, arg); + return dispatch_ioctl_command(cmd, args); } #endif /* CONFIG_COMPAT is in .config */ diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index d6db252e6200..6e4d2af8f5bc 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -297,7 +297,7 @@ int orangefs_permission(struct inode *inode, int mask) return generic_permission(inode, mask); } -int orangefs_update_time(struct inode *inode, struct timespec *time, int flags) +int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags) { struct iattr iattr; gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n", diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 004511617b6d..17b24ad6b264 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -342,7 +342,7 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, int orangefs_permission(struct inode *inode, int mask); -int orangefs_update_time(struct inode *, struct timespec *, int); +int orangefs_update_time(struct inode *, struct timespec64 *, int); /* * defined in xattr.c diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 079a465796f3..dd28079f518c 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Documentation/ABI/stable/orangefs-sysfs: + * Documentation/ABI/stable/sysfs-fs-orangefs: * * What: /sys/fs/orangefs/perf_counter_reset * Date: June 2015 diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1db5b3b458a1..ed16a898caeb 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -416,7 +416,7 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) return err; } -int ovl_update_time(struct inode *inode, struct timespec *ts, int flags) +int 
ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) { if (flags & S_ATIME) { struct ovl_fs *ofs = inode->i_sb->s_fs_info; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3c5e9f18b0d9..7538b9b56237 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -325,7 +325,7 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); -int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); +int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); bool ovl_is_private_xattr(const char *name); struct ovl_inode_params { diff --git a/fs/pipe.c b/fs/pipe.c index bb0840e234f3..bdc5d3c0977d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -509,22 +509,19 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } -static struct wait_queue_head * -pipe_get_poll_head(struct file *filp, __poll_t events) -{ - struct pipe_inode_info *pipe = filp->private_data; - - return &pipe->wait; -} - /* No kernel lock held - fine */ -static __poll_t pipe_poll_mask(struct file *filp, __poll_t events) +static __poll_t +pipe_poll(struct file *filp, poll_table *wait) { + __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - int nrbufs = pipe->nrbufs; - __poll_t mask = 0; + int nrbufs; + + poll_wait(filp, &pipe->wait, wait); /* Reading only -- no need for acquiring the semaphore. */ + nrbufs = pipe->nrbufs; + mask = 0; if (filp->f_mode & FMODE_READ) { mask = (nrbufs > 0) ? 
EPOLLIN | EPOLLRDNORM : 0; if (!pipe->writers && filp->f_version != pipe->w_counter) @@ -744,54 +741,33 @@ fail_inode: int create_pipe_files(struct file **res, int flags) { - int err; struct inode *inode = get_pipe_inode(); struct file *f; - struct path path; if (!inode) return -ENFILE; - err = -ENOMEM; - path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name); - if (!path.dentry) - goto err_inode; - path.mnt = mntget(pipe_mnt); - - d_instantiate(path.dentry, inode); - - f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); + f = alloc_file_pseudo(inode, pipe_mnt, "", + O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)), + &pipefifo_fops); if (IS_ERR(f)) { - err = PTR_ERR(f); - goto err_dentry; + free_pipe_info(inode->i_pipe); + iput(inode); + return PTR_ERR(f); } - f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->private_data = inode->i_pipe; - res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); + res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), + &pipefifo_fops); if (IS_ERR(res[0])) { - err = PTR_ERR(res[0]); - goto err_file; + put_pipe_info(inode, inode->i_pipe); + fput(f); + return PTR_ERR(res[0]); } - - path_get(&path); res[0]->private_data = inode->i_pipe; - res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); res[1] = f; return 0; - -err_file: - put_filp(f); -err_dentry: - free_pipe_info(inode->i_pipe); - path_put(&path); - return err; - -err_inode: - free_pipe_info(inode->i_pipe); - iput(inode); - return err; } static int __do_pipe_flags(int *fd, struct file **files, int flags) @@ -1023,8 +999,7 @@ const struct file_operations pipefifo_fops = { .llseek = no_llseek, .read_iter = pipe_read, .write_iter = pipe_write, - .get_poll_head = pipe_get_poll_head, - .poll_mask = pipe_poll_mask, + .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, diff --git a/fs/proc/base.c b/fs/proc/base.c index 80aa42506b8b..aaffc0c30216 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -235,6 +235,10 @@ static 
ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, if (env_start != arg_end || env_start >= env_end) env_start = env_end = arg_end; + /* .. and limit it to a maximum of one page of slop */ + if (env_end >= arg_end + PAGE_SIZE) + env_end = arg_end + PAGE_SIZE - 1; + /* We're not going to care if "*ppos" has high bits set */ pos = arg_start + *ppos; @@ -254,10 +258,19 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); + long offset; - got = access_remote_vm(mm, pos, page, size, FOLL_ANON); - if (got <= 0) + /* + * Are we already starting past the official end? + * We always include the last byte that is *supposed* + * to be NUL + */ + offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; + + got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); + if (got <= offset) break; + got -= offset; /* Don't walk past a NUL character once you hit arg_end */ if (pos + got >= arg_end) { @@ -276,12 +289,17 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, n = arg_end - pos - 1; /* Cut off at first NUL after 'n' */ - got = n + strnlen(page+n, got-n); - if (!got) + got = n + strnlen(page+n, offset+got-n); + if (got < offset) break; + got -= offset; + + /* Include the NUL if it existed */ + if (got < size) + got++; } - got -= copy_to_user(buf, page, got); + got -= copy_to_user(buf, page+offset, got); if (unlikely(!got)) { if (!len) len = -EFAULT; @@ -2439,14 +2457,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, for (p = ents; p < last; p++) { if (p->len != dentry->d_name.len) continue; - if (!memcmp(dentry->d_name.name, p->name, p->len)) + if (!memcmp(dentry->d_name.name, p->name, p->len)) { + res = proc_pident_instantiate(dentry, task, p); break; + } } - if (p >= last) - goto out; - - res = proc_pident_instantiate(dentry, task, p); -out: put_task_struct(task); out_no_task: return res; diff --git a/fs/proc/generic.c 
b/fs/proc/generic.c index 7b4d9714f248..bb1c1625b158 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -409,7 +409,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, if (!ent) goto out; - if (qstr.len + 1 <= sizeof(ent->inline_name)) { + if (qstr.len + 1 <= SIZEOF_PDE_INLINE_NAME) { ent->name = ent->inline_name; } else { ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); @@ -564,11 +564,20 @@ static int proc_seq_open(struct inode *inode, struct file *file) return seq_open(file, de->seq_ops); } +static int proc_seq_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + if (de->state_size) + return seq_release_private(inode, file); + return seq_release(inode, file); +} + static const struct file_operations proc_seq_fops = { .open = proc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = proc_seq_release, }; struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, @@ -740,3 +749,27 @@ void *PDE_DATA(const struct inode *inode) return __PDE_DATA(inode); } EXPORT_SYMBOL(PDE_DATA); + +/* + * Pull a user buffer into memory and pass it to the file's write handler if + * one is supplied. The ->write() method is permitted to modify the + * kernel-side buffer. + */ +ssize_t proc_simple_write(struct file *f, const char __user *ubuf, size_t size, + loff_t *_pos) +{ + struct proc_dir_entry *pde = PDE(file_inode(f)); + char *buf; + int ret; + + if (!pde->write) + return -EACCES; + if (size == 0 || size > PAGE_SIZE - 1) + return -EINVAL; + buf = memdup_user_nul(ubuf, size); + if (IS_ERR(buf)) + return PTR_ERR(buf); + ret = pde->write(f, buf, size); + kfree(buf); + return ret == 0 ? 
size : ret; +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 2cf3b74391ca..85ffbd27f288 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -105,9 +105,8 @@ void __init proc_init_kmemcache(void) kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0, SLAB_ACCOUNT|SLAB_PANIC, NULL); proc_dir_entry_cache = kmem_cache_create_usercopy( - "proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC, - offsetof(struct proc_dir_entry, inline_name), - sizeof_field(struct proc_dir_entry, inline_name), NULL); + "proc_dir_entry", SIZEOF_PDE_SLOT, 0, SLAB_PANIC, + OFFSETOF_PDE_NAME, SIZEOF_PDE_INLINE_NAME, NULL); } static int proc_show_options(struct seq_file *seq, struct dentry *root) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 50cb22a08c2f..da3dbfa09e79 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -48,6 +48,7 @@ struct proc_dir_entry { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); }; + proc_write_t write; void *data; unsigned int state_size; unsigned int low_ino; @@ -61,14 +62,20 @@ struct proc_dir_entry { char *name; umode_t mode; u8 namelen; -#ifdef CONFIG_64BIT -#define SIZEOF_PDE_INLINE_NAME (192-155) -#else -#define SIZEOF_PDE_INLINE_NAME (128-95) -#endif - char inline_name[SIZEOF_PDE_INLINE_NAME]; + char inline_name[]; } __randomize_layout; +#define OFFSETOF_PDE_NAME offsetof(struct proc_dir_entry, inline_name) +#define SIZEOF_PDE_SLOT \ + (OFFSETOF_PDE_NAME + 34 <= 64 ? 64 : \ + OFFSETOF_PDE_NAME + 34 <= 128 ? 128 : \ + OFFSETOF_PDE_NAME + 34 <= 192 ? 192 : \ + OFFSETOF_PDE_NAME + 34 <= 256 ? 256 : \ + OFFSETOF_PDE_NAME + 34 <= 512 ? 
512 : \ + 0) + +#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE_SLOT - OFFSETOF_PDE_NAME) + extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); @@ -189,6 +196,7 @@ static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } +extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *); /* * inode.c diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 7d94fa005b0d..d5e0fcb3439e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -46,6 +46,9 @@ static int seq_open_net(struct inode *inode, struct file *file) WARN_ON_ONCE(state_size < sizeof(*p)); + if (file->f_mode & FMODE_WRITE && !PDE(inode)->write) + return -EACCES; + net = get_proc_net(inode); if (!net) return -ENXIO; @@ -73,6 +76,7 @@ static int seq_release_net(struct inode *ino, struct file *f) static const struct file_operations proc_net_seq_fops = { .open = seq_open_net, .read = seq_read, + .write = proc_simple_write, .llseek = seq_lseek, .release = seq_release_net, }; @@ -93,6 +97,50 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(proc_create_net_data); +/** + * proc_create_net_data_write - Create a writable net_ns-specific proc file + * @name: The name of the file. + * @mode: The file's access mode. + * @parent: The parent directory in which to create. + * @ops: The seq_file ops with which to read the file. + * @write: The write method with which to 'modify' the file. + * @data: Data for retrieval by PDE_DATA(). + * + * Create a network namespaced proc file in the @parent directory with the + * specified @name and @mode that allows reading of a file that displays a + * series of elements and also provides for the file accepting writes that have + * some arbitrary effect. + * + * The functions in the @ops table are used to iterate over items to be + * presented and extract the readable content using the seq_file interface. 
+ * + * The @write function is called with the data copied into a kernel space + * scratch buffer and has a NUL appended for convenience. The buffer may be + * modified by the @write function. @write should return 0 on success. + * + * The @data value is accessible from the @show and @write functions by calling + * PDE_DATA() on the file inode. The network namespace must be accessed by + * calling seq_file_net() on the seq_file struct. + */ +struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode, + struct proc_dir_entry *parent, + const struct seq_operations *ops, + proc_write_t write, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + p->write = write; + return proc_register(parent, p); +} +EXPORT_SYMBOL_GPL(proc_create_net_data_write); + static int single_open_net(struct inode *inode, struct file *file) { struct proc_dir_entry *de = PDE(inode); @@ -119,6 +167,7 @@ static int single_release_net(struct inode *ino, struct file *f) static const struct file_operations proc_net_single_fops = { .open = single_open_net, .read = seq_read, + .write = proc_simple_write, .llseek = seq_lseek, .release = single_release_net, }; @@ -138,6 +187,49 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(proc_create_net_single); +/** + * proc_create_net_single_write - Create a writable net_ns-specific proc file + * @name: The name of the file. + * @mode: The file's access mode. + * @parent: The parent directory in which to create. + * @show: The seqfile show method with which to read the file. + * @write: The write method with which to 'modify' the file. + * @data: Data for retrieval by PDE_DATA(). 
+ * + * Create a network-namespaced proc file in the @parent directory with the + * specified @name and @mode that allows reading of a file that displays a + * single element rather than a series and also provides for the file accepting + * writes that have some arbitrary effect. + * + * The @show function is called to extract the readable content via the + * seq_file interface. + * + * The @write function is called with the data copied into a kernel space + * scratch buffer and has a NUL appended for convenience. The buffer may be + * modified by the @write function. @write should return 0 on success. + * + * The @data value is accessible from the @show and @write functions by calling + * PDE_DATA() on the file inode. The network namespace must be accessed by + * calling seq_file_single_net() on the seq_file struct. + */ +struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), + proc_write_t write, + void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_single_fops; + p->single_show = show; + p->write = write; + return proc_register(parent, p); +} +EXPORT_SYMBOL_GPL(proc_create_net_single_write); + static struct net *get_proc_task_net(struct inode *dir) { struct task_struct *task; diff --git a/fs/proc/root.c b/fs/proc/root.c index 61b7340b357a..f4b1a9d2eca6 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -204,8 +204,7 @@ struct proc_dir_entry proc_root = { .proc_fops = &proc_root_operations, .parent = &proc_root, .subdir = RB_ROOT, - .name = proc_root.inline_name, - .inline_name = "/proc", + .name = "/proc", }; int pid_ns_prepare_proc(struct pid_namespace *ns) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e9679016271f..dfd73a4616ce 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -831,7 +831,8 @@ static int show_smap(struct seq_file *m, void 
*v, int is_pid) SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); SEQ_PUT_DEC(" kB\nSwapPss: ", mss->swap_pss >> PSS_SHIFT); - SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nLocked: ", + mss->pss_locked >> PSS_SHIFT); seq_puts(m, " kB\n"); } if (!rollup_mode) { diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 3bd12f955867..3f723cb478af 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -10,7 +10,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) { struct timespec uptime; - struct timespec idle; + struct timespec64 idle; u64 nsec; u32 rem; int i; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index dc720573fd53..c238ab8ba31d 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -328,7 +328,7 @@ void pstore_record_init(struct pstore_record *record, record->psi = psinfo; /* Report zeroed timestamp if called before timekeeping has resumed. */ - record->time = ns_to_timespec(ktime_get_real_fast_ns()); + record->time = ns_to_timespec64(ktime_get_real_fast_ns()); } /* diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 49b2bc114868..bbd1e357c23d 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -153,21 +153,23 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, return prz; } -static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time, +static int ramoops_read_kmsg_hdr(char *buffer, struct timespec64 *time, bool *compressed) { char data_type; int header_length = 0; - if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n%n", &time->tv_sec, - &time->tv_nsec, &data_type, &header_length) == 3) { + if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lld.%lu-%c\n%n", + (time64_t *)&time->tv_sec, &time->tv_nsec, &data_type, + &header_length) == 3) { if (data_type == 'C') *compressed = true; else *compressed = false; - } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n%n", - &time->tv_sec, &time->tv_nsec, &header_length) == 2) { - *compressed = false; + } else if (sscanf(buffer, 
RAMOOPS_KERNMSG_HDR "%lld.%lu\n%n", + (time64_t *)&time->tv_sec, &time->tv_nsec, + &header_length) == 2) { + *compressed = false; } else { time->tv_sec = 0; time->tv_nsec = 0; @@ -360,8 +362,8 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz, char *hdr; size_t len; - hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n", - record->time.tv_sec, + hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lld.%06lu-%c\n", + (time64_t)record->time.tv_sec, record->time.tv_nsec / 1000, record->compressed ? 'C' : 'D'); WARN_ON_ONCE(!hdr); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d88231e3b2be..fc20e06c56ba 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -711,21 +711,18 @@ EXPORT_SYMBOL(dquot_quota_sync); static unsigned long dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct list_head *head; struct dquot *dquot; unsigned long freed = 0; spin_lock(&dq_list_lock); - head = free_dquots.prev; - while (head != &free_dquots && sc->nr_to_scan) { - dquot = list_entry(head, struct dquot, dq_free); + while (!list_empty(&free_dquots) && sc->nr_to_scan) { + dquot = list_first_entry(&free_dquots, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); sc->nr_to_scan--; freed++; - head = free_dquots.prev; } spin_unlock(&dq_list_lock); return freed; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 5089dac02660..97f3fc4fdd79 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1316,7 +1316,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, int jbegin_count; umode_t old_inode_mode; unsigned long savelink = 1; - struct timespec ctime; + struct timespec64 ctime; if (flags & ~RENAME_NOREPLACE) return -EINVAL; diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 7e288d97adcb..9fed1c05f1f4 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -76,83 +76,99 @@ static char 
*le_type(struct reiserfs_key *key) } /* %k */ -static void sprintf_le_key(char *buf, struct reiserfs_key *key) +static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), - le32_to_cpu(key->k_objectid), le_offset(key), - le_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + le32_to_cpu(key->k_dir_id), + le32_to_cpu(key->k_objectid), le_offset(key), + le_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } /* %K */ -static void sprintf_cpu_key(char *buf, struct cpu_key *key) +static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), - cpu_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, + reiserfs_cpu_offset(key), cpu_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) +static int scnprintf_de_head(char *buf, size_t size, + struct reiserfs_de_head *deh) { if (deh) - sprintf(buf, - "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", - deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), - deh_location(deh), deh_state(deh)); + return scnprintf(buf, size, + "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", + deh_offset(deh), deh_dir_id(deh), + deh_objectid(deh), deh_location(deh), + deh_state(deh)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_item_head(char *buf, struct item_head *ih) +static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih) { if (ih) { - strcpy(buf, - (ih_version(ih) == KEY_FORMAT_3_6) ? 
"*3.6* " : "*3.5*"); - sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); - sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " - "free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), ih_free_space(ih)); + char *p = buf; + char * const end = buf + size; + + p += scnprintf(p, end - p, "%s", + (ih_version(ih) == KEY_FORMAT_3_6) ? + "*3.6* " : "*3.5*"); + + p += scnprintf_le_key(p, end - p, &ih->ih_key); + + p += scnprintf(p, end - p, + ", item_len %d, item_location %d, free_space(entry_count) %d", + ih_item_len(ih), ih_location(ih), + ih_free_space(ih)); + return p - buf; } else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) +static int scnprintf_direntry(char *buf, size_t size, + struct reiserfs_dir_entry *de) { char name[20]; memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); + return scnprintf(buf, size, "\"%s\"==>[%d %d]", + name, de->de_dir_id, de->de_objectid); } -static void sprintf_block_head(char *buf, struct buffer_head *bh) +static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); + return scnprintf(buf, size, + "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); } -static void sprintf_buffer_head(char *buf, struct buffer_head *bh) +static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, - "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bh->b_bdev, bh->b_size, - (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty(bh) ? 
"DIRTY" : "CLEAN", - buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); + return scnprintf(buf, size, + "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + bh->b_bdev, bh->b_size, + (unsigned long long)bh->b_blocknr, + atomic_read(&(bh->b_count)), + bh->b_state, bh->b_page, + buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty(bh) ? "DIRTY" : "CLEAN", + buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); } -static void sprintf_disk_child(char *buf, struct disk_child *dc) +static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc) { - sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), - dc_size(dc)); + return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]", + dc_block_number(dc), dc_size(dc)); } static char *is_there_reiserfs_struct(char *fmt, int *what) @@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args) char *fmt1 = fmt_buf; char *k; char *p = error_buf; + char * const end = &error_buf[sizeof(error_buf)]; int what; spin_lock(&error_lock); - strcpy(fmt1, fmt); + if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) { + strscpy(error_buf, "format string too long", end - error_buf); + goto out_unlock; + } while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { *k = 0; - p += vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); switch (what) { case 'k': - sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); + p += scnprintf_le_key(p, end - p, + va_arg(args, struct reiserfs_key *)); break; case 'K': - sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); + p += scnprintf_cpu_key(p, end - p, + va_arg(args, struct cpu_key *)); break; case 'h': - sprintf_item_head(p, va_arg(args, struct item_head *)); + p += scnprintf_item_head(p, end - p, + va_arg(args, struct item_head *)); break; case 't': - sprintf_direntry(p, - va_arg(args, - struct reiserfs_dir_entry *)); + p += scnprintf_direntry(p, end - p, + va_arg(args, struct reiserfs_dir_entry *)); break; case 
'y': - sprintf_disk_child(p, - va_arg(args, struct disk_child *)); + p += scnprintf_disk_child(p, end - p, + va_arg(args, struct disk_child *)); break; case 'z': - sprintf_block_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_block_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'b': - sprintf_buffer_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_buffer_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'a': - sprintf_de_head(p, - va_arg(args, - struct reiserfs_de_head *)); + p += scnprintf_de_head(p, end - p, + va_arg(args, struct reiserfs_de_head *)); break; } - p += strlen(p); fmt1 = k + 2; } - vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); +out_unlock: spin_unlock(&error_lock); } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5dbf5324bdda..ff94fad477e4 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -451,10 +451,10 @@ int reiserfs_commit_write(struct file *f, struct page *page, static void update_ctime(struct inode *inode) { - struct timespec now = current_time(inode); + struct timespec64 now = current_time(inode); if (inode_unhashed(inode) || !inode->i_nlink || - timespec_equal(&inode->i_ctime, &now)) + timespec64_equal(&inode->i_ctime, &now)) return; inode->i_ctime = current_time(inode); diff --git a/fs/select.c b/fs/select.c index 317891ff8165..4a6b6e4b21cb 100644 --- a/fs/select.c +++ b/fs/select.c @@ -34,29 +34,6 @@ #include <linux/uaccess.h> -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) -{ - if (file->f_op->poll) { - return file->f_op->poll(file, pt); - } else if (file_has_poll_mask(file)) { - unsigned int events = poll_requested_events(pt); - struct wait_queue_head *head; - - if (pt && pt->_qproc) { - head = file->f_op->get_poll_head(file, events); - if (!head) - return DEFAULT_POLLMASK; - if (IS_ERR(head)) - return EPOLLERR; - pt->_qproc(file, head, pt); - } - - return file->f_op->poll_mask(file, events); - } else 
{ - return DEFAULT_POLLMASK; - } -} -EXPORT_SYMBOL_GPL(vfs_poll); /* * Estimate expected accuracy in ns from a timeval. diff --git a/fs/signalfd.c b/fs/signalfd.c index cbb42f77a2bd..4fcd1498acf5 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -259,10 +259,8 @@ static const struct file_operations signalfd_fops = { .llseek = noop_llseek, }; -static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, - int flags) +static int do_signalfd4(int ufd, sigset_t *mask, int flags) { - sigset_t sigmask; struct signalfd_ctx *ctx; /* Check the SFD_* constants for consistency. */ @@ -272,18 +270,15 @@ static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK)) return -EINVAL; - if (sizemask != sizeof(sigset_t) || - copy_from_user(&sigmask, user_mask, sizeof(sigmask))) - return -EINVAL; - sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - signotset(&sigmask); + sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + signotset(mask); if (ufd == -1) { ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->sigmask = sigmask; + ctx->sigmask = *mask; /* * When we call this, the initialization must be complete, since @@ -303,7 +298,7 @@ static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, return -EINVAL; } spin_lock_irq(&current->sighand->siglock); - ctx->sigmask = sigmask; + ctx->sigmask = *mask; spin_unlock_irq(&current->sighand->siglock); wake_up(&current->sighand->signalfd_wqh); @@ -316,46 +311,51 @@ static int do_signalfd4(int ufd, sigset_t __user *user_mask, size_t sizemask, SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, size_t, sizemask, int, flags) { - return do_signalfd4(ufd, user_mask, sizemask, flags); + sigset_t mask; + + if (sizemask != sizeof(sigset_t) || + copy_from_user(&mask, user_mask, sizeof(mask))) + return -EINVAL; + return do_signalfd4(ufd, &mask, flags); } SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, 
size_t, sizemask) { - return do_signalfd4(ufd, user_mask, sizemask, 0); + sigset_t mask; + + if (sizemask != sizeof(sigset_t) || + copy_from_user(&mask, user_mask, sizeof(mask))) + return -EINVAL; + return do_signalfd4(ufd, &mask, 0); } #ifdef CONFIG_COMPAT static long do_compat_signalfd4(int ufd, - const compat_sigset_t __user *sigmask, + const compat_sigset_t __user *user_mask, compat_size_t sigsetsize, int flags) { - sigset_t tmp; - sigset_t __user *ksigmask; + sigset_t mask; if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; - if (get_compat_sigset(&tmp, sigmask)) - return -EFAULT; - ksigmask = compat_alloc_user_space(sizeof(sigset_t)); - if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) + if (get_compat_sigset(&mask, user_mask)) return -EFAULT; - - return do_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags); + return do_signalfd4(ufd, &mask, flags); } COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, - const compat_sigset_t __user *, sigmask, + const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize, int, flags) { - return do_compat_signalfd4(ufd, sigmask, sigsetsize, flags); + return do_compat_signalfd4(ufd, user_mask, sigsetsize, flags); } COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd, - const compat_sigset_t __user *,sigmask, + const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize) { - return do_compat_signalfd4(ufd, sigmask, sigsetsize, 0); + return do_compat_signalfd4(ufd, user_mask, sigsetsize, 0); } #endif diff --git a/fs/splice.c b/fs/splice.c index 2365ab073a27..b3daa971f597 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1243,38 +1243,26 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, * For lack of a better implementation, implement vmsplice() to userspace * as a simple copy of the pipes pages to the user iov. 
*/ -static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, - unsigned long nr_segs, unsigned int flags) +static long vmsplice_to_user(struct file *file, struct iov_iter *iter, + unsigned int flags) { - struct pipe_inode_info *pipe; - struct splice_desc sd; - long ret; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; + struct pipe_inode_info *pipe = get_pipe_info(file); + struct splice_desc sd = { + .total_len = iov_iter_count(iter), + .flags = flags, + .u.data = iter + }; + long ret = 0; - pipe = get_pipe_info(file); if (!pipe) return -EBADF; - ret = import_iovec(READ, uiov, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (ret < 0) - return ret; - - sd.total_len = iov_iter_count(&iter); - sd.len = 0; - sd.flags = flags; - sd.u.data = &iter; - sd.pos = 0; - if (sd.total_len) { pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, pipe_to_user); pipe_unlock(pipe); } - kfree(iov); return ret; } @@ -1283,14 +1271,11 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. 
*/ -static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, - unsigned long nr_segs, unsigned int flags) +static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, + unsigned int flags) { struct pipe_inode_info *pipe; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter from; - long ret; + long ret = 0; unsigned buf_flag = 0; if (flags & SPLICE_F_GIFT) @@ -1300,22 +1285,31 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, if (!pipe) return -EBADF; - ret = import_iovec(WRITE, uiov, nr_segs, - ARRAY_SIZE(iovstack), &iov, &from); - if (ret < 0) - return ret; - pipe_lock(pipe); ret = wait_for_space(pipe, flags); if (!ret) - ret = iter_to_pipe(&from, pipe, buf_flag); + ret = iter_to_pipe(iter, pipe, buf_flag); pipe_unlock(pipe); if (ret > 0) wakeup_pipe_readers(pipe); - kfree(iov); return ret; } +static int vmsplice_type(struct fd f, int *type) +{ + if (!f.file) + return -EBADF; + if (f.file->f_mode & FMODE_WRITE) { + *type = WRITE; + } else if (f.file->f_mode & FMODE_READ) { + *type = READ; + } else { + fdput(f); + return -EBADF; + } + return 0; +} + /* * Note that vmsplice only really supports true splicing _from_ user memory * to a pipe, not the other way around. Splicing from user memory is a simple @@ -1332,57 +1326,69 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, * Currently we punt and implement it as a normal copy, see pipe_to_user(). 
* */ -static long do_vmsplice(int fd, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags) +static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags) { - struct fd f; - long error; - if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; - if (unlikely(nr_segs > UIO_MAXIOV)) - return -EINVAL; - else if (unlikely(!nr_segs)) - return 0; - error = -EBADF; - f = fdget(fd); - if (f.file) { - if (f.file->f_mode & FMODE_WRITE) - error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); - else if (f.file->f_mode & FMODE_READ) - error = vmsplice_to_user(f.file, iov, nr_segs, flags); - - fdput(f); - } + if (!iov_iter_count(iter)) + return 0; - return error; + if (iov_iter_rw(iter) == WRITE) + return vmsplice_to_pipe(f, iter, flags); + else + return vmsplice_to_user(f, iter, flags); } -SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, +SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, unsigned long, nr_segs, unsigned int, flags) { - return do_vmsplice(fd, iov, nr_segs, flags); + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + long error; + struct fd f; + int type; + + f = fdget(fd); + error = vmsplice_type(f, &type); + if (error) + return error; + + error = import_iovec(type, uiov, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (!error) { + error = do_vmsplice(f.file, &iter, flags); + kfree(iov); + } + fdput(f); + return error; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32, unsigned int, nr_segs, unsigned int, flags) { - unsigned i; - struct iovec __user *iov; - if (nr_segs > UIO_MAXIOV) - return -EINVAL; - iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec)); - for (i = 0; i < nr_segs; i++) { - struct compat_iovec v; - if (get_user(v.iov_base, &iov32[i].iov_base) || - get_user(v.iov_len, &iov32[i].iov_len) || - put_user(compat_ptr(v.iov_base), &iov[i].iov_base) || - 
put_user(v.iov_len, &iov[i].iov_len)) - return -EFAULT; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + long error; + struct fd f; + int type; + + f = fdget(fd); + error = vmsplice_type(f, &type); + if (error) + return error; + + error = compat_import_iovec(type, iov32, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (!error) { + error = do_vmsplice(f.file, &iter, flags); + kfree(iov); } - return do_vmsplice(fd, iov, nr_segs, flags); + fdput(f); + return error; } #endif diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2751476e6b6e..f098b9f1c396 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, } if (compressed) { + if (!msblk->stream) + goto read_failure; length = squashfs_decompress(msblk, bh, b, offset, length, output); if (length < 0) diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 23813c078cc9..0839efa720b3 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); + if (unlikely(length < 0)) + return -EIO; + while (length) { entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); if (entry->error) { diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 13d80947bf9e..f1c1430ae721 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n, } for (i = 0; i < blocks; i++) { - int size = le32_to_cpu(blist[i]); + int size = squashfs_block_size(blist[i]); + if (size < 0) { + err = size; + goto failure; + } block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); } n -= blocks; @@ -367,7 +371,24 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) sizeof(size)); if (res < 0) return res; - return le32_to_cpu(size); + return 
squashfs_block_size(size); +} + +void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) +{ + int copied; + void *pageaddr; + + pageaddr = kmap_atomic(page); + copied = squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + copied, 0, PAGE_SIZE - copied); + kunmap_atomic(pageaddr); + + flush_dcache_page(page); + if (copied == avail) + SetPageUptodate(page); + else + SetPageError(page); } /* Copy data into page cache */ @@ -376,7 +397,6 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - void *pageaddr; int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; int start_index = page->index & ~mask, end_index = start_index | mask; @@ -402,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, if (PageUptodate(push_page)) goto skip_page; - pageaddr = kmap_atomic(push_page); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(push_page); - SetPageUptodate(push_page); + squashfs_fill_page(push_page, buffer, offset, avail); skip_page: unlock_page(push_page); if (i != page->index) @@ -416,10 +431,9 @@ skip_page: } /* Read datablock stored packed inside a fragment (tail-end packed block) */ -static int squashfs_readpage_fragment(struct page *page) +static int squashfs_readpage_fragment(struct page *page, int expected) { struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -430,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(page, 
buffer, i_size_read(inode) & - (msblk->block_size - 1), + squashfs_copy_cache(page, buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); return res; } -static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +static int squashfs_readpage_sparse(struct page *page, int expected) { - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - - squashfs_copy_cache(page, NULL, bytes, 0); + squashfs_copy_cache(page, NULL, expected, 0); return 0; } @@ -456,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page) struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; int index = page->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; + int expected = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; int res; void *pageaddr; @@ -474,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page) goto error_out; if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); + res = squashfs_readpage_sparse(page, expected); else - res = squashfs_readpage_block(page, block, bsize); + res = squashfs_readpage_block(page, block, bsize, expected); } else - res = squashfs_readpage_fragment(page); + res = squashfs_readpage_fragment(page, expected); if (!res) return 0; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index f2310d2a2019..a9ba8d96776a 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -20,7 +20,7 @@ #include "squashfs.h" /* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) +int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected) { struct inode *i = page->mapping->host; struct 
squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, @@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize) ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(page, buffer, buffer->length, 0); + squashfs_copy_cache(page, buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index cb485d8e0e91..80db1b86a27c 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -21,10 +21,11 @@ #include "page_actor.h" static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); + int pages, struct page **page, int bytes); /* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, + int expected) { struct inode *inode = target_page->mapping->host; @@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) * using an intermediate buffer. 
*/ res = squashfs_read_cache(target_page, block, bsize, pages, - page); + page, expected); if (res < 0) goto mark_errored; @@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) if (res < 0) goto mark_errored; + if (res != expected) { + res = -EIO; + goto mark_errored; + } + /* Last page may have trailing bytes not filled */ bytes = res % PAGE_SIZE; if (bytes) { @@ -138,13 +144,12 @@ out: static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) + int pages, struct page **page, int bytes) { struct inode *i = target_page->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; + int res = buffer->error, n, offset = 0; if (res) { ERROR("Unable to read page, block %llx, size %x\n", block, @@ -159,12 +164,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, if (page[n] == NULL) continue; - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); + squashfs_fill_page(page[n], buffer, offset, avail); unlock_page(page[n]); if (page[n] != target_page) put_page(page[n]); diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 0ed6edbc5c71..0681feab4a84 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, u64 *fragment_block) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int block = SQUASHFS_FRAGMENT_INDEX(fragment); - int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); - u64 start_block = le64_to_cpu(msblk->fragment_index[block]); + int block, offset, size; struct squashfs_fragment_entry fragment_entry; - int size; + u64 start_block; + + if 
(fragment >= msblk->fragments) + return -EIO; + block = SQUASHFS_FRAGMENT_INDEX(fragment); + offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); + + start_block = le64_to_cpu(msblk->fragment_index[block]); size = squashfs_read_metadata(sb, &fragment_entry, &start_block, &offset, sizeof(fragment_entry)); @@ -61,9 +66,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, return size; *fragment_block = le64_to_cpu(fragment_entry.start_block); - size = le32_to_cpu(fragment_entry.size); - - return size; + return squashfs_block_size(fragment_entry.size); } diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 887d6d270080..f89f8a74c6ce 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -67,11 +67,12 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); /* file.c */ +void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); /* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); +extern int squashfs_readpage_block(struct page *, u64, int, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 24d12fd14177..4e6853f084d0 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -129,6 +129,12 @@ #define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) +static inline int squashfs_block_size(__le32 raw) +{ + u32 size = le32_to_cpu(raw); + return (size >> 25) ? -EIO : size; +} + /* * Inode number ops. 
Inodes consist of a compressed block number, and an * uncompressed offset within that block diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3..ef69c31947bf 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -75,6 +75,7 @@ struct squashfs_sb_info { unsigned short block_log; long long bytes_used; unsigned int inodes; + unsigned int fragments; int xattr_ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 8a73b97217c8..40e657386fa5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->inode_table = le64_to_cpu(sblk->inode_table_start); msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); + msblk->fragments = le32_to_cpu(sblk->fragments); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); - TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); + TRACE("Number of fragments %d\n", msblk->fragments); TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); @@ -272,7 +273,7 @@ allocate_id_index_table: sb->s_export_op = &squashfs_export_ops; handle_fragments: - fragments = le32_to_cpu(sblk->fragments); + fragments = msblk->fragments; if (fragments == 0) goto check_directory_table; diff --git a/fs/timerfd.c b/fs/timerfd.c index d84a2bee4f82..d69ad801eb80 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -66,7 +66,7 @@ static void timerfd_triggered(struct timerfd_ctx *ctx) 
spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; ctx->ticks++; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); spin_unlock_irqrestore(&ctx->wqh.lock, flags); } @@ -107,7 +107,7 @@ void timerfd_clock_was_set(void) if (ctx->moffs != moffs) { ctx->moffs = KTIME_MAX; ctx->ticks++; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); } @@ -226,20 +226,21 @@ static int timerfd_release(struct inode *inode, struct file *file) kfree_rcu(ctx, rcu); return 0; } - -static struct wait_queue_head *timerfd_get_poll_head(struct file *file, - __poll_t eventmask) + +static __poll_t timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; + __poll_t events = 0; + unsigned long flags; - return &ctx->wqh; -} + poll_wait(file, &ctx->wqh, wait); -static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask) -{ - struct timerfd_ctx *ctx = file->private_data; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->ticks) + events |= EPOLLIN; + spin_unlock_irqrestore(&ctx->wqh.lock, flags); - return ctx->ticks ? 
EPOLLIN : 0; + return events; } static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, @@ -344,7 +345,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg spin_lock_irq(&ctx->wqh.lock); if (!timerfd_canceled(ctx)) { ctx->ticks = ticks; - wake_up_locked(&ctx->wqh); + wake_up_locked_poll(&ctx->wqh, EPOLLIN); } else ret = -ECANCELED; spin_unlock_irq(&ctx->wqh.lock); @@ -363,8 +364,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg static const struct file_operations timerfd_fops = { .release = timerfd_release, - .get_poll_head = timerfd_get_poll_head, - .poll_mask = timerfd_poll_mask, + .poll = timerfd_poll, .read = timerfd_read, .llseek = noop_llseek, .show_fdinfo = timerfd_show, @@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t) } SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, - const struct itimerspec __user *, utmr, - struct itimerspec __user *, otmr) + const struct __kernel_itimerspec __user *, utmr, + struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 new, old; int ret; @@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return ret; } -SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) +SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); @@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) return put_itimerspec64(&kotmr, otmr) ? 
-EFAULT : 0; } -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct compat_itimerspec __user *, utmr, struct compat_itimerspec __user *, otmr) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 4e267cc21c77..9da224d4f2da 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1276,7 +1276,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino = 3 }; struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; - struct timespec time; + struct timespec64 time; unsigned int uninitialized_var(saved_nlink); struct fscrypt_name old_nm, new_nm; @@ -1504,7 +1504,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, int sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); struct inode *fst_inode = d_inode(old_dentry); struct inode *snd_inode = d_inode(new_dentry); - struct timespec time; + struct timespec64 time; int err; struct fscrypt_name fst_nm, snd_nm; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 28b80713a163..fd7eb6fe9090 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1089,14 +1089,14 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr) if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; if (attr->ia_valid & ATTR_ATIME) - inode->i_atime = timespec_trunc(attr->ia_atime, - inode->i_sb->s_time_gran); + inode->i_atime = timespec64_trunc(attr->ia_atime, + inode->i_sb->s_time_gran); if (attr->ia_valid & ATTR_MTIME) - inode->i_mtime = timespec_trunc(attr->ia_mtime, - inode->i_sb->s_time_gran); + inode->i_mtime = timespec64_trunc(attr->ia_mtime, + inode->i_sb->s_time_gran); if (attr->ia_valid & ATTR_CTIME) - inode->i_ctime = timespec_trunc(attr->ia_ctime, - inode->i_sb->s_time_gran); + inode->i_ctime = timespec64_trunc(attr->ia_ctime, + inode->i_sb->s_time_gran); if (attr->ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -1367,8 +1367,9 @@ out: 
static inline int mctime_update_needed(const struct inode *inode, const struct timespec *now) { - if (!timespec_equal(&inode->i_mtime, now) || - !timespec_equal(&inode->i_ctime, now)) + struct timespec64 now64 = timespec_to_timespec64(*now); + if (!timespec64_equal(&inode->i_mtime, &now64) || + !timespec64_equal(&inode->i_ctime, &now64)) return 1; return 0; } @@ -1380,7 +1381,7 @@ static inline int mctime_update_needed(const struct inode *inode, * * This function updates time of the inode. */ -int ubifs_update_time(struct inode *inode, struct timespec *time, +int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); @@ -1424,7 +1425,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, */ static int update_mctime(struct inode *inode) { - struct timespec now = current_time(inode); + struct timespec now = timespec64_to_timespec(current_time(inode)); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1518,7 +1519,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; - struct timespec now = current_time(inode); + struct timespec now = timespec64_to_timespec(current_time(inode)); struct ubifs_budget_req req = { .new_page = 1 }; int err, update_time; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 209d6369ae71..04bf84d71e7b 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1738,7 +1738,7 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc); int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); #ifdef CONFIG_UBIFS_ATIME_SUPPORT -int ubifs_update_time(struct inode *inode, struct timespec *time, int flags); +int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags); #endif /* dir.c 
*/ diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 1b961b1d9699..fcda0fc97b90 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -533,8 +533,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, udf_write_aext(table, &epos, &eloc, (etype << 30) | elen, 1); } else - udf_delete_aext(table, epos, eloc, - (etype << 30) | elen); + udf_delete_aext(table, epos); } else { alloc_count = 0; } @@ -630,7 +629,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, if (goal_elen) udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1); else - udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); + udf_delete_aext(table, goal_epos); brelse(goal_epos.bh); udf_add_free_space(sb, partition, -1); diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 0a98a2369738..d9523013096f 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -141,10 +141,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, fibh->ebh->b_data, sizeof(struct fileIdentDesc) + fibh->soffset); - fi_len = (sizeof(struct fileIdentDesc) + - cfi->lengthFileIdent + - le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3; - + fi_len = udf_dir_entry_len(cfi); *nf_pos += fi_len - (fibh->eoffset - fibh->soffset); fibh->eoffset = fibh->soffset + fi_len; } else { @@ -152,6 +149,9 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, sizeof(struct fileIdentDesc)); } } + /* Got last entry outside of dir size - fs is corrupted! 
*/ + if (*nf_pos > dir->i_size) + return NULL; return fi; } diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index b7a0d4b4bda1..56569023783b 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -124,8 +124,8 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; else iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; - inode->i_mtime = inode->i_atime = inode->i_ctime = - iinfo->i_crtime = current_time(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + iinfo->i_crtime = timespec64_to_timespec(inode->i_mtime); if (unlikely(insert_inode_locked(inode) < 0)) { make_bad_inode(inode); iput(inode); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index c80765d62f7e..9915a58fbabd 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1147,8 +1147,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr if (startnum > endnum) { for (i = 0; i < (startnum - endnum); i++) - udf_delete_aext(inode, *epos, laarr[i].extLocation, - laarr[i].extLength); + udf_delete_aext(inode, *epos); } else if (startnum < endnum) { for (i = 0; i < (endnum - startnum); i++) { udf_insert_aext(inode, *epos, laarr[i].extLocation, @@ -1271,6 +1270,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) struct udf_inode_info *iinfo = UDF_I(inode); struct udf_sb_info *sbi = UDF_SB(inode->i_sb); struct kernel_lb_addr *iloc = &iinfo->i_location; + struct timespec ts; unsigned int link_count; unsigned int indirections = 0; int bs = inode->i_sb->s_blocksize; @@ -1443,15 +1443,12 @@ reread: inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); - if (!udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime)) - inode->i_atime = sbi->s_record_time; - - if (!udf_disk_stamp_to_time(&inode->i_mtime, - fe->modificationTime)) - inode->i_mtime = sbi->s_record_time; - - if (!udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime)) - inode->i_ctime = 
sbi->s_record_time; + udf_disk_stamp_to_time(&ts, fe->accessTime); + inode->i_atime = timespec_to_timespec64(ts); + udf_disk_stamp_to_time(&ts, fe->modificationTime); + inode->i_mtime = timespec_to_timespec64(ts); + udf_disk_stamp_to_time(&ts, fe->attrTime); + inode->i_ctime = timespec_to_timespec64(ts); iinfo->i_unique = le64_to_cpu(fe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); @@ -1461,18 +1458,13 @@ reread: inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); - if (!udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime)) - inode->i_atime = sbi->s_record_time; - - if (!udf_disk_stamp_to_time(&inode->i_mtime, - efe->modificationTime)) - inode->i_mtime = sbi->s_record_time; - - if (!udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime)) - iinfo->i_crtime = sbi->s_record_time; - - if (!udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime)) - inode->i_ctime = sbi->s_record_time; + udf_disk_stamp_to_time(&ts, efe->accessTime); + inode->i_atime = timespec_to_timespec64(ts); + udf_disk_stamp_to_time(&ts, efe->modificationTime); + inode->i_mtime = timespec_to_timespec64(ts); + udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime); + udf_disk_stamp_to_time(&ts, efe->attrTime); + inode->i_ctime = timespec_to_timespec64(ts); iinfo->i_unique = le64_to_cpu(efe->uniqueID); iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); @@ -1722,9 +1714,12 @@ static int udf_update_inode(struct inode *inode, int do_sync) inode->i_sb->s_blocksize - sizeof(struct fileEntry)); fe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); - udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime); - udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime); - udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime); + udf_time_to_disk_stamp(&fe->accessTime, + timespec64_to_timespec(inode->i_atime)); + udf_time_to_disk_stamp(&fe->modificationTime, + timespec64_to_timespec(inode->i_mtime)); + 
udf_time_to_disk_stamp(&fe->attrTime, + timespec64_to_timespec(inode->i_ctime)); memset(&(fe->impIdent), 0, sizeof(struct regid)); strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; @@ -1743,14 +1738,17 @@ static int udf_update_inode(struct inode *inode, int do_sync) efe->objectSize = cpu_to_le64(inode->i_size); efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); - udf_adjust_time(iinfo, inode->i_atime); - udf_adjust_time(iinfo, inode->i_mtime); - udf_adjust_time(iinfo, inode->i_ctime); + udf_adjust_time(iinfo, timespec64_to_timespec(inode->i_atime)); + udf_adjust_time(iinfo, timespec64_to_timespec(inode->i_mtime)); + udf_adjust_time(iinfo, timespec64_to_timespec(inode->i_ctime)); - udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime); - udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime); + udf_time_to_disk_stamp(&efe->accessTime, + timespec64_to_timespec(inode->i_atime)); + udf_time_to_disk_stamp(&efe->modificationTime, + timespec64_to_timespec(inode->i_mtime)); udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime); - udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime); + udf_time_to_disk_stamp(&efe->attrTime, + timespec64_to_timespec(inode->i_ctime)); memset(&(efe->impIdent), 0, sizeof(efe->impIdent)); strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); @@ -2177,14 +2175,15 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos, return (nelen >> 30); } -int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, - struct kernel_lb_addr eloc, uint32_t elen) +int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) { struct extent_position oepos; int adsize; int8_t etype; struct allocExtDesc *aed; struct udf_inode_info *iinfo; + struct kernel_lb_addr eloc; + uint32_t elen; if (epos.bh) { get_bh(epos.bh); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index c586026508db..58cc2414992b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -351,8 +351,6 
@@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, loff_t f_pos; loff_t size = udf_ext0_offset(dir) + dir->i_size; int nfidlen; - uint8_t lfi; - uint16_t liu; udf_pblk_t block; struct kernel_lb_addr eloc; uint32_t elen = 0; @@ -383,7 +381,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, namelen = 0; } - nfidlen = (sizeof(struct fileIdentDesc) + namelen + 3) & ~3; + nfidlen = ALIGN(sizeof(struct fileIdentDesc) + namelen, UDF_NAME_PAD); f_pos = udf_ext0_offset(dir); @@ -424,12 +422,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, goto out_err; } - liu = le16_to_cpu(cfi->lengthOfImpUse); - lfi = cfi->lengthFileIdent; - if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { - if (((sizeof(struct fileIdentDesc) + - liu + lfi + 3) & ~3) == nfidlen) { + if (udf_dir_entry_len(cfi) == nfidlen) { cfi->descTag.tagSerialNum = cpu_to_le16(1); cfi->fileVersionNum = cpu_to_le16(1); cfi->fileCharacteristics = 0; @@ -608,8 +602,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode) fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (unlikely(!fi)) { inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); @@ -700,8 +693,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); if (!fi) { inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } set_nlink(inode, 2); @@ -719,8 +711,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!fi) { clear_nlink(inode); mark_inode_dirty(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); @@ -1047,8 +1038,7 @@ out: out_no_entry: up_write(&iinfo->i_data_sem); inode_dec_link_count(inode); - 
unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); goto out; } @@ -1201,9 +1191,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, if (dir_fi) { dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location); - udf_update_tag((char *)dir_fi, - (sizeof(struct fileIdentDesc) + - le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3); + udf_update_tag((char *)dir_fi, udf_dir_entry_len(dir_fi)); if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) mark_inode_dirty(old_inode); else diff --git a/fs/udf/super.c b/fs/udf/super.c index fc77ea736da7..0c504c8031d3 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -862,6 +862,9 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) struct buffer_head *bh; uint16_t ident; int ret = -ENOMEM; +#ifdef UDFFS_DEBUG + struct timestamp *ts; +#endif outstr = kmalloc(128, GFP_NOFS); if (!outstr) @@ -880,15 +883,15 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) pvoldesc = (struct primaryVolDesc *)bh->b_data; - if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, - pvoldesc->recordingDateAndTime)) { + udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time, + pvoldesc->recordingDateAndTime); #ifdef UDFFS_DEBUG - struct timestamp *ts = &pvoldesc->recordingDateAndTime; - udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n", - le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, - ts->minute, le16_to_cpu(ts->typeAndTimezone)); + ts = &pvoldesc->recordingDateAndTime; + udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n", + le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, + ts->minute, le16_to_cpu(ts->typeAndTimezone)); #endif - } + ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32); if (ret < 0) diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index fc8d1b3384d2..84c47dde4d26 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -132,6 +132,12 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, extern int 
udf_write_fi(struct inode *inode, struct fileIdentDesc *, struct fileIdentDesc *, struct udf_fileident_bh *, uint8_t *, uint8_t *); +static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi) +{ + return ALIGN(sizeof(struct fileIdentDesc) + + le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent, + UDF_NAME_PAD); +} /* file.c */ extern long udf_ioctl(struct file *, unsigned int, unsigned long); @@ -167,8 +173,7 @@ extern int udf_add_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t, int); extern void udf_write_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t, int); -extern int8_t udf_delete_aext(struct inode *, struct extent_position, - struct kernel_lb_addr, uint32_t); +extern int8_t udf_delete_aext(struct inode *, struct extent_position); extern int8_t udf_next_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t *, int); extern int8_t udf_current_aext(struct inode *, struct extent_position *, @@ -253,8 +258,8 @@ extern struct long_ad *udf_get_filelongad(uint8_t *, int, uint32_t *, int); extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t *, int); /* udftime.c */ -extern struct timespec *udf_disk_stamp_to_time(struct timespec *dest, +extern void udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src); -extern struct timestamp *udf_time_to_disk_stamp(struct timestamp *dest, struct timespec src); +extern void udf_time_to_disk_stamp(struct timestamp *dest, struct timespec src); #endif /* __UDF_DECL_H */ diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 0927a4b2ecaf..67b33ac5d41b 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -40,7 +40,7 @@ #include <linux/kernel.h> #include <linux/time.h> -struct timespec * +void udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src) { u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone); @@ -67,10 +67,9 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp 
src) * recorded with bogus sub-second values. */ dest->tv_nsec %= NSEC_PER_SEC; - return dest; } -struct timestamp * +void udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts) { long seconds; @@ -79,9 +78,6 @@ udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts) offset = -sys_tz.tz_minuteswest; - if (!dest) - return NULL; - dest->typeAndTimezone = cpu_to_le16(0x1000 | (offset & 0x0FFF)); seconds = ts.tv_sec + offset * 60; @@ -97,7 +93,6 @@ udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts) dest->centiseconds * 10000) / 100; dest->microseconds = (ts.tv_nsec / 1000 - dest->centiseconds * 10000 - dest->hundredsOfMicroseconds * 100); - return dest; } /* EOF */ diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index e1ef0f0a1353..02c0a4be4212 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -343,8 +343,7 @@ cg_found: fail_remove_inode: mutex_unlock(&sbi->s_lock); clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); failed: diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index d5f43ba76c59..9ef40f100415 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -43,8 +43,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) return 0; } inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -142,8 +141,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, out_fail: inode_dec_link_count(inode); - unlock_new_inode(inode); - iput(inode); + discard_new_inode(inode); return err; } @@ -198,8 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); - unlock_new_inode(inode); - iput (inode); + discard_new_inode(inode); out_dir: inode_dec_link_count(dir); return err; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 123bf7d516fc..bad9cea37f12 
100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -222,24 +222,26 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, unsigned long reason) { struct mm_struct *mm = ctx->mm; - pte_t *pte; + pte_t *ptep, pte; bool ret = true; VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); - if (!pte) + ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); + + if (!ptep) goto out; ret = false; + pte = huge_ptep_get(ptep); /* * Lockless access: we're in a wait_event so it's ok if it * changes under us. */ - if (huge_pte_none(*pte)) + if (huge_pte_none(pte)) ret = true; - if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP)) + if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) ret = true; out: return ret; @@ -631,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + } up_write(&mm->mmap_sem); userfaultfd_ctx_put(release_new_ctx); diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 84db76e0e3e3..fecd187fcf2c 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -157,6 +157,7 @@ __xfs_ag_resv_free( error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true); resv->ar_reserved = 0; resv->ar_asked = 0; + resv->ar_orig_reserved = 0; if (error) trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno, @@ -189,13 +190,34 @@ __xfs_ag_resv_init( struct xfs_mount *mp = pag->pag_mount; struct xfs_ag_resv *resv; int error; - xfs_extlen_t reserved; + xfs_extlen_t hidden_space; if (used > ask) ask = used; - reserved = ask - used; - error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); + 
switch (type) { + case XFS_AG_RESV_RMAPBT: + /* + * Space taken by the rmapbt is not subtracted from fdblocks + * because the rmapbt lives in the free space. Here we must + * subtract the entire reservation from fdblocks so that we + * always have blocks available for rmapbt expansion. + */ + hidden_space = ask; + break; + case XFS_AG_RESV_METADATA: + /* + * Space taken by all other metadata btrees are accounted + * on-disk as used space. We therefore only hide the space + * that is reserved but not used by the trees. + */ + hidden_space = ask - used; + break; + default: + ASSERT(0); + return -EINVAL; + } + error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true); if (error) { trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, error, _RET_IP_); @@ -216,7 +238,8 @@ __xfs_ag_resv_init( resv = xfs_perag_resv(pag, type); resv->ar_asked = ask; - resv->ar_reserved = resv->ar_orig_reserved = reserved; + resv->ar_orig_reserved = hidden_space; + resv->ar_reserved = ask - used; trace_xfs_ag_resv_init(pag, type, ask); return 0; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index eef466260d43..75dbdc14c45f 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -223,12 +223,13 @@ xfs_alloc_get_rec( error = xfs_btree_get_rec(cur, &rec, stat); if (error || !(*stat)) return error; - if (rec->alloc.ar_blockcount == 0) - goto out_bad_rec; *bno = be32_to_cpu(rec->alloc.ar_startblock); *len = be32_to_cpu(rec->alloc.ar_blockcount); + if (*len == 0) + goto out_bad_rec; + /* check for valid extent range, including overflow */ if (!xfs_verify_agbno(mp, agno, *bno)) goto out_bad_rec; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 01628f0c9a0c..7205268b30bc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5780,6 +5780,32 @@ del_cursor: return error; } +/* Make sure we won't be right-shifting an extent past the maximum bound. 
*/ +int +xfs_bmap_can_insert_extents( + struct xfs_inode *ip, + xfs_fileoff_t off, + xfs_fileoff_t shift) +{ + struct xfs_bmbt_irec got; + int is_empty; + int error = 0; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty); + if (!error && !is_empty && got.br_startoff >= off && + ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff) + error = -EINVAL; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + return error; +} + int xfs_bmap_insert_extents( struct xfs_trans *tp, diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 99dddbd0fcc6..9b49ddf99c41 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -227,6 +227,8 @@ int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops); +int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off, + xfs_fileoff_t shift); int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 1c5a8aaf2bfc..059bc44c27e8 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -962,6 +962,9 @@ typedef enum xfs_dinode_fmt { XFS_DFORK_DSIZE(dip, mp) : \ XFS_DFORK_ASIZE(dip, mp)) +#define XFS_DFORK_MAXEXT(dip, mp, w) \ + (XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec)) + /* * Return pointers to the data or attribute forks. 
*/ @@ -1526,6 +1529,8 @@ typedef struct xfs_bmdr_block { #define BMBT_STARTBLOCK_BITLEN 52 #define BMBT_BLOCKCOUNT_BITLEN 21 +#define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1) + typedef struct xfs_bmbt_rec { __be64 l0, l1; } xfs_bmbt_rec_t; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index d38d724534c4..30d1d60f1d46 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -374,6 +374,47 @@ xfs_log_dinode_to_disk( } } +static xfs_failaddr_t +xfs_dinode_verify_fork( + struct xfs_dinode *dip, + struct xfs_mount *mp, + int whichfork) +{ + uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); + + switch (XFS_DFORK_FORMAT(dip, whichfork)) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (whichfork == XFS_DATA_FORK) { + if (S_ISREG(be16_to_cpu(dip->di_mode))) + return __this_address; + if (be64_to_cpu(dip->di_size) > + XFS_DFORK_SIZE(dip, mp, whichfork)) + return __this_address; + } + if (di_nextents) + return __this_address; + break; + case XFS_DINODE_FMT_EXTENTS: + if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) + return __this_address; + break; + case XFS_DINODE_FMT_BTREE: + if (whichfork == XFS_ATTR_FORK) { + if (di_nextents > MAXAEXTNUM) + return __this_address; + } else if (di_nextents > MAXEXTNUM) { + return __this_address; + } + break; + default: + return __this_address; + } + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -441,24 +482,9 @@ xfs_dinode_verify( case S_IFREG: case S_IFLNK: case S_IFDIR: - switch (dip->di_format) { - case XFS_DINODE_FMT_LOCAL: - /* - * no local regular files yet - */ - if (S_ISREG(mode)) - return __this_address; - if (di_size > XFS_DFORK_DSIZE(dip, mp)) - return __this_address; - if (dip->di_nextents) - return __this_address; - /* fall through */ - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - break; - default: - return __this_address; - } + fa = xfs_dinode_verify_fork(dip, mp, 
XFS_DATA_FORK); + if (fa) + return fa; break; case 0: /* Uninitialized inode ok. */ @@ -468,17 +494,9 @@ xfs_dinode_verify( } if (XFS_DFORK_Q(dip)) { - switch (dip->di_aformat) { - case XFS_DINODE_FMT_LOCAL: - if (dip->di_anextents) - return __this_address; - /* fall through */ - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - break; - default: - return __this_address; - } + fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); + if (fa) + return fa; } else { /* * If there is no fork offset, this may be a freshly-made inode @@ -713,7 +731,8 @@ xfs_inode_validate_extsize( if ((hint_flag || inherit_flag) && extsize == 0) return __this_address; - if (!(hint_flag || inherit_flag) && extsize != 0) + /* free inodes get flags set to zero but extsize remains */ + if (mode && !(hint_flag || inherit_flag) && extsize != 0) return __this_address; if (extsize_bytes % blocksize_bytes) @@ -759,7 +778,8 @@ xfs_inode_validate_cowextsize( if (hint_flag && cowextsize == 0) return __this_address; - if (!hint_flag && cowextsize != 0) + /* free inodes get flags set to zero but cowextsize remains */ + if (mode && !hint_flag && cowextsize != 0) return __this_address; if (hint_flag && rt_flag) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 65fc4ed2e9a1..b228c821bae6 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1029,8 +1029,8 @@ xfs_rtalloc_query_range( if (low_rec->ar_startext >= mp->m_sb.sb_rextents || low_rec->ar_startext == high_rec->ar_startext) return 0; - if (high_rec->ar_startext >= mp->m_sb.sb_rextents) - high_rec->ar_startext = mp->m_sb.sb_rextents - 1; + if (high_rec->ar_startext > mp->m_sb.sb_rextents) + high_rec->ar_startext = mp->m_sb.sb_rextents; /* Iterate the bitmap, looking for discrepancies. 
*/ rtstart = low_rec->ar_startext; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c35009a86699..83b1e8c6c18f 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -685,12 +685,10 @@ out_unlock_iolock: } /* - * dead simple method of punching delalyed allocation blocks from a range in - * the inode. Walks a block at a time so will be slow, but is only executed in - * rare error cases so the overhead is not critical. This will always punch out - * both the start and end blocks, even if the ranges only partially overlap - * them, so it is up to the caller to ensure that partial blocks are not - * passed in. + * Dead simple method of punching delalyed allocation blocks from a range in + * the inode. This will always punch out both the start and end blocks, even + * if the ranges only partially overlap them, so it is up to the caller to + * ensure that partial blocks are not passed in. */ int xfs_bmap_punch_delalloc_range( @@ -698,63 +696,44 @@ xfs_bmap_punch_delalloc_range( xfs_fileoff_t start_fsb, xfs_fileoff_t length) { - xfs_fileoff_t remaining = length; + struct xfs_ifork *ifp = &ip->i_df; + xfs_fileoff_t end_fsb = start_fsb + length; + struct xfs_bmbt_irec got, del; + struct xfs_iext_cursor icur; int error = 0; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - do { - int done; - xfs_bmbt_irec_t imap; - int nimaps = 1; - xfs_fsblock_t firstblock; - struct xfs_defer_ops dfops; + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + return error; + } - /* - * Map the range first and check that it is a delalloc extent - * before trying to unmap the range. Otherwise we will be - * trying to remove a real extent (which requires a - * transaction) or a hole, which is probably a bad idea... 
- */ - error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps, - XFS_BMAPI_ENTIRE); + if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) + return 0; - if (error) { - /* something screwed, just bail */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_alert(ip->i_mount, - "Failed delalloc mapping lookup ino %lld fsb %lld.", - ip->i_ino, start_fsb); - } - break; - } - if (!nimaps) { - /* nothing there */ - goto next_block; - } - if (imap.br_startblock != DELAYSTARTBLOCK) { - /* been converted, ignore */ - goto next_block; - } - WARN_ON(imap.br_blockcount == 0); + while (got.br_startoff + got.br_blockcount > start_fsb) { + del = got; + xfs_trim_extent(&del, start_fsb, length); /* - * Note: while we initialise the firstblock/dfops pair, they - * should never be used because blocks should never be - * allocated or freed for a delalloc extent and hence we need - * don't cancel or finish them after the xfs_bunmapi() call. + * A delete can push the cursor forward. Step back to the + * previous extent on non-delalloc or extents outside the + * target range. 
*/ - xfs_defer_init(&dfops, &firstblock); - error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, - &dfops, &done); - if (error) - break; + if (!del.br_blockcount || + !isnullstartblock(del.br_startblock)) { + if (!xfs_iext_prev_extent(ifp, &icur, &got)) + break; + continue; + } - ASSERT(!xfs_defer_has_unfinished_work(&dfops)); -next_block: - start_fsb++; - remaining--; - } while(remaining > 0); + error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, + &got, &del); + if (error || !xfs_iext_get_extent(ifp, &icur, &got)) + break; + } return error; } @@ -1208,7 +1187,22 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); + error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); + if (error) + return error; + + /* + * If we zeroed right up to EOF and EOF straddles a page boundary we + * must make sure that the post-EOF area is also zeroed because the + * page could be mmap'd and iomap_zero_range doesn't do that for us. + * Writeback of the eof page will do this, albeit clumsily. 
+ */ + if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) { + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + (offset + len) & ~PAGE_MASK, LLONG_MAX); + } + + return error; } /* @@ -1404,6 +1398,10 @@ xfs_insert_file_space( trace_xfs_insert_file_space(ip); + error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb); + if (error) + return error; + error = xfs_prepare_shift(ip, offset); if (error) return error; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index c34fa9c342f2..c7157bc48bd1 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -513,8 +513,8 @@ xfs_getfsmap_rtdev_rtbitmap_query( struct xfs_trans *tp, struct xfs_getfsmap_info *info) { - struct xfs_rtalloc_rec alow; - struct xfs_rtalloc_rec ahigh; + struct xfs_rtalloc_rec alow = { 0 }; + struct xfs_rtalloc_rec ahigh = { 0 }; int error; xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a7afcad6b711..3f2bd6032cf8 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -387,7 +387,7 @@ xfs_reserve_blocks( do { free = percpu_counter_sum(&mp->m_fdblocks) - mp->m_alloc_set_aside; - if (!free) + if (free <= 0) break; delta = request - mp->m_resblks; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4a2e5e13c569..5df4de666cc1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -761,7 +761,7 @@ xfs_ialloc( xfs_inode_t *ip; uint flags; int error; - struct timespec tv; + struct timespec64 tv; struct inode *inode; /* @@ -3236,7 +3236,6 @@ xfs_iflush_cluster( struct xfs_inode *cip; int nr_found; int clcount = 0; - int bufwasdelwri; int i; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -3360,37 +3359,22 @@ cluster_corrupt_out: * inode buffer and shut down the filesystem. */ rcu_read_unlock(); - /* - * Clean up the buffer. If it was delwri, just release it -- - * brelse can handle it with no problems. If not, shut down the - * filesystem before releasing the buffer. 
- */ - bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); - if (bufwasdelwri) - xfs_buf_relse(bp); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - if (!bufwasdelwri) { - /* - * Just like incore_relse: if we have b_iodone functions, - * mark the buffer as an error and call them. Otherwise - * mark it as stale and brelse. - */ - if (bp->b_iodone) { - bp->b_flags &= ~XBF_DONE; - xfs_buf_stale(bp); - xfs_buf_ioerror(bp, -EIO); - xfs_buf_ioend(bp); - } else { - xfs_buf_stale(bp); - xfs_buf_relse(bp); - } - } - /* - * Unlocks the flush lock + * We'll always have an inode attached to the buffer for completion + * process by the time we are called from xfs_iflush(). Hence we have + * always need to do IO completion processing to abort the inodes + * attached to the buffer. handle them just like the shutdown case in + * xfs_buf_submit(). */ + ASSERT(bp->b_iodone); + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_ioerror(bp, -EIO); + xfs_buf_ioend(bp); + + /* abort the corrupt inode, as it was not attached to the buffer */ xfs_iflush_abort(cip, false); kmem_free(cilist); xfs_perag_put(pag); @@ -3486,12 +3470,17 @@ xfs_iflush( xfs_log_force(mp, 0); /* - * inode clustering: - * see if other inodes can be gathered into this write + * inode clustering: try to gather other inodes into this write + * + * Note: Any error during clustering will result in the filesystem + * being shut down and completion callbacks run on the cluster buffer. + * As we have already flushed and attached this inode to the buffer, + * it has already been aborted and released by xfs_iflush_cluster() and + * so we have no further error handling to do here. 
*/ error = xfs_iflush_cluster(ip, bp); if (error) - goto cluster_corrupt_out; + return error; *bpp = bp; return 0; @@ -3500,12 +3489,8 @@ corrupt_out: if (bp) xfs_buf_relse(bp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); -cluster_corrupt_out: - error = -EFSCORRUPTED; abort_out: - /* - * Unlocks the flush lock - */ + /* abort the corrupt inode, as it was not attached to the buffer */ xfs_iflush_abort(ip, false); return error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 49f5492eed3b..55876dd02f0c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -963,12 +963,13 @@ xfs_ilock_for_iomap( unsigned *lockmode) { unsigned mode = XFS_ILOCK_SHARED; + bool is_write = flags & (IOMAP_WRITE | IOMAP_ZERO); /* * COW writes may allocate delalloc space or convert unwritten COW * extents, so we need to make sure to take the lock exclusively here. */ - if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) { + if (xfs_is_reflink_inode(ip) && is_write) { /* * FIXME: It could still overwrite on unshared extents and not * need allocation. @@ -989,6 +990,7 @@ xfs_ilock_for_iomap( mode = XFS_ILOCK_EXCL; } +relock: if (flags & IOMAP_NOWAIT) { if (!xfs_ilock_nowait(ip, mode)) return -EAGAIN; @@ -996,6 +998,17 @@ xfs_ilock_for_iomap( xfs_ilock(ip, mode); } + /* + * The reflink iflag could have changed since the earlier unlocked + * check, so if we got ILOCK_SHARED for a write and but we're now a + * reflink inode we have to switch to ILOCK_EXCL and relock. 
+ */ + if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) { + xfs_iunlock(ip, mode); + mode = XFS_ILOCK_EXCL; + goto relock; + } + *lockmode = mode; return 0; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1fce707406c6..3a75de777843 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1042,7 +1042,7 @@ xfs_vn_setattr( STATIC int xfs_vn_update_time( struct inode *inode, - struct timespec *now, + struct timespec64 *now, int flags) { struct xfs_inode *ip = XFS_I(inode); @@ -1253,7 +1253,7 @@ xfs_setup_inode( inode_sb_list_add(inode); /* make the inode look hashed for the writeback code */ - hlist_add_fake(&inode->i_hash); + inode_fake_hash(inode); inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index e040af120b69..524f543c5b82 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -258,7 +258,12 @@ xfs_trans_alloc( if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); - WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); + /* + * Zero-reservation ("empty") transactions can't modify anything, so + * they're allowed to run while we're frozen. + */ + WARN_ON(resp->tr_logres > 0 && + mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); atomic_inc(&mp->m_active_trans); tp = kmem_zone_zalloc(xfs_trans_zone, diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index e2963a6033b2..542927321a61 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -58,7 +58,7 @@ xfs_trans_ichgtime( int flags) { struct inode *inode = VFS_I(ip); - struct timespec tv; + struct timespec64 tv; ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |