diff options
Diffstat (limited to 'net')
57 files changed, 394 insertions, 313 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 64468c49791f..deabbd376cb1 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -15,6 +15,13 @@ menuconfig NET_9P if NET_9P +config NET_9P_FD + default NET_9P + tristate "9P FD Transport" + help + This builds support for transports over TCP, Unix sockets and + filedescriptors. + config NET_9P_VIRTIO depends on VIRTIO tristate "9P Virtio Transport" diff --git a/net/9p/Makefile b/net/9p/Makefile index aa0a5641e5d0..1df9b344c30b 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_NET_9P) := 9pnet.o +obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o @@ -9,9 +10,11 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o client.o \ error.o \ protocol.o \ - trans_fd.o \ trans_common.o \ +9pnet_fd-objs := \ + trans_fd.o \ + 9pnet_virtio-objs := \ trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index d062f1e5bfb0..8bba0d9cf975 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1038,8 +1038,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto put_trans; - if (clnt->msize > clnt->trans_mod->maxsize) + if (clnt->msize > clnt->trans_mod->maxsize) { clnt->msize = clnt->trans_mod->maxsize; + pr_info("Limiting 'msize' to %d as this is the maximum " + "supported by transport %s\n", + clnt->msize, clnt->trans_mod->name + ); + } if (clnt->msize < 4096) { p9_debug(P9_DEBUG_ERROR, diff --git a/net/9p/mod.c b/net/9p/mod.c index c37fc201a944..55576c1866fa 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -83,7 +83,7 @@ void v9fs_unregister_trans(struct p9_trans_module *m) } EXPORT_SYMBOL(v9fs_unregister_trans); -static struct p9_trans_module *_p9_get_trans_by_name(char *s) +static struct p9_trans_module *_p9_get_trans_by_name(const char *s) { struct p9_trans_module *t, *found = NULL; @@ -106,7 +106,7 @@ static struct p9_trans_module *_p9_get_trans_by_name(char *s) * @s: string identifying transport * */ -struct p9_trans_module *v9fs_get_trans_by_name(char *s) +struct p9_trans_module *v9fs_get_trans_by_name(const char *s) { struct p9_trans_module *found = NULL; @@ -123,6 +123,10 @@ struct p9_trans_module *v9fs_get_trans_by_name(char *s) } EXPORT_SYMBOL(v9fs_get_trans_by_name); +static const char * const v9fs_default_transports[] = { + "virtio", "tcp", "fd", "unix", "xen", "rdma", +}; + /** * v9fs_get_default_trans - get the default transport * @@ -131,6 +135,7 @@ EXPORT_SYMBOL(v9fs_get_trans_by_name); struct p9_trans_module *v9fs_get_default_trans(void) { struct p9_trans_module *t, *found = NULL; + int i; spin_lock(&v9fs_trans_lock); @@ -148,6 +153,10 @@ struct p9_trans_module *v9fs_get_default_trans(void) } spin_unlock(&v9fs_trans_lock); + + for (i = 0; !found && i < ARRAY_SIZE(v9fs_default_transports); i++) + found = v9fs_get_trans_by_name(v9fs_default_transports[i]); + return found; } EXPORT_SYMBOL(v9fs_get_default_trans); @@ -177,7 +186,6 @@ static int __init init_p9(void) p9_error_init(); pr_info("Installing 9P2000 support\n"); - p9_trans_fd_init(); return ret; } @@ -191,7 +199,6 @@ static void __exit exit_p9(void) { pr_info("Unloading 9P2000 support\n"); - p9_trans_fd_exit(); p9_client_exit(); } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 827c47620fc0..8f8f95e39b03 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1090,6 +1090,7 @@ static struct p9_trans_module p9_tcp_trans = { .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; +MODULE_ALIAS_9P("tcp"); static struct p9_trans_module p9_unix_trans = { .name = "unix", @@ -1103,6 +1104,7 @@ static struct p9_trans_module p9_unix_trans = { .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; +MODULE_ALIAS_9P("unix"); static struct p9_trans_module p9_fd_trans = { .name = "fd", @@ -1116,6 +1118,7 @@ static struct p9_trans_module p9_fd_trans = { .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; +MODULE_ALIAS_9P("fd"); /** * p9_poll_workfn - poll worker thread @@ -1149,7 +1152,7 @@ static void p9_poll_workfn(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "finish\n"); } -int p9_trans_fd_init(void) +static int __init p9_trans_fd_init(void) { v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); @@ -1158,10 +1161,17 @@ int p9_trans_fd_init(void) return 0; } -void p9_trans_fd_exit(void) +static void __exit p9_trans_fd_exit(void) { flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); } + +module_init(p9_trans_fd_init); +module_exit(p9_trans_fd_exit); + +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_DESCRIPTION("Filedescriptor Transport for 9P"); +MODULE_LICENSE("GPL"); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index bd5a89c4960d..b24a4fb0f0a2 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -648,7 +648,7 @@ fail: * @args: args passed from sys_mount() for per-transport options (unused) * * This sets up a transport channel for 9p communication. Right now - * we only match the first available channel, but eventually we couldlook up + * we only match the first available channel, but eventually we could look up * alternate channels by matching devname versus a virtio_config entry. * We use a simple reference count mechanism to ensure that only a single * mount has a channel open at a time. @@ -721,7 +721,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_unlock(&virtio_9p_lock); - vdev->config->reset(vdev); + virtio_reset_device(vdev); vdev->config->del_vqs(vdev); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 2418fa0b58f3..eb9fb55280ef 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -538,6 +538,7 @@ static void p9_trans_xen_exit(void) } module_exit(p9_trans_xen_exit); +MODULE_ALIAS("xen:9pfs"); MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>"); MODULE_DESCRIPTION("Xen Transport for 9P"); MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index c9add7753b9f..40baa6b7321a 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -535,7 +535,7 @@ static int bnep_session(void *arg) up_write(&bnep_session_sem); free_netdev(dev); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 83eb84e8e688..90d130588a3e 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -323,7 +323,7 @@ static int cmtp_session(void *arg) up_write(&cmtp_session_sem); kfree(session); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 80848dfc01db..5940744a8cd8 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg) l2cap_unregister_user(session->conn, &session->user); hidp_session_put(session); - module_put_and_exit(0); + module_put_and_kthread_exit(0); return 0; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a52ad81596b7..55f47cadb114 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -615,6 +615,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, err = dev_set_allmulti(dev, 1); if (err) { br_multicast_del_port(p); + dev_put_track(dev, &p->dev_tracker); kfree(p); /* kobject not yet init'd, manually free */ goto err1; } @@ -724,10 +725,10 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: br_multicast_del_port(p); + dev_put_track(dev, &p->dev_tracker); kobject_put(&p->kobj); dev_set_allmulti(dev, -1); err1: - dev_put(dev); return err; } diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index 5622763ad402..7e51f128045d 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -7,7 +7,7 @@ #include <linux/ceph/buffer.h> #include <linux/ceph/decode.h> -#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */ +#include <linux/ceph/libceph.h> /* for kvmalloc */ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) { @@ -17,7 +17,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) if (!b) return NULL; - b->vec.iov_base = ceph_kvmalloc(len, gfp); + b->vec.iov_base = kvmalloc(len, gfp); if (!b->vec.iov_base) { kfree(b); return NULL; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 97d6ea763e32..9441b4a4912b 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -190,33 +190,6 @@ int ceph_compare_options(struct ceph_options *new_opt, } EXPORT_SYMBOL(ceph_compare_options); -/* - * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are - * compatible with (a superset of) GFP_KERNEL. This is because while the - * actual pages are allocated with the specified flags, the page table pages - * are always allocated with GFP_KERNEL. - * - * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO. - */ -void *ceph_kvmalloc(size_t size, gfp_t flags) -{ - void *p; - - if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) { - p = kvmalloc(size, flags); - } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) { - unsigned int nofs_flag = memalloc_nofs_save(); - p = kvmalloc(size, GFP_KERNEL); - memalloc_nofs_restore(nofs_flag); - } else { - unsigned int noio_flag = memalloc_noio_save(); - p = kvmalloc(size, GFP_KERNEL); - memalloc_noio_restore(noio_flag); - } - - return p; -} - static int parse_fsid(const char *str, struct ceph_fsid *fsid) { int i = 0; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 92d89b331645..051d22c0e4ad 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -147,7 +147,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key) static const u8 *aes_iv = (u8 *)CEPH_AES_IV; /* - * Should be used for buffers allocated with ceph_kvmalloc(). + * Should be used for buffers allocated with kvmalloc(). * Currently these are encrypt out-buffer (ceph_buffer) and decrypt * in-buffer (msg front). * diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 57d043b382ed..7b891be799d2 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1920,7 +1920,7 @@ struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, /* front */ if (front_len) { - m->front.iov_base = ceph_kvmalloc(front_len, flags); + m->front.iov_base = kvmalloc(front_len, flags); if (m->front.iov_base == NULL) { dout("ceph_msg_new can't allocate %d bytes\n", front_len); diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index cc40ce4e02fb..c4099b641b38 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -308,7 +308,7 @@ static void *alloc_conn_buf(struct ceph_connection *con, int len) if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs))) return NULL; - buf = ceph_kvmalloc(len, GFP_NOIO); + buf = kvmalloc(len, GFP_NOIO); if (!buf) return NULL; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 75b738083523..2823bb3cff55 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -980,7 +980,7 @@ static struct crush_work *alloc_workspace(const struct crush_map *c) work_size = crush_work_size(c, CEPH_PG_MAX_SIZE); dout("%s work_size %zu bytes\n", __func__, work_size); - work = ceph_kvmalloc(work_size, GFP_NOIO); + work = kvmalloc(work_size, GFP_NOIO); if (!work) return NULL; @@ -1190,9 +1190,9 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) if (max == map->max_osd) return 0; - state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS); - weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS); - addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS); + state = kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS); + weight = kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS); + addr = kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS); if (!state || !weight || !addr) { kvfree(state); kvfree(weight); @@ -1222,7 +1222,7 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max) if (map->osd_primary_affinity) { u32 *affinity; - affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)), + affinity = kvmalloc(array_size(max, sizeof(*affinity)), GFP_NOFS); if (!affinity) return -ENOMEM; @@ -1503,7 +1503,7 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff) if (!map->osd_primary_affinity) { int i; - map->osd_primary_affinity = ceph_kvmalloc( + map->osd_primary_affinity = kvmalloc( array_size(map->max_osd, sizeof(*map->osd_primary_affinity)), GFP_NOFS); if (!map->osd_primary_affinity) diff --git a/net/core/dev.c b/net/core/dev.c index 84a0d9542fe9..1baab07820f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8981,6 +8981,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } old_prog = link->prog; + if (old_prog->type != new_prog->type || + old_prog->expected_attach_type != new_prog->expected_attach_type) { + err = -EINVAL; + goto out_unlock; + } + if (old_prog == new_prog) { /* no-op, don't disturb drivers */ bpf_prog_put(new_prog); diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index d8b9dbabd4a4..5b8016335aca 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -260,7 +260,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9b7171c40434..a5b5bb99c644 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -164,8 +164,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); + cond_resched(); + } } if (ops->exit_batch) ops->exit_batch(net_exit_list); diff --git a/net/core/of_net.c b/net/core/of_net.c index 95a64c813ae5..f1a9bf7578e7 100644 --- a/net/core/of_net.c +++ b/net/core/of_net.c @@ -61,7 +61,7 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) { struct platform_device *pdev = of_find_device_by_node(np); struct nvmem_cell *cell; - const void *buf; + const void *mac; size_t len; int ret; @@ -78,32 +78,21 @@ static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) if (IS_ERR(cell)) return PTR_ERR(cell); - buf = nvmem_cell_read(cell, &len); + mac = nvmem_cell_read(cell, &len); nvmem_cell_put(cell); - if (IS_ERR(buf)) - return PTR_ERR(buf); - - ret = 0; - if (len == ETH_ALEN) { - if (is_valid_ether_addr(buf)) - memcpy(addr, buf, ETH_ALEN); - else - ret = -EINVAL; - } else if (len == 3 * ETH_ALEN - 1) { - u8 mac[ETH_ALEN]; - - if (mac_pton(buf, mac)) - memcpy(addr, mac, ETH_ALEN); - else - ret = -EINVAL; - } else { - ret = -EINVAL; + if (IS_ERR(mac)) + return PTR_ERR(mac); + + if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { + kfree(mac); + return -EINVAL; } - kfree(buf); + memcpy(addr, mac, ETH_ALEN); + kfree(mac); - return ret; + return 0; } /** diff --git a/net/core/sock.c b/net/core/sock.c index e21485ab285d..4ff806d71921 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -844,6 +844,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) } num = ethtool_get_phc_vclocks(dev, &vclock_index); + dev_put(dev); + for (i = 0; i < num; i++) { if (*(vclock_index + i) == phc_index) { match = true; @@ -2047,6 +2049,9 @@ void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); + WARN_ON_ONCE(!llist_empty(&sk->defer_list)); + sk_defer_free_flush(sk); + if (rcu_access_pointer(sk->sk_reuseport_cb)) { reuseport_detach_sock(sk); use_call_rcu = true; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 828de171708f..b4589861b84c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/netlink.h> +#include <linux/hash.h> #include <net/arp.h> #include <net/ip.h> @@ -51,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_info_hash_size; +static unsigned int fib_info_hash_bits; static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 @@ -249,7 +251,6 @@ void free_fib_info(struct fib_info *fi) pr_warn("Freeing alive fib_info %p\n", fi); return; } - fib_info_cnt--; call_rcu(&fi->rcu, free_fib_info_rcu); } @@ -260,6 +261,10 @@ void fib_release_info(struct fib_info *fi) spin_lock_bh(&fib_info_lock); if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); + + /* Paired with READ_ONCE() in fib_create_info(). */ + WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); + if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); if (fi->nh) { @@ -316,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) static inline unsigned int fib_devindex_hashfn(unsigned int val) { - unsigned int mask = DEVINDEX_HASHSIZE - 1; + return hash_32(val, DEVINDEX_HASHBITS); +} + +static struct hlist_head * +fib_info_devhash_bucket(const struct net_device *dev) +{ + u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - return (val ^ - (val >> DEVINDEX_HASHBITS) ^ - (val >> (DEVINDEX_HASHBITS * 2))) & mask; + return &fib_info_devhash[fib_devindex_hashfn(val)]; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -430,12 +439,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - unsigned int hash; spin_lock(&fib_info_lock); - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); + hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && @@ -1240,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, return err; } -static inline unsigned int fib_laddr_hashfn(__be32 val) +static struct hlist_head * +fib_info_laddrhash_bucket(const struct net *net, __be32 val) { - unsigned int mask = (fib_info_hash_size - 1); + u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val, + fib_info_hash_bits); - return ((__force u32)val ^ - ((__force u32)val >> 7) ^ - ((__force u32)val >> 14)) & mask; + return &fib_info_laddrhash[slot]; } static struct hlist_head *fib_info_hash_alloc(int bytes) @@ -1282,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_info_hash_size = new_size; + fib_info_hash_bits = ilog2(new_size); for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; @@ -1299,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, } fib_info_hash = new_info_hash; + fib_info_laddrhash = new_laddrhash; for (i = 0; i < old_size; i++) { - struct hlist_head *lhead = &fib_info_laddrhash[i]; + struct hlist_head *lhead = &old_laddrhash[i]; struct hlist_node *n; struct fib_info *fi; hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; - unsigned int new_hash; - new_hash = fib_laddr_hashfn(fi->fib_prefsrc); - ldest = &new_laddrhash[new_hash]; + ldest = fib_info_laddrhash_bucket(fi->fib_net, + fi->fib_prefsrc); hlist_add_head(&fi->fib_lhash, ldest); } } - fib_info_laddrhash = new_laddrhash; spin_unlock_bh(&fib_info_lock); @@ -1430,7 +1438,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg, #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_info_hash_size) { + + /* Paired with WRITE_ONCE() in fib_release_info() */ + if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; @@ -1462,7 +1472,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, return ERR_PTR(err); } - fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; @@ -1591,12 +1600,13 @@ link_it: refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); + fib_info_cnt++; hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { struct hlist_head *head; - head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; + head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc); hlist_add_head(&fi->fib_lhash, head); } if (fi->nh) { @@ -1604,12 +1614,10 @@ link_it: } else { change_nexthops(fi) { struct hlist_head *head; - unsigned int hash; if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } @@ -1870,16 +1878,16 @@ nla_put_failure: */ int fib_sync_down_addr(struct net_device *dev, __be32 local) { - int ret = 0; - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = &fib_info_laddrhash[hash]; int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); + struct hlist_head *head; struct fib_info *fi; + int ret = 0; if (!fib_info_laddrhash || local == 0) return 0; + head = fib_info_laddrhash_bucket(net, local); hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net) || fi->fib_tb_id != tb_id) @@ -1961,8 +1969,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { @@ -1981,12 +1988,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - int ret = 0; - int scope = RT_SCOPE_NOWHERE; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; + int ret = 0; if (force) scope = -1; @@ -2131,7 +2137,6 @@ out: int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; - unsigned int hash; struct hlist_head *head; struct fib_nh *nh; int ret; @@ -2147,8 +2152,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 05cd198d7a6b..341096807100 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -235,9 +235,9 @@ void inet_frag_kill(struct inet_frag_queue *fq) /* The RCU read lock provides a memory barrier * guaranteeing that if fqdir->dead is false then * the hash table destruction will not start until - * after we unlock. Paired with inet_frags_exit_net(). + * after we unlock. Paired with fqdir_pre_exit(). */ - if (!fqdir->dead) { + if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); refcount_dec(&fq->refcnt); @@ -352,9 +352,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { + /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ + long high_thresh = READ_ONCE(fqdir->high_thresh); struct inet_frag_queue *fq = NULL, *prev; - if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh) + if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; rcu_read_lock(); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cfeb8890f94e..fad803d2d711 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t) rcu_read_lock(); - if (qp->q.fqdir->dead) + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2ac2b95c5694..99db2e41ed10 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -604,8 +604,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, - tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark, skb_get_hash(skb)); + tunnel_id_to_key32(key->tun_id), + key->tos & ~INET_ECN_MASK, 0, skb->mark, + skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b75836db19b..78e81465f5f3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } release_sock(sk); + sk_defer_free_flush(sk); if (spliced) return spliced; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 463c37dea449..413f66781e50 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fe786df4f849..97ade833f58c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e6de94203c13..f4884cda13b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a618dce7e0bc..c0b138c20992 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -956,7 +956,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); } - if (rt->rt_type != RTN_UNICAST) { + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); dev->stats.tx_carrier_errors++; goto tx_error_icmp; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 86ad15abf897..750f9f9b4daf 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -285,7 +285,7 @@ static void __mctp_route_test_init(struct kunit *test, struct mctp_test_route **rtp, struct socket **sockp) { - struct sockaddr_mctp addr; + struct sockaddr_mctp addr = {0}; struct mctp_test_route *rt; struct mctp_test_dev *dev; struct socket *sock; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 566ba4397ee4..8490e46359ae 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1537,7 +1537,7 @@ int __init netlbl_unlabel_defconf(void) /* Only the kernel is allowed to call this function and the only time * it is called is at bootup before the audit subsystem is reporting * messages so don't worry to much about these values. */ - security_task_getsecid_subj(current, &audit_info.secid); + security_current_getsecid_subj(&audit_info.secid); audit_info.loginuid = GLOBAL_ROOT_UID; audit_info.sessionid = 0; diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 6190cbf94bf0..d6c5b31eb4eb 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -32,7 +32,7 @@ */ static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info) { - security_task_getsecid_subj(current, &audit_info->secid); + security_current_getsecid_subj(&audit_info->secid); audit_info->loginuid = audit_get_loginuid(current); audit_info->sessionid = audit_get_sessionid(current); } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 6cfd30fc0798..0b93a17b9f11 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + if (!llcp_sock->local) { + release_sock(sk); + return -ENODEV; + } + if (sk->sk_type == SOCK_DGRAM) { DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index c027c76d493c..cc8fa9e36159 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -317,14 +317,13 @@ static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data, * Arguments: * * tty pointer to tty instance data - * file pointer to open file object for device * cmd IOCTL command code * arg argument for IOCTL call (cmd dependent) * * Return Value: Command dependent */ -static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, - unsigned int cmd, unsigned long arg) +static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, + unsigned long arg) { struct nci_uart *nu = (void *)tty->disc_data; int err = 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5bd409ab4cc2..85ea7ddb48db 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; list_add(&match->list, &fanout_list); @@ -3353,6 +3354,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index fa611678af05..18196e1c8c2f 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -79,7 +79,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev); + rc = mhi_prepare_for_transfer_autoqueue(mhi_dev); if (rc) return rc; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c9c6f49f9c28..179825a3b2fd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1062,7 +1062,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_offload_graft_root(dev, new, old, extack); - if (new && new->ops->attach) + if (new && new->ops->attach && !ingress) goto skip; for (i = 0; i < num_q; i++) { @@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b07bd1c7330f..f893d9a81b01 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1529,6 +1529,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; + r->mpu = conf->mpu; r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index aa3bcaaeabf7..961854e56736 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -634,9 +634,13 @@ static void smc_conn_abort(struct smc_sock *smc, int local_first) { struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; + bool lgr_valid = false; + + if (smc_conn_lgr_valid(conn)) + lgr_valid = true; smc_conn_free(conn); - if (local_first) + if (local_first && lgr_valid) smc_lgr_cleanup_early(lgr); } diff --git a/net/smc/smc.h b/net/smc/smc.h index 1a4fc1c6c4ab..3d0b8e300deb 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -221,6 +221,7 @@ struct smc_connection { */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ + u8 freed : 1; /* normal termiation */ u8 out_of_sync : 1; /* out of sync with peer */ }; diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 84c8a4374fdd..9d5a97168969 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -197,7 +197,8 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) { int rc; - if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) + if (!smc_conn_lgr_valid(conn) || + (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) return -EPIPE; if (conn->lgr->is_smcd) { diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 6be95a2a7b25..ce27399b38b1 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -774,7 +774,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? SMC_FIRST_CONTACT_MASK : 0; - if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) && + if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) && smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8935ef4811b0..29525d03b253 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -211,14 +211,13 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - if (!lgr) + if (!smc_conn_lgr_valid(conn)) return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - conn->lgr = NULL; } int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) @@ -749,9 +748,12 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, } get_device(&lnk->smcibdev->ibdev->dev); atomic_inc(&lnk->smcibdev->lnk_cnt); + refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */ + lnk->clearing = 0; lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; + smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); @@ -806,6 +808,7 @@ out: lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&smcibdev->lnk_cnt)) wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold above */ return rc; } @@ -844,6 +847,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->terminating = 0; lgr->freeing = 0; lgr->vlan_id = ini->vlan_id; + refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */ mutex_init(&lgr->sndbufs_lock); mutex_init(&lgr->rmbs_lock); rwlock_init(&lgr->conns_lock); @@ -996,8 +1000,12 @@ void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk) { atomic_dec(&conn->lnk->conn_cnt); + /* link_hold in smc_conn_create() */ + smcr_link_put(conn->lnk); conn->lnk = to_lnk; atomic_inc(&conn->lnk->conn_cnt); + /* link_put in smc_conn_free() */ + smcr_link_hold(conn->lnk); } struct smc_link *smc_switch_conns(struct smc_link_group *lgr, @@ -1130,8 +1138,19 @@ void smc_conn_free(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - if (!lgr) + if (!lgr || conn->freed) + /* Connection has never been registered in a + * link group, or has already been freed. + */ return; + + conn->freed = 1; + if (!smc_conn_lgr_valid(conn)) + /* Connection has already unregistered from + * link group. + */ + goto lgr_put; + if (lgr->is_smcd) { if (!list_empty(&lgr->list)) smc_ism_unset_conn(conn); @@ -1148,6 +1167,10 @@ void smc_conn_free(struct smc_connection *conn) if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); +lgr_put: + if (!lgr->is_smcd) + smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */ + smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */ } /* unregister a link from a buf_desc */ @@ -1203,13 +1226,29 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk) } } -/* must be called under lgr->llc_conf_mutex lock */ -void smcr_link_clear(struct smc_link *lnk, bool log) +static void __smcr_link_clear(struct smc_link *lnk) { + struct smc_link_group *lgr = lnk->lgr; struct smc_ib_device *smcibdev; - if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) + smc_wr_free_link_mem(lnk); + smc_ibdev_cnt_dec(lnk); + put_device(&lnk->smcibdev->ibdev->dev); + smcibdev = lnk->smcibdev; + memset(lnk, 0, sizeof(struct smc_link)); + lnk->state = SMC_LNK_UNUSED; + if (!atomic_dec_return(&smcibdev->lnk_cnt)) + wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */ +} + +/* must be called under lgr->llc_conf_mutex lock */ +void smcr_link_clear(struct smc_link *lnk, bool log) +{ + if (!lnk->lgr || lnk->clearing || + lnk->state == SMC_LNK_UNUSED) return; + lnk->clearing = 1; lnk->peer_qpn = 0; smc_llc_link_clear(lnk, log); smcr_buf_unmap_lgr(lnk); @@ -1218,14 +1257,18 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_wr_free_link(lnk); smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); - smc_wr_free_link_mem(lnk); - smc_ibdev_cnt_dec(lnk); - put_device(&lnk->smcibdev->ibdev->dev); - smcibdev = lnk->smcibdev; - memset(lnk, 0, sizeof(struct smc_link)); - lnk->state = SMC_LNK_UNUSED; - if (!atomic_dec_return(&smcibdev->lnk_cnt)) - wake_up(&smcibdev->lnks_deleted); + smcr_link_put(lnk); /* theoretically last link_put */ +} + +void smcr_link_hold(struct smc_link *lnk) +{ + refcount_inc(&lnk->refcnt); +} + +void smcr_link_put(struct smc_link *lnk) +{ + if (refcount_dec_and_test(&lnk->refcnt)) + __smcr_link_clear(lnk); } static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, @@ -1290,6 +1333,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) __smc_lgr_free_bufs(lgr, true); } +/* won't be freed until no one accesses to lgr anymore */ +static void __smc_lgr_free(struct smc_link_group *lgr) +{ + smc_lgr_free_bufs(lgr); + if (lgr->is_smcd) { + if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) + wake_up(&lgr->smcd->lgrs_deleted); + } else { + smc_wr_free_lgr_mem(lgr); + if (!atomic_dec_return(&lgr_cnt)) + wake_up(&lgrs_deleted); + } + kfree(lgr); +} + /* remove a link group */ static void smc_lgr_free(struct smc_link_group *lgr) { @@ -1305,19 +1363,23 @@ static void smc_lgr_free(struct smc_link_group *lgr) smc_llc_lgr_clear(lgr); } - smc_lgr_free_bufs(lgr); destroy_workqueue(lgr->tx_wq); if (lgr->is_smcd) { smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); - if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) - wake_up(&lgr->smcd->lgrs_deleted); - } else { - smc_wr_free_lgr_mem(lgr); - if (!atomic_dec_return(&lgr_cnt)) - wake_up(&lgrs_deleted); } - kfree(lgr); + smc_lgr_put(lgr); /* theoretically last lgr_put */ +} + +void smc_lgr_hold(struct smc_link_group *lgr) +{ + refcount_inc(&lgr->refcnt); +} + +void smc_lgr_put(struct smc_link_group *lgr) +{ + if (refcount_dec_and_test(&lgr->refcnt)) + __smc_lgr_free(lgr); } static void smc_sk_wake_ups(struct smc_sock *smc) @@ -1469,16 +1531,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) /* Called when an SMCR device is removed or the smc module is unloaded. * If smcibdev is given, all SMCR link groups using this device are terminated. * If smcibdev is NULL, all SMCR link groups are terminated. - * - * We must wait here for QPs been destroyed before we destroy the CQs, - * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus - * smc_sock cannot be released. */ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); - LIST_HEAD(lgr_linkdown_list); int i; spin_lock_bh(&smc_lgr_list.lock); @@ -1490,7 +1547,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].smcibdev == smcibdev) - list_move_tail(&lgr->list, &lgr_linkdown_list); + smcr_link_down_cond_sched(&lgr->lnk[i]); } } } @@ -1502,16 +1559,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) __smc_lgr_terminate(lgr, false); } - list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].smcibdev == smcibdev) { - mutex_lock(&lgr->llc_conf_mutex); - smcr_link_down_cond(&lgr->lnk[i]); - mutex_unlock(&lgr->llc_conf_mutex); - } - } - } - if (smcibdev) { if (atomic_read(&smcibdev->lnk_cnt)) wait_event(smcibdev->lnks_deleted, @@ -1856,6 +1903,10 @@ create: goto out; } } + smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */ + if (!conn->lgr->is_smcd) + smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */ + conn->freed = 0; conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; @@ -2240,14 +2291,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || + !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || + !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -2256,7 +2309,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) { int i; - if (!conn->lgr || conn->lgr->is_smcd) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&conn->lgr->lnk[i])) @@ -2270,7 +2323,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) { int i; - if (!conn->lgr || conn->lgr->is_smcd) + if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&conn->lgr->lnk[i])) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 521c64a3d8d3..4cb03e942364 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -137,6 +137,8 @@ struct smc_link { u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */ u8 link_idx; /* index in lgr link array */ u8 link_is_asym; /* is link asymmetric? */ + u8 clearing : 1; /* link is being cleared */ + refcount_t refcnt; /* link reference count */ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */ @@ -249,6 +251,7 @@ struct smc_link_group { u8 terminating : 1;/* lgr is terminating */ u8 freeing : 1; /* lgr is being freed */ + refcount_t refcnt; /* lgr reference count */ bool is_smcd; /* SMC-R or SMC-D */ u8 smc_version; u8 negotiated_eid[SMC_MAX_EID_LEN]; @@ -409,6 +412,11 @@ static inline struct smc_connection *smc_lgr_find_conn( return res; } +static inline bool smc_conn_lgr_valid(struct smc_connection *conn) +{ + return conn->lgr && conn->alert_token_local; +} + /* * Returns true if the specified link is usable. * @@ -487,6 +495,8 @@ struct smc_clc_msg_accept_confirm; void smc_lgr_cleanup_early(struct smc_link_group *lgr); void smc_lgr_terminate_sched(struct smc_link_group *lgr); +void smc_lgr_hold(struct smc_link_group *lgr); +void smc_lgr_put(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, @@ -518,6 +528,8 @@ void smc_core_exit(void); int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini); void smcr_link_clear(struct smc_link *lnk, bool log); +void smcr_link_hold(struct smc_link *lnk); +void smcr_link_put(struct smc_link *lnk); void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk); int smcr_buf_map_lgr(struct smc_link *lnk); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 7c8dad28c18d..b8898c787d23 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -89,7 +89,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, r->diag_state = sk->sk_state; if (smc->use_fallback) r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP; - else if (smc->conn.lgr && smc->conn.lgr->is_smcd) + else if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd) r->diag_mode = SMC_DIAG_MODE_SMCD; else r->diag_mode = SMC_DIAG_MODE_SMCR; @@ -142,7 +142,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if (smc->conn.lgr && !smc->conn.lgr->is_smcd && + if (smc_conn_lgr_valid(&smc->conn) && !smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; @@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; } - if (smc->conn.lgr && smc->conn.lgr->is_smcd && + if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_connection *conn = &smc->conn; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index db9825c01e0a..291f1484a1b7 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -369,7 +369,8 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); new_pe->ndev = ndev; - netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL); + if (ndev) + netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL); rc = -EEXIST; new_netdev = true; write_lock(&pnettable->lock); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 47512ccce5ef..a54e90a1110f 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -125,10 +125,6 @@ int smc_wr_tx_v2_send(struct smc_link *link, int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); -void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data); void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4292278a9552..2aabec2b4bec 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -37,18 +37,37 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) - #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL /* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_AUTO = -1, /* choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + +/* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. */ -struct svc_pool_map svc_pool_map = { + +struct svc_pool_map { + int count; /* How many svc_servs use us */ + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +}; + +static struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; -EXPORT_SYMBOL_GPL(svc_pool_map); static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ @@ -219,10 +238,12 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) /* * Add a reference to the global map of cpus to pools (and - * vice versa). Initialise the map if we're the first user. - * Returns the number of pools. + * vice versa) if pools are in use. + * Initialise the map if we're the first user. + * Returns the number of pools. If this is '1', no reference + * was taken. */ -unsigned int +static unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -232,6 +253,7 @@ svc_pool_map_get(void) if (m->count++) { mutex_unlock(&svc_pool_map_mutex); + WARN_ON_ONCE(m->npools <= 1); return m->npools; } @@ -247,30 +269,36 @@ svc_pool_map_get(void) break; } - if (npools < 0) { + if (npools <= 0) { /* default, or memory allocation failure */ npools = 1; m->mode = SVC_POOL_GLOBAL; } m->npools = npools; + if (npools == 1) + /* service is unpooled, so doesn't hold a reference */ + m->count--; + mutex_unlock(&svc_pool_map_mutex); - return m->npools; + return npools; } -EXPORT_SYMBOL_GPL(svc_pool_map_get); /* - * Drop a reference to the global map of cpus to pools. + * Drop a reference to the global map of cpus to pools, if + * pools were in use, i.e. if npools > 1. * When the last reference is dropped, the map data is * freed; this allows the sysadmin to change the pool * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. */ -void -svc_pool_map_put(void) +static void +svc_pool_map_put(int npools) { struct svc_pool_map *m = &svc_pool_map; + if (npools <= 1) + return; mutex_lock(&svc_pool_map_mutex); if (!--m->count) { @@ -283,7 +311,6 @@ svc_pool_map_put(void) mutex_unlock(&svc_pool_map_mutex); } -EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { @@ -340,21 +367,18 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) struct svc_pool_map *m = &svc_pool_map; unsigned int pidx = 0; - /* - * An uninitialised map happens in a pure client when - * lockd is brought up, so silently treat it the - * same as SVC_POOL_GLOBAL. - */ - if (svc_serv_is_pooled(serv)) { - switch (m->mode) { - case SVC_POOL_PERCPU: - pidx = m->to_pool[cpu]; - break; - case SVC_POOL_PERNODE: - pidx = m->to_pool[cpu_to_node(cpu)]; - break; - } + if (serv->sv_nrpools <= 1) + return serv->sv_pools; + + switch (m->mode) { + case SVC_POOL_PERCPU: + pidx = m->to_pool[cpu]; + break; + case SVC_POOL_PERNODE: + pidx = m->to_pool[cpu_to_node(cpu)]; + break; } + return &serv->sv_pools[pidx % serv->sv_nrpools]; } @@ -435,7 +459,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return NULL; serv->sv_name = prog->pg_name; serv->sv_program = prog; - serv->sv_nrthreads = 1; + kref_init(&serv->sv_refcnt); serv->sv_stats = prog->pg_stats; if (bufsize > RPCSVC_MAXPAYLOAD) bufsize = RPCSVC_MAXPAYLOAD; @@ -507,7 +531,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, goto out_err; return serv; out_err: - svc_pool_map_put(); + svc_pool_map_put(npools); return NULL; } EXPORT_SYMBOL_GPL(svc_create_pooled); @@ -523,23 +547,14 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net); /* * Destroy an RPC service. Should be called with appropriate locking to - * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. + * protect sv_permsocks and sv_tempsocks. */ void -svc_destroy(struct svc_serv *serv) +svc_destroy(struct kref *ref) { - dprintk("svc: svc_destroy(%s, %d)\n", - serv->sv_program->pg_name, - serv->sv_nrthreads); - - if (serv->sv_nrthreads) { - if (--(serv->sv_nrthreads) != 0) { - svc_sock_update_bufs(serv); - return; - } - } else - printk("svc_destroy: no threads for serv=%p!\n", serv); + struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt); + dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name); del_timer_sync(&serv->sv_temptimer); /* @@ -551,8 +566,7 @@ svc_destroy(struct svc_serv *serv) cache_clean_deferred(serv); - if (svc_serv_is_pooled(serv)) - svc_pool_map_put(); + svc_pool_map_put(serv->sv_nrpools); kfree(serv->sv_pools); kfree(serv); @@ -638,7 +652,7 @@ out_enomem: } EXPORT_SYMBOL_GPL(svc_rqst_alloc); -struct svc_rqst * +static struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; @@ -647,14 +661,17 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return ERR_PTR(-ENOMEM); - serv->sv_nrthreads++; + svc_get(serv); + spin_lock_bh(&serv->sv_lock); + serv->sv_nrthreads += 1; + spin_unlock_bh(&serv->sv_lock); + spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); spin_unlock_bh(&pool->sp_lock); return rqstp; } -EXPORT_SYMBOL_GPL(svc_prepare_thread); /* * Choose a pool in which to create a new thread, for svc_set_num_threads @@ -748,59 +765,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } - -/* destroy old threads */ -static int -svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) -{ - struct task_struct *task; - unsigned int state = serv->sv_nrthreads-1; - - /* destroy old threads */ - do { - task = choose_victim(serv, pool, &state); - if (task == NULL) - break; - send_sig(SIGINT, task, 1); - nrservs++; - } while (nrservs < 0); - - return 0; -} - /* * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Caller must ensure that mutual exclusion between this and * server startup or shutdown. - * - * Destroying threads relies on the service threads filling in - * rqstp->rq_task, which only the nfs ones do. Assumes the serv - * has been created using svc_create_pooled(). - * - * Based on code that used to be in nfsd_svc() but tweaked - * to be pool-aware. */ -int -svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) -{ - if (pool == NULL) { - /* The -1 assumes caller has done a svc_get() */ - nrservs -= (serv->sv_nrthreads-1); - } else { - spin_lock_bh(&pool->sp_lock); - nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); - } - - if (nrservs > 0) - return svc_start_kthreads(serv, pool, nrservs); - if (nrservs < 0) - return svc_signal_kthreads(serv, pool, nrservs); - return 0; -} -EXPORT_SYMBOL_GPL(svc_set_num_threads); /* destroy old threads */ static int @@ -821,11 +792,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) } int -svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { if (pool == NULL) { - /* The -1 assumes caller has done a svc_get() */ - nrservs -= (serv->sv_nrthreads-1); + nrservs -= serv->sv_nrthreads; } else { spin_lock_bh(&pool->sp_lock); nrservs -= pool->sp_nrthreads; @@ -838,7 +808,7 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser return svc_stop_kthreads(serv, pool, nrservs); return 0; } -EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); +EXPORT_SYMBOL_GPL(svc_set_num_threads); /** * svc_rqst_replace_page - Replace one page in rq_pages[] @@ -890,11 +860,14 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); + spin_lock_bh(&serv->sv_lock); + serv->sv_nrthreads -= 1; + spin_unlock_bh(&serv->sv_lock); + svc_sock_update_bufs(serv); + svc_rqst_free(rqstp); - /* Release the server */ - if (serv) - svc_destroy(serv); + svc_put(serv); } EXPORT_SYMBOL_GPL(svc_exit_thread); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 1e99ba1b9d72..b21ad7994147 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -6,6 +6,7 @@ */ #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/errno.h> #include <linux/freezer.h> #include <linux/kthread.h> @@ -243,7 +244,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); if (IS_ERR(xprt)) trace_svc_xprt_create_err(serv->sv_program->pg_name, - xcl->xcl_name, sap, xprt); + xcl->xcl_name, sap, len, xprt); return xprt; } @@ -264,8 +265,6 @@ void svc_xprt_received(struct svc_xprt *xprt) return; } - trace_svc_xprt_received(xprt); - /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_enqueue_xprt with: */ @@ -466,7 +465,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) out_unlock: rcu_read_unlock(); put_cpu(); - trace_svc_xprt_do_enqueue(xprt, rqstp); + trace_svc_xprt_enqueue(xprt, rqstp); } EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); @@ -688,7 +687,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) return -EINTR; } trace_svc_alloc_arg_err(pages); - schedule_timeout(msecs_to_jiffies(500)); + memalloc_retry_wait(GFP_KERNEL); } rqstp->rq_page_end = &rqstp->rq_pages[pages]; rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */ @@ -842,8 +841,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } else svc_xprt_received(xprt); + out: - trace_svc_handle_xprt(xprt, len); return len; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 95e774f1b91f..efc84845bb6b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2059,6 +2059,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, splice_read_end: release_sock(sk); + sk_defer_free_flush(sk); return copied ? : err; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12e2ddaf887f..d45d5366115a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -192,8 +192,11 @@ void wait_for_unix_gc(void) { /* If number of inflight sockets is insane, * force a garbage collect right now. + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight() and gc_in_progress(). */ - if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } @@ -213,7 +216,9 @@ void unix_gc(void) if (gc_in_progress) goto out; - gc_in_progress = true; + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, true); + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -299,7 +304,10 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); - gc_in_progress = false; + + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); + wake_up(&unix_gc_wait); out: diff --git a/net/unix/scm.c b/net/unix/scm.c index 052ae709ce28..aa27a02478dc 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - unix_tot_inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } user->unix_inflight++; spin_unlock(&unix_gc_lock); @@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); - unix_tot_inflight--; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } user->unix_inflight--; spin_unlock(&unix_gc_lock); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 4f7c99dfd16c..fb3302fff627 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -665,7 +665,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev) vsock_for_each_connected_socket(virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, - * so we can safely call vdev->config->reset(). + * so we can safely call virtio_reset_device(). */ mutex_lock(&vsock->rx_lock); vsock->rx_run = false; @@ -682,7 +682,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev) /* Flush all device writes and interrupts, device will not use any * more buffers. */ - vdev->config->reset(vdev); + virtio_reset_device(vdev); mutex_lock(&vsock->rx_lock); while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 756e7de7e33f..1e9be50469ce 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -33,8 +33,8 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ ) > $@ -$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ - $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) +$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDI) \ + $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509) @$(kecho) " GEN $@" $(Q)(set -e; \ allf=""; \ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dccb8f3318ef..04d1ce9b510f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -31,6 +31,7 @@ #include <linux/if_tunnel.h> #include <net/dst.h> #include <net/flow.h> +#include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/gre.h> @@ -3295,7 +3296,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->flowi4_proto = iph->protocol; fl4->daddr = reverse ? iph->saddr : iph->daddr; fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; + fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; if (!ip_is_fragment(iph)) { switch (iph->protocol) { |