summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/ceph/Kconfig2
-rw-r--r--net/ceph/ceph_hash.c2
-rw-r--r--net/ceph/crush/hash.c2
-rw-r--r--net/ceph/crush/mapper.c2
-rw-r--r--net/ceph/debugfs.c3
-rw-r--r--net/ceph/osd_client.c43
-rw-r--r--net/core/bpf_sk_storage.c37
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c25
-rw-r--r--net/ipv4/bpfilter/sockopt.c14
-rw-r--r--net/ipv4/inet_connection_sock.c97
-rw-r--r--net/ipv4/inet_hashtables.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_fastopen.c23
-rw-r--r--net/mptcp/subflow.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c5
-rw-r--r--net/netfilter/nft_set_rbtree.c4
-rw-r--r--net/nfc/rawsock.c7
-rw-r--r--net/openvswitch/datapath.c10
-rw-r--r--net/openvswitch/flow_table.c35
-rw-r--r--net/openvswitch/flow_table.h3
-rw-r--r--net/packet/af_packet.c9
-rw-r--r--net/socket.c23
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c118
-rw-r--r--net/sunrpc/auth_gss/trace.c3
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/xprt.c9
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c31
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c115
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c81
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c124
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c74
-rw-r--r--net/tls/tls_device.c3
-rw-r--r--net/tls/tls_sw.c3
-rw-r--r--net/vmw_vsock/af_vsock.c2
-rw-r--r--net/xfrm/xfrm_policy.c10
42 files changed, 530 insertions, 451 deletions
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 12ecacf0c55f..c0762a302162 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -950,7 +950,7 @@ static int p9_bind_privport(struct socket *sock)
memset(&cl, 0, sizeof(cl));
cl.sin_family = AF_INET;
- cl.sin_addr.s_addr = INADDR_ANY;
+ cl.sin_addr.s_addr = htonl(INADDR_ANY);
for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
cl.sin_port = htons((ushort)port);
err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index d7bec7adc267..f36f9a3a4e20 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -13,7 +13,7 @@ config CEPH_LIB
common functionality to both the Ceph filesystem and
to the rados block device (rbd).
- More information at http://ceph.newdream.net/.
+ More information at https://ceph.io/.
If unsure, say N.
diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c
index 9a5850f264ed..81e1e006c540 100644
--- a/net/ceph/ceph_hash.c
+++ b/net/ceph/ceph_hash.c
@@ -4,7 +4,7 @@
/*
* Robert Jenkin's hash function.
- * http://burtleburtle.net/bob/hash/evahash.html
+ * https://burtleburtle.net/bob/hash/evahash.html
* This is in the public domain.
*/
#define mix(a, b, c) \
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index e5cc603cdb17..fe79f6d2d0db 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -7,7 +7,7 @@
/*
* Robert Jenkins' function for mixing 32-bit values
- * http://burtleburtle.net/bob/hash/evahash.html
+ * https://burtleburtle.net/bob/hash/evahash.html
* a, b = random bits, c = input and output
*/
#define crush_hashmix(a, b, c) do { \
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 3f323ed9df52..07e5614eb3f1 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -298,7 +298,7 @@ static __u64 crush_ln(unsigned int xin)
*
* for reference, see:
*
- * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ * https://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
*
*/
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 409d505ff320..2110439f8a24 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -223,6 +223,9 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
if (op->op == CEPH_OSD_OP_WATCH)
seq_printf(s, "-%s",
ceph_osd_watch_op_name(op->watch.op));
+ else if (op->op == CEPH_OSD_OP_CALL)
+ seq_printf(s, "-%s/%s", op->cls.class_name,
+ op->cls.method_name);
}
seq_putc(s, '\n');
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 2db8b44e70c2..e4fbcad6e7d8 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -525,7 +525,7 @@ EXPORT_SYMBOL(ceph_osdc_put_request);
static void request_init(struct ceph_osd_request *req)
{
- /* req only, each op is zeroed in _osd_req_op_init() */
+ /* req only, each op is zeroed in osd_req_op_init() */
memset(req, 0, sizeof(*req));
kref_init(&req->r_kref);
@@ -746,8 +746,8 @@ EXPORT_SYMBOL(ceph_osdc_alloc_messages);
* other information associated with them. It also serves as a
* common init routine for all the other init functions, below.
*/
-static struct ceph_osd_req_op *
-_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
+struct ceph_osd_req_op *
+osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode, u32 flags)
{
struct ceph_osd_req_op *op;
@@ -762,12 +762,6 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
return op;
}
-
-void osd_req_op_init(struct ceph_osd_request *osd_req,
- unsigned int which, u16 opcode, u32 flags)
-{
- (void)_osd_req_op_init(osd_req, which, opcode, flags);
-}
EXPORT_SYMBOL(osd_req_op_init);
void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
@@ -775,8 +769,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
u64 offset, u64 length,
u64 truncate_size, u32 truncate_seq)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
- opcode, 0);
+ struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which,
+ opcode, 0);
size_t payload_len = 0;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
@@ -822,7 +816,7 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
BUG_ON(which + 1 >= osd_req->r_num_ops);
prev_op = &osd_req->r_ops[which];
- op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags);
+ op = osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags);
/* dup previous one */
op->indata_len = prev_op->indata_len;
op->outdata_len = prev_op->outdata_len;
@@ -845,7 +839,7 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
size_t size;
int ret;
- op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
+ op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
pagelist = ceph_pagelist_alloc(GFP_NOFS);
if (!pagelist)
@@ -883,8 +877,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode, const char *name, const void *value,
size_t size, u8 cmp_op, u8 cmp_mode)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
- opcode, 0);
+ struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which,
+ opcode, 0);
struct ceph_pagelist *pagelist;
size_t payload_len;
int ret;
@@ -928,7 +922,7 @@ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
{
struct ceph_osd_req_op *op;
- op = _osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
op->watch.cookie = cookie;
op->watch.op = watch_opcode;
op->watch.gen = 0;
@@ -943,10 +937,9 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
u64 expected_write_size,
u32 flags)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
- CEPH_OSD_OP_SETALLOCHINT,
- 0);
+ struct ceph_osd_req_op *op;
+ op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_SETALLOCHINT, 0);
op->alloc_hint.expected_object_size = expected_object_size;
op->alloc_hint.expected_write_size = expected_write_size;
op->alloc_hint.flags = flags;
@@ -3076,9 +3069,7 @@ static void send_linger(struct ceph_osd_linger_request *lreq)
cancel_linger_request(req);
request_reinit(req);
- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- req->r_flags = lreq->t.flags;
+ target_copy(&req->r_t, &lreq->t);
req->r_mtime = lreq->mtime;
mutex_lock(&lreq->lock);
@@ -4801,7 +4792,7 @@ static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which,
struct ceph_pagelist *pl;
int ret;
- op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0);
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0);
pl = ceph_pagelist_alloc(GFP_NOIO);
if (!pl)
@@ -4870,7 +4861,7 @@ static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
struct ceph_pagelist *pl;
int ret;
- op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
op->notify.cookie = cookie;
pl = ceph_pagelist_alloc(GFP_NOIO);
@@ -5334,8 +5325,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
if (IS_ERR(pages))
return PTR_ERR(pages);
- op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2,
- dst_fadvise_flags);
+ op = osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2,
+ dst_fadvise_flags);
op->copy_from.snapid = src_snapid;
op->copy_from.src_version = src_version;
op->copy_from.flags = copy_from_flags;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d3377c90a291..b988f48153a4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
return 0;
}
-static int bpf_iter_check_map(struct bpf_prog *prog,
- struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
{
- struct bpf_map *map = aux->map;
+ struct bpf_map *map;
+ int err = -EINVAL;
+
+ if (!linfo->map.map_fd)
+ return -EBADF;
+
+ map = bpf_map_get_with_uref(linfo->map.map_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
- return -EINVAL;
+ goto put_map;
- if (prog->aux->max_rdonly_access > map->value_size)
- return -EACCES;
+ if (prog->aux->max_rdonly_access > map->value_size) {
+ err = -EACCES;
+ goto put_map;
+ }
+ aux->map = map;
return 0;
+
+put_map:
+ bpf_map_put_with_uref(map);
+ return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+ bpf_map_put_with_uref(aux->map);
}
static const struct seq_operations bpf_sk_storage_map_seq_ops = {
@@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
.target = "bpf_sk_storage_map",
- .check_target = bpf_iter_check_map,
- .req_linfo = BPF_ITER_LINK_MAP_FD,
+ .attach_target = bpf_iter_attach_map,
+ .detach_target = bpf_iter_detach_map,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 9de33b594ff2..efec66fa78b7 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -757,11 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
return err;
}
- hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
- cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
- if (cpumask_empty(mask)) {
- free_cpumask_var(mask);
- return -EINVAL;
+ if (!cpumask_empty(mask)) {
+ hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
+ cpumask_and(mask, mask, housekeeping_cpumask(hk_flags));
+ if (cpumask_empty(mask)) {
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
}
map = kzalloc(max_t(unsigned int,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2828f6d5ba89..7e2e502ef519 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4853,7 +4853,7 @@ static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
if (err < 0)
goto out;
- if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
+ if (ip_is_fragment(ip_hdr(skb)))
fragment = true;
off = ip_hdrlen(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index a2044b4b606b..e4f40b175acb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3414,6 +3414,16 @@ static void sock_inuse_add(struct net *net, int val)
}
#endif
+static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
+{
+ if (!twsk_prot)
+ return;
+ kfree(twsk_prot->twsk_slab_name);
+ twsk_prot->twsk_slab_name = NULL;
+ kmem_cache_destroy(twsk_prot->twsk_slab);
+ twsk_prot->twsk_slab = NULL;
+}
+
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
if (!rsk_prot)
@@ -3484,7 +3494,7 @@ int proto_register(struct proto *prot, int alloc_slab)
prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
- goto out_free_timewait_sock_slab_name;
+ goto out_free_timewait_sock_slab;
}
}
@@ -3492,15 +3502,15 @@ int proto_register(struct proto *prot, int alloc_slab)
ret = assign_proto_idx(prot);
if (ret) {
mutex_unlock(&proto_list_mutex);
- goto out_free_timewait_sock_slab_name;
+ goto out_free_timewait_sock_slab;
}
list_add(&prot->node, &proto_list);
mutex_unlock(&proto_list_mutex);
return ret;
-out_free_timewait_sock_slab_name:
+out_free_timewait_sock_slab:
if (alloc_slab && prot->twsk_prot)
- kfree(prot->twsk_prot->twsk_slab_name);
+ tw_prot_cleanup(prot->twsk_prot);
out_free_request_sock_slab:
if (alloc_slab) {
req_prot_cleanup(prot->rsk_prot);
@@ -3524,12 +3534,7 @@ void proto_unregister(struct proto *prot)
prot->slab = NULL;
req_prot_cleanup(prot->rsk_prot);
-
- if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
- kmem_cache_destroy(prot->twsk_prot->twsk_slab);
- kfree(prot->twsk_prot->twsk_slab_name);
- prot->twsk_prot->twsk_slab = NULL;
- }
+ tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 545b2640f019..1b34cb9a7708 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -57,18 +57,16 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
return bpfilter_mbox_request(sk, optname, optval, optlen, true);
}
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname,
- char __user *user_optval, int __user *optlen)
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+ int __user *optlen)
{
- sockptr_t optval;
- int err, len;
+ int len;
if (get_user(len, optlen))
return -EFAULT;
- err = init_user_sockptr(&optval, user_optval, len);
- if (err)
- return err;
- return bpfilter_mbox_request(sk, optname, optval, len, false);
+
+ return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len,
+ false);
}
static int __init bpfilter_sockopt_init(void)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d1a3913eebe0..b457dd2d6c75 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -296,6 +296,57 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
ipv6_only_sock(sk), true, false);
}
+void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+ struct sock *sk)
+{
+ kuid_t uid = sock_i_uid(sk);
+ bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
+
+ if (hlist_empty(&tb->owners)) {
+ tb->fastreuse = reuse;
+ if (sk->sk_reuseport) {
+ tb->fastreuseport = FASTREUSEPORT_ANY;
+ tb->fastuid = uid;
+ tb->fast_rcv_saddr = sk->sk_rcv_saddr;
+ tb->fast_ipv6_only = ipv6_only_sock(sk);
+ tb->fast_sk_family = sk->sk_family;
+#if IS_ENABLED(CONFIG_IPV6)
+ tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+#endif
+ } else {
+ tb->fastreuseport = 0;
+ }
+ } else {
+ if (!reuse)
+ tb->fastreuse = 0;
+ if (sk->sk_reuseport) {
+ /* We didn't match or we don't have fastreuseport set on
+ * the tb, but we have sk_reuseport set on this socket
+ * and we know that there are no bind conflicts with
+ * this socket in this tb, so reset our tb's reuseport
+ * settings so that any subsequent sockets that match
+ * our current socket will be put on the fast path.
+ *
+ * If we reset we need to set FASTREUSEPORT_STRICT so we
+ * do extra checking for all subsequent sk_reuseport
+ * socks.
+ */
+ if (!sk_reuseport_match(tb, sk)) {
+ tb->fastreuseport = FASTREUSEPORT_STRICT;
+ tb->fastuid = uid;
+ tb->fast_rcv_saddr = sk->sk_rcv_saddr;
+ tb->fast_ipv6_only = ipv6_only_sock(sk);
+ tb->fast_sk_family = sk->sk_family;
+#if IS_ENABLED(CONFIG_IPV6)
+ tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+#endif
+ }
+ } else {
+ tb->fastreuseport = 0;
+ }
+ }
+}
+
/* Obtain a reference to a local port for the given sock,
* if snum is zero it means select any available local port.
* We try to allocate an odd port (and leave even ports for connect())
@@ -308,7 +359,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
struct inet_bind_bucket *tb = NULL;
- kuid_t uid = sock_i_uid(sk);
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -345,49 +395,8 @@ tb_found:
goto fail_unlock;
}
success:
- if (hlist_empty(&tb->owners)) {
- tb->fastreuse = reuse;
- if (sk->sk_reuseport) {
- tb->fastreuseport = FASTREUSEPORT_ANY;
- tb->fastuid = uid;
- tb->fast_rcv_saddr = sk->sk_rcv_saddr;
- tb->fast_ipv6_only = ipv6_only_sock(sk);
- tb->fast_sk_family = sk->sk_family;
-#if IS_ENABLED(CONFIG_IPV6)
- tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
-#endif
- } else {
- tb->fastreuseport = 0;
- }
- } else {
- if (!reuse)
- tb->fastreuse = 0;
- if (sk->sk_reuseport) {
- /* We didn't match or we don't have fastreuseport set on
- * the tb, but we have sk_reuseport set on this socket
- * and we know that there are no bind conflicts with
- * this socket in this tb, so reset our tb's reuseport
- * settings so that any subsequent sockets that match
- * our current socket will be put on the fast path.
- *
- * If we reset we need to set FASTREUSEPORT_STRICT so we
- * do extra checking for all subsequent sk_reuseport
- * socks.
- */
- if (!sk_reuseport_match(tb, sk)) {
- tb->fastreuseport = FASTREUSEPORT_STRICT;
- tb->fastuid = uid;
- tb->fast_rcv_saddr = sk->sk_rcv_saddr;
- tb->fast_ipv6_only = ipv6_only_sock(sk);
- tb->fast_sk_family = sk->sk_family;
-#if IS_ENABLED(CONFIG_IPV6)
- tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
-#endif
- }
- } else {
- tb->fastreuseport = 0;
- }
- }
+ inet_csk_update_fastreuse(tb, sk);
+
if (!inet_csk(sk)->icsk_bind_hash)
inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 4eb4cd8d20dd..239e54474b65 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -163,6 +163,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
return -ENOMEM;
}
}
+ inet_csk_update_fastreuse(tb, child);
}
inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5653e3b011bf..54023a46db04 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -301,24 +301,16 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
2 * TCP_FASTOPEN_KEY_MAX) +
(TCP_FASTOPEN_KEY_MAX * 5)) };
- struct tcp_fastopen_context *ctx;
- u32 user_key[TCP_FASTOPEN_KEY_MAX * 4];
- __le32 key[TCP_FASTOPEN_KEY_MAX * 4];
+ u32 user_key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u32)];
+ __le32 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(__le32)];
char *backup_data;
- int ret, i = 0, off = 0, n_keys = 0;
+ int ret, i = 0, off = 0, n_keys;
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data)
return -ENOMEM;
- rcu_read_lock();
- ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
- if (ctx) {
- n_keys = tcp_fastopen_context_len(ctx);
- memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys);
- }
- rcu_read_unlock();
-
+ n_keys = tcp_fastopen_get_cipher(net, NULL, (u64 *)key);
if (!n_keys) {
memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH);
n_keys = 1;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c06d2bfd2ec4..31f3b858db81 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3685,22 +3685,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0;
case TCP_FASTOPEN_KEY: {
- __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
- struct tcp_fastopen_context *ctx;
- unsigned int key_len = 0;
+ u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)];
+ unsigned int key_len;
if (get_user(len, optlen))
return -EFAULT;
- rcu_read_lock();
- ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
- if (ctx) {
- key_len = tcp_fastopen_context_len(ctx) *
- TCP_FASTOPEN_KEY_LENGTH;
- memcpy(&key[0], &ctx->key[0], key_len);
- }
- rcu_read_unlock();
-
+ key_len = tcp_fastopen_get_cipher(net, icsk, key) *
+ TCP_FASTOPEN_KEY_LENGTH;
len = min_t(unsigned int, len, key_len);
if (put_user(len, optlen))
return -EFAULT;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index c1a54f3d58f5..09b62de04eea 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -108,6 +108,29 @@ out:
return err;
}
+int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
+ u64 *key)
+{
+ struct tcp_fastopen_context *ctx;
+ int n_keys = 0, i;
+
+ rcu_read_lock();
+ if (icsk)
+ ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
+ else
+ ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctx) {
+ n_keys = tcp_fastopen_context_len(ctx);
+ for (i = 0; i < n_keys; i++) {
+ put_unaligned_le64(ctx->key[i].key[0], key + (i * 2));
+ put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1);
+ }
+ }
+ rcu_read_unlock();
+
+ return n_keys;
+}
+
static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
struct sk_buff *syn,
const siphash_key_t *key,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 96f4f2fe50ad..e8cac2655c82 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -423,12 +423,12 @@ static void mptcp_sock_destruct(struct sock *sk)
* also remove the mptcp socket, via
* sock_put(ctx->conn).
*
- * Problem is that the mptcp socket will not be in
- * SYN_RECV state and doesn't have SOCK_DEAD flag.
+ * Problem is that the mptcp socket will be in
+ * ESTABLISHED state and will not have the SOCK_DEAD flag.
* Both result in warnings from inet_sock_destruct.
*/
- if (sk->sk_state == TCP_SYN_RECV) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
sk->sk_state = TCP_CLOSE;
WARN_ON_ONCE(sk->sk_socket);
sock_orphan(sk);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e38b60fc183e..5b97d233f89b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
-seqcount_t nf_conntrack_generation __read_mostly;
+seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static unsigned int nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
@@ -2588,7 +2588,8 @@ int nf_conntrack_init_start(void)
/* struct nf_ct_ext uses u8 to store offsets/size */
BUILD_BUG_ON(total_extension_size() > 255u);
- seqcount_init(&nf_conntrack_generation);
+ seqcount_spinlock_init(&nf_conntrack_generation,
+ &nf_conntrack_locks_all_lock);
for (i = 0; i < CONNTRACK_LOCKS; i++)
spin_lock_init(&nf_conntrack_locks[i]);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index b6aad3fc46c3..4b2834fd17b2 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -18,7 +18,7 @@
struct nft_rbtree {
struct rb_root root;
rwlock_t lock;
- seqcount_t count;
+ seqcount_rwlock_t count;
struct delayed_work gc_work;
};
@@ -523,7 +523,7 @@ static int nft_rbtree_init(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
rwlock_init(&priv->lock);
- seqcount_init(&priv->count);
+ seqcount_rwlock_init(&priv->count, &priv->lock);
priv->root = RB_ROOT;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index b2061b6746ea..955c195ae14b 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -328,10 +328,13 @@ static int rawsock_create(struct net *net, struct socket *sock,
if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW))
return -ESOCKTNOSUPPORT;
- if (sock->type == SOCK_RAW)
+ if (sock->type == SOCK_RAW) {
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
sock->ops = &rawsock_raw_ops;
- else
+ } else {
sock->ops = &rawsock_ops;
+ }
sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern);
if (!sk)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 42f8cc70bb2c..6e47ef7ef036 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1756,6 +1756,7 @@ err:
/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
+ struct flow_table *table = &dp->table;
int i;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
@@ -1774,7 +1775,14 @@ static void __dp_destroy(struct datapath *dp)
*/
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
- /* RCU destroy the flow table */
+ /* Flush sw_flow in the tables. RCU cb only releases resource
+ * such as dp, ports and tables. That may avoid some issues
+ * such as RCU usage warning.
+ */
+ table_instance_flow_flush(table, ovsl_dereference(table->ti),
+ ovsl_dereference(table->ufid_ti));
+
+ /* RCU destroy the ports, meters and flow tables. */
call_rcu(&dp->rcu, destroy_dp_rcu);
}
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 8c12675cbb67..e2235849a57e 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -473,19 +473,15 @@ static void table_instance_flow_free(struct flow_table *table,
flow_mask_remove(table, flow->mask);
}
-static void table_instance_destroy(struct flow_table *table,
- struct table_instance *ti,
- struct table_instance *ufid_ti,
- bool deferred)
+/* Must be called with OVS mutex held. */
+void table_instance_flow_flush(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti)
{
int i;
- if (!ti)
- return;
-
- BUG_ON(!ufid_ti);
if (ti->keep_flows)
- goto skip_flows;
+ return;
for (i = 0; i < ti->n_buckets; i++) {
struct sw_flow *flow;
@@ -497,18 +493,16 @@ static void table_instance_destroy(struct flow_table *table,
table_instance_flow_free(table, ti, ufid_ti,
flow, false);
- ovs_flow_free(flow, deferred);
+ ovs_flow_free(flow, true);
}
}
+}
-skip_flows:
- if (deferred) {
- call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
- call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
- } else {
- __table_instance_destroy(ti);
- __table_instance_destroy(ufid_ti);
- }
+static void table_instance_destroy(struct table_instance *ti,
+ struct table_instance *ufid_ti)
+{
+ call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+ call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
}
/* No need for locking this function is called from RCU callback or
@@ -523,7 +517,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
call_rcu(&mc->rcu, mask_cache_rcu_cb);
call_rcu(&ma->rcu, mask_array_rcu_cb);
- table_instance_destroy(table, ti, ufid_ti, false);
+ table_instance_destroy(ti, ufid_ti);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -641,7 +635,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
flow_table->count = 0;
flow_table->ufid_count = 0;
- table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
+ table_instance_flow_flush(flow_table, old_ti, old_ufid_ti);
+ table_instance_destroy(old_ti, old_ufid_ti);
return 0;
err_free_ti:
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 74ce48fecba9..6e7d4ac59353 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -105,5 +105,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
bool full, const struct sw_flow_mask *mask);
void ovs_flow_masks_rebalance(struct flow_table *table);
+void table_instance_flow_flush(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti);
#endif /* flow_table.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0b8160d1a6e0..479c257ded73 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -941,6 +941,7 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
}
static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
+ __releases(&pkc->blk_fill_in_prog_lock)
{
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
@@ -989,6 +990,7 @@ static void prb_fill_curr_block(char *curr,
struct tpacket_kbdq_core *pkc,
struct tpacket_block_desc *pbd,
unsigned int len)
+ __acquires(&pkc->blk_fill_in_prog_lock)
{
struct tpacket3_hdr *ppd;
@@ -2286,8 +2288,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (do_vnet &&
virtio_net_hdr_from_skb(skb, h.raw + macoff -
sizeof(struct virtio_net_hdr),
- vio_le(), true, 0))
+ vio_le(), true, 0)) {
+ if (po->tp_version == TPACKET_V3)
+ prb_clear_blk_fill_status(&po->rx_ring);
goto drop_n_account;
+ }
if (po->tp_version <= TPACKET_V2) {
packet_increment_rx_head(po, &po->rx_ring);
@@ -2393,7 +2398,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
__clear_bit(slot_id, po->rx_ring.rx_owner_map);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
- } else {
+ } else if (po->tp_version == TPACKET_V3) {
prb_clear_blk_fill_status(&po->rx_ring);
}
diff --git a/net/socket.c b/net/socket.c
index aff52e81653c..dbbe8ea7d395 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -500,7 +500,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
if (f.file) {
sock = sock_from_file(f.file, err);
if (likely(sock)) {
- *fput_needed = f.flags;
+ *fput_needed = f.flags & FDPUT_FPUT;
return sock;
}
fdput(f);
@@ -1325,7 +1325,7 @@ int sock_wake_async(struct socket_wq *wq, int how, int band)
case SOCK_WAKE_SPACE:
if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
break;
- /* fall through */
+ fallthrough;
case SOCK_WAKE_IO:
call_kill:
kill_fasync(&wq->fasync_list, SIGIO, band);
@@ -1804,8 +1804,7 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
upeer_addrlen, flags,
rlimit(RLIMIT_NOFILE));
- if (f.flags)
- fput(f.file);
+ fdput(f);
}
return ret;
@@ -1868,8 +1867,7 @@ int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
ret = move_addr_to_kernel(uservaddr, addrlen, &address);
if (!ret)
ret = __sys_connect_file(f.file, &address, addrlen, 0);
- if (f.flags)
- fput(f.file);
+ fdput(f);
}
return ret;
@@ -2097,7 +2095,7 @@ static bool sock_use_custom_sol_socket(const struct socket *sock)
int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
int optlen)
{
- sockptr_t optval;
+ sockptr_t optval = USER_SOCKPTR(user_optval);
char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
@@ -2105,10 +2103,6 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
if (optlen < 0)
return -EINVAL;
- err = init_user_sockptr(&optval, user_optval, optlen);
- if (err)
- return err;
-
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
return err;
@@ -3065,7 +3059,7 @@ static int __init sock_init(void)
err = register_filesystem(&sock_fs_type);
if (err)
- goto out_fs;
+ goto out;
sock_mnt = kern_mount(&sock_fs_type);
if (IS_ERR(sock_mnt)) {
err = PTR_ERR(sock_mnt);
@@ -3088,7 +3082,6 @@ out:
out_mount:
unregister_filesystem(&sock_fs_type);
-out_fs:
goto out;
}
@@ -3161,13 +3154,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
return -ENOMEM;
buf_size += rule_cnt * sizeof(u32);
- /* fall through */
+ fallthrough;
case ETHTOOL_GRXRINGS:
case ETHTOOL_GRXCLSRLCNT:
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_SRXCLSRLINS:
convert_out = true;
- /* fall through */
+ fallthrough;
case ETHTOOL_SRXCLSRLDEL:
buf_size += sizeof(struct ethtool_rxnfc);
convert_in = true;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index cf0fd170ac18..90b8329fef82 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -584,7 +584,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
buf->head[0].iov_len);
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
- buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip;
+ buf->len = len - (GSS_KRB5_TOK_HDR_LEN + headskip);
/* Trim off the trailing "extra count" and checksum blob */
xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 46027d0c903f..258b04372f85 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -332,7 +332,7 @@ static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct r
struct gss_svc_seq_data {
/* highest seq number seen so far: */
- int sd_max;
+ u32 sd_max;
/* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, the i-th bit of
* sd_win is nonzero iff sequence number i has been seen already: */
unsigned long sd_win[GSS_SEQ_WIN/BITS_PER_LONG];
@@ -613,16 +613,29 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle)
return found;
}
-/* Implements sequence number algorithm as specified in RFC 2203. */
-static int
-gss_check_seq_num(struct rsc *rsci, int seq_num)
+/**
+ * gss_check_seq_num - GSS sequence number window check
+ * @rqstp: RPC Call to use when reporting errors
+ * @rsci: cached GSS context state (updated on return)
+ * @seq_num: sequence number to check
+ *
+ * Implements sequence number algorithm as specified in
+ * RFC 2203, Section 5.3.3.1. "Context Management".
+ *
+ * Return values:
+ * %true: @rqstp's GSS sequence number is inside the window
+ * %false: @rqstp's GSS sequence number is outside the window
+ */
+static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci,
+ u32 seq_num)
{
struct gss_svc_seq_data *sd = &rsci->seqdata;
+ bool result = false;
spin_lock(&sd->sd_lock);
if (seq_num > sd->sd_max) {
if (seq_num >= sd->sd_max + GSS_SEQ_WIN) {
- memset(sd->sd_win,0,sizeof(sd->sd_win));
+ memset(sd->sd_win, 0, sizeof(sd->sd_win));
sd->sd_max = seq_num;
} else while (sd->sd_max < seq_num) {
sd->sd_max++;
@@ -631,17 +644,25 @@ gss_check_seq_num(struct rsc *rsci, int seq_num)
__set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win);
goto ok;
} else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) {
- goto drop;
+ goto toolow;
}
- /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */
if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win))
- goto drop;
+ goto alreadyseen;
+
ok:
+ result = true;
+out:
spin_unlock(&sd->sd_lock);
- return 1;
-drop:
- spin_unlock(&sd->sd_lock);
- return 0;
+ return result;
+
+toolow:
+ trace_rpcgss_svc_seqno_low(rqstp, seq_num,
+ sd->sd_max - GSS_SEQ_WIN,
+ sd->sd_max);
+ goto out;
+alreadyseen:
+ trace_rpcgss_svc_seqno_seen(rqstp, seq_num);
+ goto out;
}
static inline u32 round_up_to_quad(u32 i)
@@ -721,14 +742,12 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
}
if (gc->gc_seq > MAXSEQ) {
- trace_rpcgss_svc_large_seqno(rqstp->rq_xid, gc->gc_seq);
+ trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq);
*authp = rpcsec_gsserr_ctxproblem;
return SVC_DENIED;
}
- if (!gss_check_seq_num(rsci, gc->gc_seq)) {
- trace_rpcgss_svc_old_seqno(rqstp->rq_xid, gc->gc_seq);
+ if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq))
return SVC_DROP;
- }
return SVC_OK;
}
@@ -866,11 +885,13 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
static int
unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
{
+ u32 integ_len, rseqno, maj_stat;
int stat = -EINVAL;
- u32 integ_len, maj_stat;
struct xdr_netobj mic;
struct xdr_buf integ_buf;
+ mic.data = NULL;
+
/* NFS READ normally uses splice to send data in-place. However
* the data in cache can change after the reply's MIC is computed
* but before the RPC reply is sent. To prevent the client from
@@ -885,34 +906,44 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
integ_len = svc_getnl(&buf->head[0]);
if (integ_len & 3)
- return stat;
+ goto unwrap_failed;
if (integ_len > buf->len)
- return stat;
- if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
- WARN_ON_ONCE(1);
- return stat;
- }
+ goto unwrap_failed;
+ if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
+ goto unwrap_failed;
+
/* copy out mic... */
if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
- return stat;
+ goto unwrap_failed;
if (mic.len > RPC_MAX_AUTH_SIZE)
- return stat;
+ goto unwrap_failed;
mic.data = kmalloc(mic.len, GFP_KERNEL);
if (!mic.data)
- return stat;
+ goto unwrap_failed;
if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
- goto out;
+ goto unwrap_failed;
maj_stat = gss_verify_mic(ctx, &integ_buf, &mic);
if (maj_stat != GSS_S_COMPLETE)
- goto out;
- if (svc_getnl(&buf->head[0]) != seq)
- goto out;
+ goto bad_mic;
+ rseqno = svc_getnl(&buf->head[0]);
+ if (rseqno != seq)
+ goto bad_seqno;
/* trim off the mic and padding at the end before returning */
xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
stat = 0;
out:
kfree(mic.data);
return stat;
+
+unwrap_failed:
+ trace_rpcgss_svc_unwrap_failed(rqstp);
+ goto out;
+bad_seqno:
+ trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno);
+ goto out;
+bad_mic:
+ trace_rpcgss_svc_mic(rqstp, maj_stat);
+ goto out;
}
static inline int
@@ -937,6 +968,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
{
u32 priv_len, maj_stat;
int pad, remaining_len, offset;
+ u32 rseqno;
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
@@ -951,14 +983,13 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
* not yet read from the head, so these two values are different: */
remaining_len = total_buf_len(buf);
if (priv_len > remaining_len)
- return -EINVAL;
+ goto unwrap_failed;
pad = remaining_len - priv_len;
buf->len -= pad;
fix_priv_head(buf, pad);
maj_stat = gss_unwrap(ctx, 0, priv_len, buf);
pad = priv_len - buf->len;
- buf->len -= pad;
/* The upper layers assume the buffer is aligned on 4-byte boundaries.
* In the krb5p case, at least, the data ends up offset, so we need to
* move it around. */
@@ -972,11 +1003,22 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
fix_priv_head(buf, pad);
}
if (maj_stat != GSS_S_COMPLETE)
- return -EINVAL;
+ goto bad_unwrap;
out_seq:
- if (svc_getnl(&buf->head[0]) != seq)
- return -EINVAL;
+ rseqno = svc_getnl(&buf->head[0]);
+ if (rseqno != seq)
+ goto bad_seqno;
return 0;
+
+unwrap_failed:
+ trace_rpcgss_svc_unwrap_failed(rqstp);
+ return -EINVAL;
+bad_seqno:
+ trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno);
+ return -EINVAL;
+bad_unwrap:
+ trace_rpcgss_svc_unwrap(rqstp, maj_stat);
+ return -EINVAL;
}
struct gss_svc_data {
@@ -1314,8 +1356,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
if (status)
goto out;
- trace_rpcgss_svc_accept_upcall(rqstp->rq_xid, ud.major_status,
- ud.minor_status);
+ trace_rpcgss_svc_accept_upcall(rqstp, ud.major_status, ud.minor_status);
switch (ud.major_status) {
case GSS_S_CONTINUE_NEEDED:
@@ -1490,8 +1531,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
- trace_rpcgss_svc_accept(rqstp->rq_xid, argv->iov_len);
-
*authp = rpc_autherr_badcred;
if (!svcdata)
svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
@@ -1608,6 +1647,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
GSS_C_QOP_DEFAULT,
gc->gc_svc);
ret = SVC_OK;
+ trace_rpcgss_svc_authenticate(rqstp, gc);
goto out;
}
garbage_args:
diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c
index 49fa583d7f91..d26036a57443 100644
--- a/net/sunrpc/auth_gss/trace.c
+++ b/net/sunrpc/auth_gss/trace.c
@@ -5,6 +5,9 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
+#include <linux/sunrpc/auth_gss.h>
#include <linux/sunrpc/gss_err.h>
#include <linux/sunrpc/auth_gss.h>
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index e9d0953522f0..eadc0ede928c 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1510,6 +1510,6 @@ err_notifier:
void unregister_rpc_pipefs(void)
{
rpc_clients_notifier_unregister();
- kmem_cache_destroy(rpc_inode_cachep);
unregister_filesystem(&rpc_pipe_fs_type);
+ kmem_cache_destroy(rpc_inode_cachep);
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d5cc5db9dbf3..6ba9d5842629 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -607,6 +607,11 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req)
req->rq_majortimeo += xprt_calc_majortimeo(req);
}
+static void xprt_reset_minortimeo(struct rpc_rqst *req)
+{
+ req->rq_minortimeo += req->rq_timeout;
+}
+
static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
{
unsigned long time_init;
@@ -618,6 +623,7 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
req->rq_timeout = task->tk_client->cl_timeout->to_initval;
req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
+ req->rq_minortimeo = time_init + req->rq_timeout;
}
/**
@@ -631,6 +637,8 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
int status = 0;
+ if (time_before(jiffies, req->rq_minortimeo))
+ return status;
if (time_before(jiffies, req->rq_majortimeo)) {
if (to->to_exponential)
req->rq_timeout <<= 1;
@@ -649,6 +657,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
spin_unlock(&xprt->transport_lock);
status = -ETIMEDOUT;
}
+ xprt_reset_minortimeo(req);
if (req->rq_timeout == 0) {
printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index b647562a26dd..7f94c9a19fd3 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -40,7 +40,6 @@
* New MRs are created on demand.
*/
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 453bacc99907..0f5120c7668f 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -275,14 +275,6 @@ out:
return n;
}
-static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
-{
- *iptr++ = cpu_to_be32(mr->mr_handle);
- *iptr++ = cpu_to_be32(mr->mr_length);
- xdr_encode_hyper(iptr, mr->mr_offset);
-}
-
static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
@@ -292,7 +284,7 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
if (unlikely(!p))
return -EMSGSIZE;
- xdr_encode_rdma_segment(p, mr);
+ xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset);
return 0;
}
@@ -307,8 +299,8 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
return -EMSGSIZE;
*p++ = xdr_one; /* Item present */
- *p++ = cpu_to_be32(position);
- xdr_encode_rdma_segment(p, mr);
+ xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length,
+ mr->mr_offset);
return 0;
}
@@ -1133,11 +1125,11 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
p = xdr_inline_decode(xdr, 0);
/* Chunk lists */
- if (*p++ != xdr_zero)
+ if (xdr_item_is_present(p++))
return false;
- if (*p++ != xdr_zero)
+ if (xdr_item_is_present(p++))
return false;
- if (*p++ != xdr_zero)
+ if (xdr_item_is_present(p++))
return false;
/* RPC header */
@@ -1176,10 +1168,7 @@ static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
if (unlikely(!p))
return -EIO;
- handle = be32_to_cpup(p++);
- *length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
-
+ xdr_decode_rdma_segment(p, &handle, length, &offset);
trace_xprtrdma_decode_seg(handle, *length, offset);
return 0;
}
@@ -1215,7 +1204,7 @@ static int decode_read_list(struct xdr_stream *xdr)
p = xdr_inline_decode(xdr, sizeof(*p));
if (unlikely(!p))
return -EIO;
- if (unlikely(*p != xdr_zero))
+ if (unlikely(xdr_item_is_present(p)))
return -EIO;
return 0;
}
@@ -1234,7 +1223,7 @@ static int decode_write_list(struct xdr_stream *xdr, u32 *length)
p = xdr_inline_decode(xdr, sizeof(*p));
if (unlikely(!p))
return -EIO;
- if (*p == xdr_zero)
+ if (xdr_item_is_absent(p))
break;
if (!first)
return -EIO;
@@ -1256,7 +1245,7 @@ static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
return -EIO;
*length = 0;
- if (*p != xdr_zero)
+ if (xdr_item_is_present(p))
if (decode_write_chunk(xdr, length))
return -EIO;
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 1ee73f7cf931..5e7c4ba9e147 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -87,7 +87,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
ctxt->sc_send_wr.opcode = IB_WR_SEND;
- return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+ return svc_rdma_send(rdma, ctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index e426fedb9524..c6ea2903c21a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -117,6 +117,13 @@ svc_rdma_next_recv_ctxt(struct list_head *list)
rc_list);
}
+static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_rq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -135,6 +142,8 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
goto fail2;
+ svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid);
+
ctxt->rc_recv_wr.next = NULL;
ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
@@ -248,16 +257,15 @@ static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
{
int ret;
- svc_xprt_get(&rdma->sc_xprt);
+ trace_svcrdma_post_recv(ctxt);
ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
- trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
if (ret)
goto err_post;
return 0;
err_post:
+ trace_svcrdma_rq_post_err(rdma, ret);
svc_rdma_recv_ctxt_put(rdma, ctxt);
- svc_xprt_put(&rdma->sc_xprt);
return ret;
}
@@ -265,6 +273,8 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
+ if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+ return 0;
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
return -ENOMEM;
@@ -309,11 +319,10 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_recv_ctxt *ctxt;
- trace_svcrdma_wc_receive(wc);
-
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
+ trace_svcrdma_wc_receive(wc, &ctxt->rc_cid);
if (wc->status != IB_WC_SUCCESS)
goto flushed;
@@ -333,15 +342,13 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);
- goto out;
+ return;
flushed:
post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
-out:
- svc_xprt_put(&rdma->sc_xprt);
}
/**
@@ -419,7 +426,7 @@ static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
len = 0;
first = true;
- while (*p != xdr_zero) {
+ while (xdr_item_is_present(p)) {
p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_readseg_maxsz * sizeof(*p));
if (!p)
@@ -466,9 +473,7 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
if (!p)
return false;
- handle = be32_to_cpup(p++);
- length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
+ xdr_decode_rdma_segment(p, &handle, &length, &offset);
trace_svcrdma_decode_wseg(handle, length, offset);
total += length;
@@ -500,7 +505,7 @@ static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
if (!p)
return false;
rctxt->rc_write_list = p;
- while (*p != xdr_zero) {
+ while (xdr_item_is_present(p)) {
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
return false;
++chcount;
@@ -532,12 +537,11 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
- rctxt->rc_reply_chunk = p;
- if (*p != xdr_zero) {
+ rctxt->rc_reply_chunk = NULL;
+ if (xdr_item_is_present(p)) {
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
return false;
- } else {
- rctxt->rc_reply_chunk = NULL;
+ rctxt->rc_reply_chunk = p;
}
return true;
}
@@ -568,7 +572,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
p += rpcrdma_fixed_maxsz;
/* Read list */
- while (*p++ != xdr_zero) {
+ while (xdr_item_is_present(p++)) {
p++; /* position */
if (inv_rkey == xdr_zero)
inv_rkey = *p;
@@ -578,7 +582,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
}
/* Write list */
- while (*p++ != xdr_zero) {
+ while (xdr_item_is_present(p++)) {
segcount = be32_to_cpup(p++);
for (i = 0; i < segcount; i++) {
if (inv_rkey == xdr_zero)
@@ -590,7 +594,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
}
/* Reply chunk */
- if (*p++ != xdr_zero) {
+ if (xdr_item_is_present(p++)) {
segcount = be32_to_cpup(p++);
for (i = 0; i < segcount; i++) {
if (inv_rkey == xdr_zero)
@@ -661,27 +665,27 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
hdr_len = xdr_stream_pos(&rctxt->rc_stream);
rq_arg->head[0].iov_len -= hdr_len;
rq_arg->len -= hdr_len;
- trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
+ trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len);
return hdr_len;
out_short:
- trace_svcrdma_decode_short_err(rq_arg->len);
+ trace_svcrdma_decode_short_err(rctxt, rq_arg->len);
return -EINVAL;
out_version:
- trace_svcrdma_decode_badvers_err(rdma_argp);
+ trace_svcrdma_decode_badvers_err(rctxt, rdma_argp);
return -EPROTONOSUPPORT;
out_drop:
- trace_svcrdma_decode_drop_err(rdma_argp);
+ trace_svcrdma_decode_drop_err(rctxt, rdma_argp);
return 0;
out_proc:
- trace_svcrdma_decode_badproc_err(rdma_argp);
+ trace_svcrdma_decode_badproc_err(rctxt, rdma_argp);
return -EINVAL;
out_inval:
- trace_svcrdma_decode_parse_err(rdma_argp);
+ trace_svcrdma_decode_parse_err(rctxt, rdma_argp);
return -EINVAL;
}
@@ -714,57 +718,16 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_arg.buflen = head->rc_arg.buflen;
}
-static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
- __be32 *rdma_argp, int status)
+static void svc_rdma_send_error(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *rctxt,
+ int status)
{
- struct svc_rdma_send_ctxt *ctxt;
- __be32 *p;
- int ret;
+ struct svc_rdma_send_ctxt *sctxt;
- ctxt = svc_rdma_send_ctxt_get(xprt);
- if (!ctxt)
+ sctxt = svc_rdma_send_ctxt_get(rdma);
+ if (!sctxt)
return;
-
- p = xdr_reserve_space(&ctxt->sc_stream,
- rpcrdma_fixed_maxsz * sizeof(*p));
- if (!p)
- goto put_ctxt;
-
- *p++ = *rdma_argp;
- *p++ = *(rdma_argp + 1);
- *p++ = xprt->sc_fc_credits;
- *p = rdma_error;
-
- switch (status) {
- case -EPROTONOSUPPORT:
- p = xdr_reserve_space(&ctxt->sc_stream, 3 * sizeof(*p));
- if (!p)
- goto put_ctxt;
-
- *p++ = err_vers;
- *p++ = rpcrdma_version;
- *p = rpcrdma_version;
- trace_svcrdma_err_vers(*rdma_argp);
- break;
- default:
- p = xdr_reserve_space(&ctxt->sc_stream, sizeof(*p));
- if (!p)
- goto put_ctxt;
-
- *p = err_chunk;
- trace_svcrdma_err_chunk(*rdma_argp);
- }
-
- ctxt->sc_send_wr.num_sge = 1;
- ctxt->sc_send_wr.opcode = IB_WR_SEND;
- ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len;
- ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);
- if (ret)
- goto put_ctxt;
- return;
-
-put_ctxt:
- svc_rdma_send_ctxt_put(xprt, ctxt);
+ svc_rdma_send_error_msg(rdma, sctxt, rctxt, status);
}
/* By convention, backchannel calls arrive via rdma_msg type
@@ -900,13 +863,13 @@ out_readchunk:
return 0;
out_err:
- svc_rdma_send_error(rdma_xprt, p, ret);
+ svc_rdma_send_error(rdma_xprt, ctxt, ret);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
out_postfail:
if (ret == -EINVAL)
- svc_rdma_send_error(rdma_xprt, p, ret);
+ svc_rdma_send_error(rdma_xprt, ctxt, ret);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5eb35309ecef..fe54cbe97a46 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -7,6 +7,7 @@
#include <rdma/rw.h>
+#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
@@ -144,17 +145,25 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
* demand, and not cached.
*/
struct svc_rdma_chunk_ctxt {
+ struct rpc_rdma_cid cc_cid;
struct ib_cqe cc_cqe;
struct svcxprt_rdma *cc_rdma;
struct list_head cc_rwctxts;
int cc_sqecount;
};
+static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_sq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc)
{
+ svc_rdma_cc_cid_init(rdma, &cc->cc_cid);
cc->cc_rdma = rdma;
- svc_xprt_get(&rdma->sc_xprt);
INIT_LIST_HEAD(&cc->cc_rwctxts);
cc->cc_sqecount = 0;
@@ -174,7 +183,6 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
ctxt->rw_nents, dir);
svc_rdma_put_rw_ctxt(rdma, ctxt);
}
- svc_xprt_put(&rdma->sc_xprt);
}
/* State for sending a Write or Reply chunk.
@@ -236,7 +244,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
- trace_svcrdma_wc_write(wc);
+ trace_svcrdma_wc_write(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -294,7 +302,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_read_info *info =
container_of(cc, struct svc_rdma_read_info, ri_cc);
- trace_svcrdma_wc_read(wc);
+ trace_svcrdma_wc_read(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -350,6 +358,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
do {
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
+ trace_svcrdma_post_chunk(&cc->cc_cid, cc->cc_sqecount);
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
if (ret)
break;
@@ -441,34 +450,32 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
do {
unsigned int write_len;
- u32 seg_length, seg_handle;
- u64 seg_offset;
+ u32 handle, length;
+ u64 offset;
if (info->wi_seg_no >= info->wi_nsegs)
goto out_overflow;
- seg_handle = be32_to_cpup(seg);
- seg_length = be32_to_cpup(seg + 1);
- xdr_decode_hyper(seg + 2, &seg_offset);
- seg_offset += info->wi_seg_off;
+ xdr_decode_rdma_segment(seg, &handle, &length, &offset);
+ offset += info->wi_seg_off;
- write_len = min(remaining, seg_length - info->wi_seg_off);
+ write_len = min(remaining, length - info->wi_seg_off);
ctxt = svc_rdma_get_rw_ctxt(rdma,
(write_len >> PAGE_SHIFT) + 2);
if (!ctxt)
return -ENOMEM;
constructor(info, write_len, ctxt);
- ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle,
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, handle,
DMA_TO_DEVICE);
if (ret < 0)
return -EIO;
- trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset);
+ trace_svcrdma_send_wseg(handle, write_len, offset);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
- if (write_len == seg_length - info->wi_seg_off) {
+ if (write_len == length - info->wi_seg_off) {
seg += 4;
info->wi_seg_no++;
info->wi_seg_off = 0;
@@ -684,35 +691,24 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
- unsigned int i;
int ret;
ret = -EINVAL;
info->ri_chunklen = 0;
while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
- u32 rs_handle, rs_length;
- u64 rs_offset;
-
- rs_handle = be32_to_cpup(p++);
- rs_length = be32_to_cpup(p++);
- p = xdr_decode_hyper(p, &rs_offset);
+ u32 handle, length;
+ u64 offset;
- ret = svc_rdma_build_read_segment(info, rqstp,
- rs_handle, rs_length,
- rs_offset);
+ p = xdr_decode_rdma_segment(p, &handle, &length, &offset);
+ ret = svc_rdma_build_read_segment(info, rqstp, handle, length,
+ offset);
if (ret < 0)
break;
- trace_svcrdma_send_rseg(rs_handle, rs_length, rs_offset);
- info->ri_chunklen += rs_length;
+ trace_svcrdma_send_rseg(handle, length, offset);
+ info->ri_chunklen += length;
}
- /* Pages under I/O have been copied to head->rc_pages.
- * Prevent their premature release by svc_xprt_release() .
- */
- for (i = 0; i < info->ri_readctxt->rc_page_count; i++)
- rqstp->rq_pages[i] = NULL;
-
return ret;
}
@@ -807,6 +803,26 @@ out:
return ret;
}
+/* Pages under I/O have been copied to head->rc_pages. Ensure they
+ * are not released by svc_xprt_release() until the I/O is complete.
+ *
+ * This has to be done after all Read WRs are constructed to properly
+ * handle a page that is part of I/O on behalf of two different RDMA
+ * segments.
+ *
+ * Do this only if I/O has been posted. Otherwise, we do indeed want
+ * svc_xprt_release() to clean things up properly.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+ const unsigned int start,
+ const unsigned int num_pages)
+{
+ unsigned int i;
+
+ for (i = start; i < num_pages + start; i++)
+ rqstp->rq_pages[i] = NULL;
+}
+
/**
* svc_rdma_recv_read_chunk - Pull a Read chunk from the client
* @rdma: controlling RDMA transport
@@ -860,6 +876,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
ret = svc_rdma_post_chunk_ctxt(&info->ri_cc);
if (ret < 0)
goto out_err;
+ svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count);
return 0;
out_err:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 38e7c3c8c4a9..7b94d971feb3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -106,7 +106,6 @@
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
@@ -123,6 +122,13 @@ svc_rdma_next_send_ctxt(struct list_head *list)
sc_list);
}
+static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_sq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -145,6 +151,8 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
goto fail2;
+ svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
+
ctxt->sc_send_wr.next = NULL;
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
@@ -269,34 +277,33 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
- struct svc_rdma_send_ctxt *ctxt;
+ struct svc_rdma_send_ctxt *ctxt =
+ container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
- trace_svcrdma_wc_send(wc);
+ trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
atomic_inc(&rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
- ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
svc_rdma_send_ctxt_put(rdma, ctxt);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
-
- svc_xprt_put(&rdma->sc_xprt);
}
/**
* svc_rdma_send - Post a single Send WR
* @rdma: transport on which to post the WR
- * @wr: prepared Send WR to post
+ * @ctxt: send ctxt with a Send WR ready to post
*
* Returns zero the Send WR was posted successfully. Otherwise, a
* negative errno is returned.
*/
-int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
+int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
{
+ struct ib_send_wr *wr = &ctxt->sc_send_wr;
int ret;
might_sleep();
@@ -321,8 +328,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
continue;
}
- svc_xprt_get(&rdma->sc_xprt);
- trace_svcrdma_post_send(wr);
+ trace_svcrdma_post_send(ctxt);
ret = ib_post_send(rdma->sc_qp, wr, NULL);
if (ret)
break;
@@ -331,7 +337,6 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_xprt_put(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
}
@@ -375,11 +380,8 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src,
if (!p)
return -EMSGSIZE;
- handle = be32_to_cpup(src++);
- length = be32_to_cpup(src++);
- xdr_decode_hyper(src, &offset);
+ xdr_decode_rdma_segment(src, &handle, &length, &offset);
- *p++ = cpu_to_be32(handle);
if (*remaining < length) {
/* segment only partly filled */
length = *remaining;
@@ -388,8 +390,7 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src,
/* entire segment was consumed */
*remaining -= length;
}
- *p++ = cpu_to_be32(length);
- xdr_encode_hyper(p, offset);
+ xdr_encode_rdma_segment(p, handle, length, offset);
trace_svcrdma_encode_wseg(handle, length, offset);
return len;
@@ -801,45 +802,76 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
} else {
sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
- return svc_rdma_send(rdma, &sctxt->sc_send_wr);
+ return svc_rdma_send(rdma, sctxt);
}
-/* Given the client-provided Write and Reply chunks, the server was not
- * able to form a complete reply. Return an RDMA_ERROR message so the
- * client can retire this RPC transaction. As above, the Send completion
- * routine releases payload pages that were part of a previous RDMA Write.
- *
- * Remote Invalidation is skipped for simplicity.
+/**
+ * svc_rdma_send_error_msg - Send an RPC/RDMA v1 error response
+ * @rdma: controlling transport context
+ * @sctxt: Send context for the response
+ * @rctxt: Receive context for incoming bad message
+ * @status: negative errno indicating error that occurred
+ *
+ * Given the client-provided Read, Write, and Reply chunks, the
+ * server was not able to parse the Call or form a complete Reply.
+ * Return an RDMA_ERROR message so the client can retire the RPC
+ * transaction.
+ *
+ * The caller does not have to release @sctxt. It is released by
+ * Send completion, or by this function on error.
*/
-static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt,
- struct svc_rqst *rqstp)
+void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *sctxt,
+ struct svc_rdma_recv_ctxt *rctxt,
+ int status)
{
- struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *rdma_argp = rctxt->rc_recv_buf;
__be32 *p;
- rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
- xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
- NULL);
+ rpcrdma_set_xdrlen(&sctxt->sc_hdrbuf, 0);
+ xdr_init_encode(&sctxt->sc_stream, &sctxt->sc_hdrbuf,
+ sctxt->sc_xprt_buf, NULL);
- p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_ERR);
+ p = xdr_reserve_space(&sctxt->sc_stream,
+ rpcrdma_fixed_maxsz * sizeof(*p));
if (!p)
- return -ENOMSG;
+ goto put_ctxt;
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits;
- *p++ = rdma_error;
- *p = err_chunk;
- trace_svcrdma_err_chunk(*rdma_argp);
+ *p = rdma_error;
+
+ switch (status) {
+ case -EPROTONOSUPPORT:
+ p = xdr_reserve_space(&sctxt->sc_stream, 3 * sizeof(*p));
+ if (!p)
+ goto put_ctxt;
+
+ *p++ = err_vers;
+ *p++ = rpcrdma_version;
+ *p = rpcrdma_version;
+ trace_svcrdma_err_vers(*rdma_argp);
+ break;
+ default:
+ p = xdr_reserve_space(&sctxt->sc_stream, sizeof(*p));
+ if (!p)
+ goto put_ctxt;
+
+ *p = err_chunk;
+ trace_svcrdma_err_chunk(*rdma_argp);
+ }
- svc_rdma_save_io_pages(rqstp, ctxt);
+ /* Remote Invalidation is skipped for simplicity. */
+ sctxt->sc_send_wr.num_sge = 1;
+ sctxt->sc_send_wr.opcode = IB_WR_SEND;
+ sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
+ if (svc_rdma_send(rdma, sctxt))
+ goto put_ctxt;
+ return;
- ctxt->sc_send_wr.num_sge = 1;
- ctxt->sc_send_wr.opcode = IB_WR_SEND;
- ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len;
- return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+put_ctxt:
+ svc_rdma_send_ctxt_put(rdma, sctxt);
}
/**
@@ -930,15 +962,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (ret != -E2BIG && ret != -EINVAL)
goto err1;
- ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
- if (ret < 0)
- goto err1;
+ /* Send completion releases payload pages that were part
+ * of previously posted RDMA Writes.
+ */
+ svc_rdma_save_io_pages(rqstp, sctxt);
+ svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;
err1:
svc_rdma_send_ctxt_put(rdma, sctxt);
err0:
- trace_svcrdma_send_failed(rqstp, ret);
+ trace_svcrdma_send_err(rqstp, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
return -ENOTCONN;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index d38be57b00ed..fb044792b571 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -55,7 +55,6 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svc_rdma.h>
@@ -238,65 +237,56 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
svc_xprt_enqueue(&listen_xprt->sc_xprt);
}
-/*
- * Handles events generated on the listening endpoint. These events will be
- * either be incoming connect requests or adapter removal events.
+/**
+ * svc_rdma_listen_handler - Handle CM events generated on a listening endpoint
+ * @cma_id: the server's listener rdma_cm_id
+ * @event: details of the event
+ *
+ * Return values:
+ * %0: Do not destroy @cma_id
+ * %1: Destroy @cma_id (never returned here)
+ *
+ * NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners.
*/
-static int rdma_listen_handler(struct rdma_cm_id *cma_id,
- struct rdma_cm_event *event)
+static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event)
{
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
- dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
- "event = %s (%d)\n", cma_id, cma_id->context,
- rdma_event_msg(event->event), event->event);
handle_connect_req(cma_id, &event->param.conn);
break;
default:
- /* NB: No device removal upcall for INADDR_ANY listeners */
- dprintk("svcrdma: Unexpected event on listening endpoint %p, "
- "event = %s (%d)\n", cma_id,
- rdma_event_msg(event->event), event->event);
break;
}
-
return 0;
}
-static int rdma_cma_handler(struct rdma_cm_id *cma_id,
- struct rdma_cm_event *event)
+/**
+ * svc_rdma_cma_handler - Handle CM events on client connections
+ * @cma_id: the server's listener rdma_cm_id
+ * @event: details of the event
+ *
+ * Return values:
+ * %0: Do not destroy @cma_id
+ * %1: Destroy @cma_id (never returned here)
+ */
+static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event)
{
struct svcxprt_rdma *rdma = cma_id->context;
struct svc_xprt *xprt = &rdma->sc_xprt;
switch (event->event) {
case RDMA_CM_EVENT_ESTABLISHED:
- /* Accept complete */
- svc_xprt_get(xprt);
- dprintk("svcrdma: Connection completed on DTO xprt=%p, "
- "cm_id=%p\n", xprt, cma_id);
clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
svc_xprt_enqueue(xprt);
break;
case RDMA_CM_EVENT_DISCONNECTED:
- dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
- xprt, cma_id);
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
- break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
- dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
- "event = %s (%d)\n", cma_id, xprt,
- rdma_event_msg(event->event), event->event);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
break;
default:
- dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
- "event = %s (%d)\n", cma_id,
- rdma_event_msg(event->event), event->event);
break;
}
return 0;
@@ -322,7 +312,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
- listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt,
+ listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
@@ -486,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout;
/* Swap out the handler */
- newxprt->sc_cm_id->event_handler = rdma_cma_handler;
+ newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
/* Construct RDMA-CM private message */
pmsg.cp_magic = rpcrdma_cmp_magic;
@@ -540,24 +530,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
return NULL;
}
-/*
- * When connected, an svc_xprt has at least two references:
- *
- * - A reference held by the cm_id between the ESTABLISHED and
- * DISCONNECTED events. If the remote peer disconnected first, this
- * reference could be gone.
- *
- * - A reference held by the svc_recv code that called this function
- * as part of close processing.
- *
- * At a minimum one references should still be held.
- */
static void svc_rdma_detach(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
- /* Disconnect and flush posted WQE */
rdma_disconnect(rdma->sc_cm_id);
}
@@ -567,6 +544,7 @@ static void __svc_rdma_free(struct work_struct *work)
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;
+ /* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 18fa6067bb7f..b74e2741f74f 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -561,7 +561,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct iov_iter msg_iter;
- char *kaddr = kmap(page);
+ char *kaddr;
struct kvec iov;
int rc;
@@ -576,6 +576,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
goto out;
}
+ kaddr = kmap(page);
iov.iov_base = kaddr + offset;
iov.iov_len = size;
iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 710bd44eaa49..9a3d9fedd7aa 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -935,7 +935,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int ret = 0;
int pending;
- if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
+ if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_CMSG_COMPAT))
return -EOPNOTSUPP;
mutex_lock(&tls_ctx->tx_lock);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 27bbcfad9c17..9e93bc201cc0 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1032,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
}
/* Connected sockets that can produce data can be written. */
- if (sk->sk_state == TCP_ESTABLISHED) {
+ if (transport && sk->sk_state == TCP_ESTABLISHED) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
bool space_avail_now = false;
int ret = transport->notify_poll_out(
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 042ea9b40c7b..d5280fd6f9c1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -122,7 +122,7 @@ struct xfrm_pol_inexact_bin {
/* list containing '*:*' policies */
struct hlist_head hhead;
- seqcount_t count;
+ seqcount_spinlock_t count;
/* tree sorted by daddr/prefix */
struct rb_root root_d;
@@ -155,7 +155,7 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_t xfrm_policy_hash_generation;
+static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -719,7 +719,7 @@ xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
INIT_HLIST_HEAD(&bin->hhead);
bin->root_d = RB_ROOT;
bin->root_s = RB_ROOT;
- seqcount_init(&bin->count);
+ seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock);
prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
&bin->k, &bin->head,
@@ -1899,7 +1899,7 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
static struct xfrm_pol_inexact_node *
xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
- seqcount_t *count,
+ seqcount_spinlock_t *count,
const xfrm_address_t *addr, u16 family)
{
const struct rb_node *parent;
@@ -4157,7 +4157,7 @@ void __init xfrm_init(void)
{
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
- seqcount_init(&xfrm_policy_hash_generation);
+ seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
xfrm_input_init();
#ifdef CONFIG_XFRM_ESPINTCP