summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/9p/client.c 21
-rw-r--r-- net/9p/trans_common.c 1
-rw-r--r-- net/ax25/af_ax25.c 11
-rw-r--r-- net/ax25/ax25_dev.c 2
-rw-r--r-- net/compat.c 15
-rw-r--r-- net/core/ethtool.c 12
-rw-r--r-- net/core/rtnetlink.c 5
-rw-r--r-- net/core/sock.c 15
-rw-r--r-- net/ipv4/fib_rules.c 8
-rw-r--r-- net/ipv4/ip_gre.c 9
-rw-r--r-- net/ipv4/ip_tunnel.c 9
-rw-r--r-- net/ipv4/ip_vti.c 12
-rw-r--r-- net/ipv6/addrconf.c 4
-rw-r--r-- net/ipv6/af_inet6.c 3
-rw-r--r-- net/ipv6/ip6_fib.c 6
-rw-r--r-- net/ipv6/ip6_gre.c 10
-rw-r--r-- net/ipv6/ip6_tunnel.c 10
-rw-r--r-- net/ipv6/ip6_vti.c 8
-rw-r--r-- net/ipv6/ip6mr.c 17
-rw-r--r-- net/ipv6/reassembly.c 2
-rw-r--r-- net/ipv6/route.c 10
-rw-r--r-- net/ipv6/sit.c 3
-rw-r--r-- net/netfilter/nf_conncount.c 290
-rw-r--r-- net/netfilter/nf_tables_api.c 2
-rw-r--r-- net/netfilter/nft_connlimit.c 14
-rw-r--r-- net/netrom/af_netrom.c 15
-rw-r--r-- net/rds/tcp.c 2
-rw-r--r-- net/sunrpc/Makefile 2
-rw-r--r-- net/sunrpc/auth.c 116
-rw-r--r-- net/sunrpc/auth_generic.c 293
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c 47
-rw-r--r-- net/sunrpc/auth_gss/gss_mech_switch.c 2
-rw-r--r-- net/sunrpc/auth_gss/svcauth_gss.c 8
-rw-r--r-- net/sunrpc/auth_null.c 4
-rw-r--r-- net/sunrpc/auth_unix.c 110
-rw-r--r-- net/sunrpc/backchannel_rqst.c 2
-rw-r--r-- net/sunrpc/cache.c 10
-rw-r--r-- net/sunrpc/clnt.c 29
-rw-r--r-- net/sunrpc/rpc_pipe.c 19
-rw-r--r-- net/sunrpc/rpcb_clnt.c 12
-rw-r--r-- net/sunrpc/sched.c 5
-rw-r--r-- net/sunrpc/svc.c 24
-rw-r--r-- net/sunrpc/svc_xprt.c 9
-rw-r--r-- net/sunrpc/svcsock.c 122
-rw-r--r-- net/sunrpc/xprtmultipath.c 4
-rw-r--r-- net/sunrpc/xprtrdma/Makefile 3
-rw-r--r-- net/sunrpc/xprtrdma/backchannel.c 59
-rw-r--r-- net/sunrpc/xprtrdma/fmr_ops.c 337
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c 209
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c 78
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma.c 6
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_backchannel.c 8
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 63
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c 57
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c 59
-rw-r--r-- net/sunrpc/xprtrdma/transport.c 92
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c 255
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h 81
-rw-r--r-- net/sunrpc/xprtsock.c 22
-rw-r--r-- net/tipc/bearer.c 1
-rw-r--r-- net/tipc/netlink_compat.c 2
61 files changed, 884 insertions, 1782 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 2c9a17b9b46b..357214a51f13 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -181,6 +181,12 @@ static int parse_opts(char *opts, struct p9_client *clnt)
ret = r;
continue;
}
+ if (option < 4096) {
+ p9_debug(P9_DEBUG_ERROR,
+ "msize should be at least 4k\n");
+ ret = -EINVAL;
+ continue;
+ }
clnt->msize = option;
break;
case Opt_trans:
@@ -983,10 +989,18 @@ static int p9_client_version(struct p9_client *c)
else if (!strncmp(version, "9P2000", 6))
c->proto_version = p9_proto_legacy;
else {
+ p9_debug(P9_DEBUG_ERROR,
+ "server returned an unknown version: %s\n", version);
err = -EREMOTEIO;
goto error;
}
+ if (msize < 4096) {
+ p9_debug(P9_DEBUG_ERROR,
+ "server returned a msize < 4096: %d\n", msize);
+ err = -EREMOTEIO;
+ goto error;
+ }
if (msize < c->msize)
c->msize = msize;
@@ -1043,6 +1057,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (clnt->msize > clnt->trans_mod->maxsize)
clnt->msize = clnt->trans_mod->maxsize;
+ if (clnt->msize < 4096) {
+ p9_debug(P9_DEBUG_ERROR,
+ "Please specify a msize of at least 4k\n");
+ err = -EINVAL;
+ goto free_client;
+ }
+
err = p9_client_version(clnt);
if (err)
goto close_trans;
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index b718db2085b2..3dff68f05fb9 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/module.h>
+#include "trans_common.h"
/**
* p9_release_pages - Release pages after the transaction.
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c603d33d5410..5d01edf8d819 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -653,15 +653,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
}
- dev = dev_get_by_name(&init_net, devname);
+ rtnl_lock();
+ dev = __dev_get_by_name(&init_net, devname);
if (!dev) {
+ rtnl_unlock();
res = -ENODEV;
break;
}
ax25->ax25_dev = ax25_dev_ax25dev(dev);
+ if (!ax25->ax25_dev) {
+ rtnl_unlock();
+ res = -ENODEV;
+ break;
+ }
ax25_fillin_cb(ax25, ax25->ax25_dev);
- dev_put(dev);
+ rtnl_unlock();
break;
default:
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 9a3a301e1e2f..d92195cd7834 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -116,6 +116,7 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
spin_unlock_bh(&ax25_dev_lock);
+ dev->ax25_ptr = NULL;
dev_put(dev);
kfree(ax25_dev);
return;
@@ -125,6 +126,7 @@ void ax25_dev_device_down(struct net_device *dev)
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
spin_unlock_bh(&ax25_dev_lock);
+ dev->ax25_ptr = NULL;
dev_put(dev);
kfree(ax25_dev);
return;
diff --git a/net/compat.c b/net/compat.c
index f7084780a8f8..c3a2f868e8af 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
ctv = (struct compat_timeval __user *) userstamp;
err = -ENOENT;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sk->sk_stamp);
+ tv = ktime_to_timeval(sock_read_timestamp(sk));
+
if (tv.tv_sec == -1)
return err;
if (tv.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- tv = ktime_to_timeval(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ tv = ktime_to_timeval(kt);
}
err = 0;
if (put_user(tv.tv_sec, &ctv->tv_sec) ||
@@ -494,12 +496,13 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta
ctv = (struct compat_timespec __user *) userstamp;
err = -ENOENT;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sk->sk_stamp);
+ ts = ktime_to_timespec(sock_read_timestamp(sk));
if (ts.tv_sec == -1)
return err;
if (ts.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- ts = ktime_to_timespec(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ ts = ktime_to_timespec(kt);
}
err = 0;
if (put_user(ts.tv_sec, &ctv->tv_sec) ||
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d05402868575..158264f7cfaf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -793,8 +793,13 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
if (rc >= 0)
info.n_priv_flags = rc;
}
- if (ops->get_regs_len)
- info.regdump_len = ops->get_regs_len(dev);
+ if (ops->get_regs_len) {
+ int ret = ops->get_regs_len(dev);
+
+ if (ret > 0)
+ info.regdump_len = ret;
+ }
+
if (ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
@@ -1337,6 +1342,9 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
return -EFAULT;
reglen = ops->get_regs_len(dev);
+ if (reglen <= 0)
+ return reglen;
+
if (regs.len > reglen)
regs.len = reglen;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 48f61885fd6f..5ea1bed08ede 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4104,6 +4104,11 @@ static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (!addr) {
+ NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request");
+ return -EINVAL;
+ }
+
if (brport_idx) {
dev = __dev_get_by_index(net, brport_idx);
if (!dev) {
diff --git a/net/core/sock.c b/net/core/sock.c
index f00902c532cc..6aa2e7e0b4fb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2751,6 +2751,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
+#if BITS_PER_LONG==32
+ seqlock_init(&sk->sk_stamp_seq);
+#endif
atomic_set(&sk->sk_zckey, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -2850,12 +2853,13 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
struct timeval tv;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- tv = ktime_to_timeval(sk->sk_stamp);
+ tv = ktime_to_timeval(sock_read_timestamp(sk));
if (tv.tv_sec == -1)
return -ENOENT;
if (tv.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
- tv = ktime_to_timeval(sk->sk_stamp);
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
+ tv = ktime_to_timeval(kt);
}
return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
@@ -2866,11 +2870,12 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
struct timespec ts;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- ts = ktime_to_timespec(sk->sk_stamp);
+ ts = ktime_to_timespec(sock_read_timestamp(sk));
if (ts.tv_sec == -1)
return -ENOENT;
if (ts.tv_sec == 0) {
- sk->sk_stamp = ktime_get_real();
+ ktime_t kt = ktime_get_real();
+ sock_write_timestamp(sk, kt);
ts = ktime_to_timespec(sk->sk_stamp);
}
return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f8eb78d042a4..cfec3af54c8d 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -198,11 +198,15 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
static struct fib_table *fib_empty_table(struct net *net)
{
- u32 id;
+ u32 id = 1;
- for (id = 1; id <= RT_TABLE_MAX; id++)
+ while (1) {
if (!fib_get_table(net, id))
return fib_new_table(net, id);
+
+ if (id++ == RT_TABLE_MAX)
+ break;
+ }
return NULL;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c7a7bd58a23c..d1d09f3e5f9e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -676,6 +676,9 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
gre_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
@@ -719,6 +722,9 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
bool truncate = false;
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
erspan_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
@@ -762,6 +768,9 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
if (tunnel->collect_md) {
gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
return NETDEV_TX_OK;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 284a22154b4e..c4f5602308ed 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -627,7 +627,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- unsigned int inner_nhdr_len = 0;
const struct iphdr *inner_iph;
struct flowi4 fl4;
u8 tos, ttl;
@@ -637,14 +636,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
__be32 dst;
bool connected;
- /* ensure we can access the inner net header, for several users below */
- if (skb->protocol == htons(ETH_P_IP))
- inner_nhdr_len = sizeof(struct iphdr);
- else if (skb->protocol == htons(ETH_P_IPV6))
- inner_nhdr_len = sizeof(struct ipv6hdr);
- if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
- goto tx_error;
-
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index de31b302d69c..d7b43e700023 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -241,6 +241,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
struct flowi fl;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
memset(&fl, 0, sizeof(fl));
switch (skb->protocol) {
@@ -253,15 +256,18 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
break;
default:
- dev->stats.tx_errors++;
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
+ goto tx_err;
}
/* override mark with tunnel output key */
fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
return vti_xmit(skb, dev, &fl);
+
+tx_err:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
static int vti4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 521e471f1cf9..8eeec6eb2bd3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4736,8 +4736,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
idev = ipv6_find_idev(dev);
- if (IS_ERR(idev))
- return PTR_ERR(idev);
+ if (!idev)
+ return -ENOBUFS;
if (!ipv6_allow_optimistic_dad(net, idev))
cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index f0cd291034f0..0bfb6cc0a30a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -350,6 +350,9 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
err = -EINVAL;
goto out_unlock;
}
+ }
+
+ if (sk->sk_bound_dev_if) {
dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
if (!dev) {
err = -ENODEV;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ae3786132c23..6613d8dbb0e5 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -627,7 +627,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
return -ENOENT;
}
- res = fib6_dump_table(tb, skb, cb);
+ if (!cb->args[0]) {
+ res = fib6_dump_table(tb, skb, cb);
+ if (!res)
+ cb->args[0] = 1;
+ }
goto out;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 229e55c99021..09d0826742f8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -881,6 +881,9 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
struct net_device_stats *stats = &t->dev->stats;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
@@ -923,6 +926,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
int nhoff;
int thoff;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
@@ -995,8 +1001,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
}
} else {
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1004,7 +1008,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
&dsfield, &encap_limit);
break;
case htons(ETH_P_IPV6):
- if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr))
goto tx_err;
if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
&dsfield, &encap_limit))
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 99179b9c8384..0c6403cf8b52 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1243,10 +1243,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- /* ensure we can access the full inner ip header */
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- return -1;
-
iph = ip_hdr(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1321,9 +1317,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
- if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
- return -1;
-
ipv6h = ipv6_hdr(skb);
tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
@@ -1405,6 +1398,9 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_device_stats *stats = &t->dev->stats;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
ret = ip4ip6_tnl_xmit(skb, dev);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 706fe42e4928..8b6eefff2f7e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -522,18 +522,18 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
- struct ipv6hdr *ipv6h;
struct flowi fl;
int ret;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
memset(&fl, 0, sizeof(fl));
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- ipv6h = ipv6_hdr(skb);
-
if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- vti6_addr_conflict(t, ipv6h))
+ vti6_addr_conflict(t, ipv6_hdr(skb)))
goto tx_err;
xfrm_decode_session(skb, &fl, AF_INET6);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8276f1224f16..30337b38274b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -51,6 +51,7 @@
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
+#include <net/ip_tunnels.h>
#include <linux/nospec.h>
@@ -599,13 +600,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
- int err;
- err = ip6mr_fib_lookup(net, &fl6, &mrt);
- if (err < 0) {
- kfree_skb(skb);
- return err;
- }
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
+ if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
+ goto tx_err;
read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
@@ -614,6 +614,11 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
read_unlock(&mrt_lock);
kfree_skb(skb);
return NETDEV_TX_OK;
+
+tx_err:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
}
static int reg_vif_get_iflink(const struct net_device *dev)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a5bb59ee50ac..36a3d8dc61f5 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -210,7 +210,7 @@ found:
if (next && next->ip_defrag_offset < end)
goto discard_fq;
- /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ /* Note : skb->ip_defrag_offset and skb->sk share the same location */
dev = skb->dev;
if (dev)
fq->iif = dev->ifindex;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 194bc162866d..40b225f87d5e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -210,7 +210,9 @@ struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
n = __ipv6_neigh_lookup(dev, daddr);
if (n)
return n;
- return neigh_create(&nd_tbl, daddr, dev);
+
+ n = neigh_create(&nd_tbl, daddr, dev);
+ return IS_ERR(n) ? NULL : n;
}
static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
@@ -5054,12 +5056,16 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
{
struct net *net;
int delay;
+ int ret;
if (!write)
return -EINVAL;
net = (struct net *)ctl->extra1;
delay = net->ipv6.sysctl.flush_delay;
- proc_dointvec(ctl, write, buffer, lenp, ppos);
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
return 0;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 51c9f75f34b9..1e03305c0549 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1021,6 +1021,9 @@ tx_error:
static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ if (!pskb_inet_may_pull(skb))
+ goto tx_err;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 9cd180bda092..7554c56b2e63 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -33,12 +33,6 @@
#define CONNCOUNT_SLOTS 256U
-#ifdef CONFIG_LOCKDEP
-#define CONNCOUNT_LOCK_SLOTS 8U
-#else
-#define CONNCOUNT_LOCK_SLOTS 256U
-#endif
-
#define CONNCOUNT_GC_MAX_NODES 8
#define MAX_KEYLEN 5
@@ -49,8 +43,6 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
- bool dead;
- struct rcu_head rcu_head;
};
struct nf_conncount_rb {
@@ -60,7 +52,7 @@ struct nf_conncount_rb {
struct rcu_head rcu_head;
};
-static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
+static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
struct nf_conncount_data {
unsigned int keylen;
@@ -89,79 +81,25 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
-enum nf_conncount_list_add
-nf_conncount_add(struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone)
-{
- struct nf_conncount_tuple *conn;
-
- if (WARN_ON_ONCE(list->count > INT_MAX))
- return NF_CONNCOUNT_ERR;
-
- conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return NF_CONNCOUNT_ERR;
-
- conn->tuple = *tuple;
- conn->zone = *zone;
- conn->cpu = raw_smp_processor_id();
- conn->jiffies32 = (u32)jiffies;
- conn->dead = false;
- spin_lock_bh(&list->list_lock);
- if (list->dead == true) {
- kmem_cache_free(conncount_conn_cachep, conn);
- spin_unlock_bh(&list->list_lock);
- return NF_CONNCOUNT_SKIP;
- }
- list_add_tail(&conn->node, &list->head);
- list->count++;
- spin_unlock_bh(&list->list_lock);
- return NF_CONNCOUNT_ADDED;
-}
-EXPORT_SYMBOL_GPL(nf_conncount_add);
-
-static void __conn_free(struct rcu_head *h)
-{
- struct nf_conncount_tuple *conn;
-
- conn = container_of(h, struct nf_conncount_tuple, rcu_head);
- kmem_cache_free(conncount_conn_cachep, conn);
-}
-
-static bool conn_free(struct nf_conncount_list *list,
+static void conn_free(struct nf_conncount_list *list,
struct nf_conncount_tuple *conn)
{
- bool free_entry = false;
-
- spin_lock_bh(&list->list_lock);
-
- if (conn->dead) {
- spin_unlock_bh(&list->list_lock);
- return free_entry;
- }
+ lockdep_assert_held(&list->list_lock);
list->count--;
- conn->dead = true;
- list_del_rcu(&conn->node);
- if (list->count == 0) {
- list->dead = true;
- free_entry = true;
- }
+ list_del(&conn->node);
- spin_unlock_bh(&list->list_lock);
- call_rcu(&conn->rcu_head, __conn_free);
- return free_entry;
+ kmem_cache_free(conncount_conn_cachep, conn);
}
static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_list *list,
- struct nf_conncount_tuple *conn, bool *free_entry)
+ struct nf_conncount_tuple *conn)
{
const struct nf_conntrack_tuple_hash *found;
unsigned long a, b;
int cpu = raw_smp_processor_id();
- __s32 age;
+ u32 age;
found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
if (found)
@@ -176,52 +114,45 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
*/
age = a - b;
if (conn->cpu == cpu || age >= 2) {
- *free_entry = conn_free(list, conn);
+ conn_free(list, conn);
return ERR_PTR(-ENOENT);
}
return ERR_PTR(-EAGAIN);
}
-void nf_conncount_lookup(struct net *net,
- struct nf_conncount_list *list,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
+static int __nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
unsigned int collect = 0;
- bool free_entry = false;
-
- /* best effort only */
- *addit = tuple ? true : false;
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
break;
- found = find_or_evict(net, list, conn, &free_entry);
+ found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
- if (!tuple)
- continue;
-
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
- *addit = false;
- } else if (PTR_ERR(found) == -ENOENT)
+ return 0; /* already exists */
+ } else {
collect++;
+ }
continue;
}
found_ct = nf_ct_tuplehash_to_ctrack(found);
- if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/*
* We should not see tuples twice unless someone hooks
@@ -229,7 +160,8 @@ void nf_conncount_lookup(struct net *net,
*
* Attempt to avoid a re-add in this case.
*/
- *addit = false;
+ nf_ct_put(found_ct);
+ return 0;
} else if (already_closed(found_ct)) {
/*
* we do not care about connections which are
@@ -243,19 +175,48 @@ void nf_conncount_lookup(struct net *net,
nf_ct_put(found_ct);
}
+
+ if (WARN_ON_ONCE(list->count > INT_MAX))
+ return -EOVERFLOW;
+
+ conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+ if (conn == NULL)
+ return -ENOMEM;
+
+ conn->tuple = *tuple;
+ conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
+ list_add_tail(&conn->node, &list->head);
+ list->count++;
+ return 0;
}
-EXPORT_SYMBOL_GPL(nf_conncount_lookup);
+
+int nf_conncount_add(struct net *net,
+ struct nf_conncount_list *list,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
+{
+ int ret;
+
+ /* check the saved connections */
+ spin_lock_bh(&list->list_lock);
+ ret = __nf_conncount_add(net, list, tuple, zone);
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_add);
void nf_conncount_list_init(struct nf_conncount_list *list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
- list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
-/* Return true if the list is empty */
+/* Return true if the list is empty. Must be called with BH disabled. */
bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list)
{
@@ -263,17 +224,17 @@ bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
unsigned int collected = 0;
- bool free_entry = false;
bool ret = false;
+ /* don't bother if other cpu is already doing GC */
+ if (!spin_trylock(&list->list_lock))
+ return false;
+
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
- found = find_or_evict(net, list, conn, &free_entry);
+ found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
- if (PTR_ERR(found) == -ENOENT) {
- if (free_entry)
- return true;
+ if (PTR_ERR(found) == -ENOENT)
collected++;
- }
continue;
}
@@ -284,23 +245,19 @@ bool nf_conncount_gc_list(struct net *net,
* closed already -> ditch it
*/
nf_ct_put(found_ct);
- if (conn_free(list, conn))
- return true;
+ conn_free(list, conn);
collected++;
continue;
}
nf_ct_put(found_ct);
if (collected > CONNCOUNT_GC_MAX_NODES)
- return false;
+ break;
}
- spin_lock_bh(&list->list_lock);
- if (!list->count) {
- list->dead = true;
+ if (!list->count)
ret = true;
- }
- spin_unlock_bh(&list->list_lock);
+ spin_unlock(&list->list_lock);
return ret;
}
@@ -314,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h)
kmem_cache_free(conncount_rb_cachep, rbconn);
}
+/* caller must hold tree nf_conncount_locks[] lock */
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
unsigned int gc_count)
@@ -323,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ if (!rbconn->list.count) {
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ }
spin_unlock(&rbconn->list.list_lock);
}
}
@@ -341,20 +301,19 @@ insert_tree(struct net *net,
struct rb_root *root,
unsigned int hash,
const u32 *key,
- u8 keylen,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
- enum nf_conncount_list_add ret;
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0;
- bool node_found = false;
-
- spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+ u8 keylen = data->keylen;
+ bool do_gc = true;
+ spin_lock_bh(&nf_conncount_locks[hash]);
+restart:
parent = NULL;
rbnode = &(root->rb_node);
while (*rbnode) {
@@ -368,45 +327,32 @@ insert_tree(struct net *net,
} else if (diff > 0) {
rbnode = &((*rbnode)->rb_right);
} else {
- /* unlikely: other cpu added node already */
- node_found = true;
- ret = nf_conncount_add(&rbconn->list, tuple, zone);
- if (ret == NF_CONNCOUNT_ERR) {
+ int ret;
+
+ ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
+ if (ret)
count = 0; /* hotdrop */
- } else if (ret == NF_CONNCOUNT_ADDED) {
+ else
count = rbconn->list.count;
- } else {
- /* NF_CONNCOUNT_SKIP, rbconn is already
- * reclaimed by gc, insert a new tree node
- */
- node_found = false;
- }
- break;
+ tree_nodes_free(root, gc_nodes, gc_count);
+ goto out_unlock;
}
if (gc_count >= ARRAY_SIZE(gc_nodes))
continue;
- if (nf_conncount_gc_list(net, &rbconn->list))
+ if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
gc_nodes[gc_count++] = rbconn;
}
if (gc_count) {
tree_nodes_free(root, gc_nodes, gc_count);
- /* tree_node_free before new allocation permits
- * allocator to re-use newly free'd object.
- *
- * This is a rare event; in most cases we will find
- * existing node to re-use. (or gc_count is 0).
- */
-
- if (gc_count >= ARRAY_SIZE(gc_nodes))
- schedule_gc_worker(data, hash);
+ schedule_gc_worker(data, hash);
+ gc_count = 0;
+ do_gc = false;
+ goto restart;
}
- if (node_found)
- goto out_unlock;
-
/* expected case: match, insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL)
@@ -430,7 +376,7 @@ insert_tree(struct net *net,
rb_link_node_rcu(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
out_unlock:
- spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+ spin_unlock_bh(&nf_conncount_locks[hash]);
return count;
}
@@ -441,7 +387,6 @@ count_tree(struct net *net,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
- enum nf_conncount_list_add ret;
struct rb_root *root;
struct rb_node *parent;
struct nf_conncount_rb *rbconn;
@@ -454,7 +399,6 @@ count_tree(struct net *net,
parent = rcu_dereference_raw(root->rb_node);
while (parent) {
int diff;
- bool addit;
rbconn = rb_entry(parent, struct nf_conncount_rb, node);
@@ -464,31 +408,36 @@ count_tree(struct net *net,
} else if (diff > 0) {
parent = rcu_dereference_raw(parent->rb_right);
} else {
- /* same source network -> be counted! */
- nf_conncount_lookup(net, &rbconn->list, tuple, zone,
- &addit);
+ int ret;
- if (!addit)
+ if (!tuple) {
+ nf_conncount_gc_list(net, &rbconn->list);
return rbconn->list.count;
+ }
- ret = nf_conncount_add(&rbconn->list, tuple, zone);
- if (ret == NF_CONNCOUNT_ERR) {
- return 0; /* hotdrop */
- } else if (ret == NF_CONNCOUNT_ADDED) {
- return rbconn->list.count;
- } else {
- /* NF_CONNCOUNT_SKIP, rbconn is already
- * reclaimed by gc, insert a new tree node
- */
+ spin_lock_bh(&rbconn->list.list_lock);
+ /* Node might be about to be free'd.
+ * We need to defer to insert_tree() in this case.
+ */
+ if (rbconn->list.count == 0) {
+ spin_unlock_bh(&rbconn->list.list_lock);
break;
}
+
+ /* same source network -> be counted! */
+ ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+ spin_unlock_bh(&rbconn->list.list_lock);
+ if (ret)
+ return 0; /* hotdrop */
+ else
+ return rbconn->list.count;
}
}
if (!tuple)
return 0;
- return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+ return insert_tree(net, data, root, hash, key, tuple, zone);
}
static void tree_gc_worker(struct work_struct *work)
@@ -499,27 +448,47 @@ static void tree_gc_worker(struct work_struct *work)
struct rb_node *node;
unsigned int tree, next_tree, gc_count = 0;
- tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
+ tree = data->gc_tree % CONNCOUNT_SLOTS;
root = &data->root[tree];
+ local_bh_disable();
rcu_read_lock();
for (node = rb_first(root); node != NULL; node = rb_next(node)) {
rbconn = rb_entry(node, struct nf_conncount_rb, node);
if (nf_conncount_gc_list(data->net, &rbconn->list))
- gc_nodes[gc_count++] = rbconn;
+ gc_count++;
}
rcu_read_unlock();
+ local_bh_enable();
+
+ cond_resched();
spin_lock_bh(&nf_conncount_locks[tree]);
+ if (gc_count < ARRAY_SIZE(gc_nodes))
+ goto next; /* do not bother */
- if (gc_count) {
- tree_nodes_free(root, gc_nodes, gc_count);
+ gc_count = 0;
+ node = rb_first(root);
+ while (node != NULL) {
+ rbconn = rb_entry(node, struct nf_conncount_rb, node);
+ node = rb_next(node);
+
+ if (rbconn->list.count > 0)
+ continue;
+
+ gc_nodes[gc_count++] = rbconn;
+ if (gc_count >= ARRAY_SIZE(gc_nodes)) {
+ tree_nodes_free(root, gc_nodes, gc_count);
+ gc_count = 0;
+ }
}
+ tree_nodes_free(root, gc_nodes, gc_count);
+next:
clear_bit(tree, data->pending_trees);
next_tree = (tree + 1) % CONNCOUNT_SLOTS;
- next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
+ next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
if (next_tree < CONNCOUNT_SLOTS) {
data->gc_tree = next_tree;
@@ -621,10 +590,7 @@ static int __init nf_conncount_modinit(void)
{
int i;
- BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS);
- BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
-
- for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
+ for (i = 0; i < CONNCOUNT_SLOTS; ++i)
spin_lock_init(&nf_conncount_locks[i]);
conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fec814dace5a..2b0a93300dd7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5727,6 +5727,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
+ if (!nest)
+ goto nla_put_failure;
if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index b90d96ba4a12..af1497ab9464 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int count;
- bool addit;
tuple_ptr = &tuple;
@@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
return;
}
- nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
- &addit);
- count = priv->list.count;
-
- if (!addit)
- goto out;
-
- if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
+ if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
regs->verdict.code = NF_DROP;
return;
}
- count++;
-out:
+
+ count = priv->list.count;
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 03f37c4e64fe..1d3144d19903 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -153,7 +153,7 @@ static struct sock *nr_find_listener(ax25_address *addr)
sk_for_each(s, &nr_list)
if (!ax25cmp(&nr_sk(s)->source_addr, addr) &&
s->sk_state == TCP_LISTEN) {
- bh_lock_sock(s);
+ sock_hold(s);
goto found;
}
s = NULL;
@@ -174,7 +174,7 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id)
struct nr_sock *nr = nr_sk(s);
if (nr->my_index == index && nr->my_id == id) {
- bh_lock_sock(s);
+ sock_hold(s);
goto found;
}
}
@@ -198,7 +198,7 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id,
if (nr->your_index == index && nr->your_id == id &&
!ax25cmp(&nr->dest_addr, dest)) {
- bh_lock_sock(s);
+ sock_hold(s);
goto found;
}
}
@@ -224,7 +224,7 @@ static unsigned short nr_find_next_circuit(void)
if (i != 0 && j != 0) {
if ((sk=nr_find_socket(i, j)) == NULL)
break;
- bh_unlock_sock(sk);
+ sock_put(sk);
}
id++;
@@ -920,6 +920,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
}
if (sk != NULL) {
+ bh_lock_sock(sk);
skb_reset_transport_header(skb);
if (frametype == NR_CONNACK && skb->len == 22)
@@ -929,6 +930,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
ret = nr_process_rx_frame(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
return ret;
}
@@ -960,10 +962,12 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
(make = nr_make_new(sk)) == NULL) {
nr_transmit_refusal(skb, 0);
if (sk)
- bh_unlock_sock(sk);
+ sock_put(sk);
return 0;
}
+ bh_lock_sock(sk);
+
window = skb->data[20];
skb->sk = make;
@@ -1016,6 +1020,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
sk->sk_data_ready(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
nr_insert_socket(make);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index b9bbcf3d6c63..c16f0a362c32 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -623,7 +623,7 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
if (rtn->rds_tcp_sysctl)
unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
- if (net != &init_net && rtn->ctl_table)
+ if (net != &init_net)
kfree(rtn->ctl_table);
}
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 090658c3da12..9488600451e8 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
- auth.o auth_null.o auth_unix.o auth_generic.o \
+ auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index ad8ead738981..1ff9768f5456 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -39,6 +39,20 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
static LIST_HEAD(cred_unused);
static unsigned long number_cred_unused;
+static struct cred machine_cred = {
+ .usage = ATOMIC_INIT(1),
+};
+
+/*
+ * Return the machine_cred pointer to be used whenever
+ * a generic machine credential is needed.
+ */
+const struct cred *rpc_machine_cred(void)
+{
+ return &machine_cred;
+}
+EXPORT_SYMBOL_GPL(rpc_machine_cred);
+
#define MAX_HASHTABLE_BITS (14)
static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
{
@@ -346,29 +360,6 @@ out_nocache:
}
EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
-/*
- * Setup a credential key lifetime timeout notification
- */
-int
-rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
-{
- if (!cred->cr_auth->au_ops->key_timeout)
- return 0;
- return cred->cr_auth->au_ops->key_timeout(auth, cred);
-}
-EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
-
-bool
-rpcauth_cred_key_to_expire(struct rpc_auth *auth, struct rpc_cred *cred)
-{
- if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
- return false;
- if (!cred->cr_ops->crkey_to_expire)
- return false;
- return cred->cr_ops->crkey_to_expire(cred);
-}
-EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
-
char *
rpcauth_stringify_acceptor(struct rpc_cred *cred)
{
@@ -587,13 +578,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
- if (flags & RPCAUTH_LOOKUP_RCU) {
- if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
- refcount_read(&entry->cr_count) == 0)
- continue;
- cred = entry;
- break;
- }
cred = get_rpccred(entry);
if (cred)
break;
@@ -603,9 +587,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
if (cred != NULL)
goto found;
- if (flags & RPCAUTH_LOOKUP_RCU)
- return ERR_PTR(-ECHILD);
-
new = auth->au_ops->crcreate(auth, acred, flags, gfp);
if (IS_ERR(new)) {
cred = new;
@@ -656,9 +637,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
auth->au_ops->au_name);
memset(&acred, 0, sizeof(acred));
- acred.uid = cred->fsuid;
- acred.gid = cred->fsgid;
- acred.group_info = cred->group_info;
+ acred.cred = cred;
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
return ret;
}
@@ -672,31 +651,41 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
INIT_LIST_HEAD(&cred->cr_lru);
refcount_set(&cred->cr_count, 1);
cred->cr_auth = auth;
+ cred->cr_flags = 0;
cred->cr_ops = ops;
cred->cr_expire = jiffies;
- cred->cr_uid = acred->uid;
+ cred->cr_cred = get_cred(acred->cred);
}
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
-struct rpc_cred *
-rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
+static struct rpc_cred *
+rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
{
- dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
- cred->cr_auth->au_ops->au_name, cred);
- return get_rpccred(cred);
+ struct rpc_auth *auth = task->tk_client->cl_auth;
+ struct auth_cred acred = {
+ .cred = get_task_cred(&init_task),
+ };
+ struct rpc_cred *ret;
+
+ dprintk("RPC: %5u looking up %s cred\n",
+ task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
+ ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
+ put_cred(acred.cred);
+ return ret;
}
-EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
static struct rpc_cred *
-rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
+rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred acred = {
- .uid = GLOBAL_ROOT_UID,
- .gid = GLOBAL_ROOT_GID,
+ .principal = task->tk_client->cl_principal,
+ .cred = init_task.cred,
};
- dprintk("RPC: %5u looking up %s cred\n",
+ if (!acred.principal)
+ return NULL;
+ dprintk("RPC: %5u looking up %s machine cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}
@@ -712,18 +701,33 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
}
static int
-rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
+rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
{
struct rpc_rqst *req = task->tk_rqstp;
- struct rpc_cred *new;
+ struct rpc_cred *new = NULL;
int lookupflags = 0;
+ struct rpc_auth *auth = task->tk_client->cl_auth;
+ struct auth_cred acred = {
+ .cred = cred,
+ };
if (flags & RPC_TASK_ASYNC)
lookupflags |= RPCAUTH_LOOKUP_NEW;
- if (cred != NULL)
- new = cred->cr_ops->crbind(task, cred, lookupflags);
- else if (flags & RPC_TASK_ROOTCREDS)
+ if (task->tk_op_cred)
+ /* Task must use exactly this rpc_cred */
+ new = get_rpccred(task->tk_op_cred);
+ else if (cred != NULL && cred != &machine_cred)
+ new = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
+ else if (cred == &machine_cred)
+ new = rpcauth_bind_machine_cred(task, lookupflags);
+
+ /* If machine cred couldn't be bound, try a root cred */
+ if (new)
+ ;
+ else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS))
new = rpcauth_bind_root_cred(task, lookupflags);
+ else if (flags & RPC_TASK_NULLCREDS)
+ new = authnull_ops.lookup_cred(NULL, NULL, 0);
else
new = rpcauth_bind_new_cred(task, lookupflags);
if (IS_ERR(new))
@@ -901,15 +905,10 @@ int __init rpcauth_init_module(void)
err = rpc_init_authunix();
if (err < 0)
goto out1;
- err = rpc_init_generic_auth();
- if (err < 0)
- goto out2;
err = register_shrinker(&rpc_cred_shrinker);
if (err < 0)
- goto out3;
+ goto out2;
return 0;
-out3:
- rpc_destroy_generic_auth();
out2:
rpc_destroy_authunix();
out1:
@@ -919,6 +918,5 @@ out1:
void rpcauth_remove_module(void)
{
rpc_destroy_authunix();
- rpc_destroy_generic_auth();
unregister_shrinker(&rpc_cred_shrinker);
}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
deleted file mode 100644
index ab4a3be1542a..000000000000
--- a/net/sunrpc/auth_generic.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Generic RPC credential
- *
- * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com>
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sunrpc/auth.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/sched.h>
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-#define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID
-#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID
-
-struct generic_cred {
- struct rpc_cred gc_base;
- struct auth_cred acred;
-};
-
-static struct rpc_auth generic_auth;
-static const struct rpc_credops generic_credops;
-
-/*
- * Public call interface
- */
-struct rpc_cred *rpc_lookup_cred(void)
-{
- return rpcauth_lookupcred(&generic_auth, 0);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_cred);
-
-struct rpc_cred *
-rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp)
-{
- return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred);
-
-struct rpc_cred *rpc_lookup_cred_nonblock(void)
-{
- return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
-
-/*
- * Public call interface for looking up machine creds.
- */
-struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
-{
- struct auth_cred acred = {
- .uid = RPC_MACHINE_CRED_USERID,
- .gid = RPC_MACHINE_CRED_GROUPID,
- .principal = service_name,
- .machine_cred = 1,
- };
-
- dprintk("RPC: looking up machine cred for service %s\n",
- service_name);
- return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
-}
-EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
-
-static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
- struct rpc_cred *cred, int lookupflags)
-{
- struct rpc_auth *auth = task->tk_client->cl_auth;
- struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
-
- return auth->au_ops->lookup_cred(auth, acred, lookupflags);
-}
-
-static int
-generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
-{
- return hash_64(from_kgid(&init_user_ns, acred->gid) |
- ((u64)from_kuid(&init_user_ns, acred->uid) <<
- (sizeof(gid_t) * 8)), hashbits);
-}
-
-/*
- * Lookup generic creds for current process
- */
-static struct rpc_cred *
-generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
-{
- return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL);
-}
-
-static struct rpc_cred *
-generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
-{
- struct generic_cred *gcred;
-
- gcred = kmalloc(sizeof(*gcred), gfp);
- if (gcred == NULL)
- return ERR_PTR(-ENOMEM);
-
- rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops);
- gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
-
- gcred->acred.uid = acred->uid;
- gcred->acred.gid = acred->gid;
- gcred->acred.group_info = acred->group_info;
- gcred->acred.ac_flags = 0;
- if (gcred->acred.group_info != NULL)
- get_group_info(gcred->acred.group_info);
- gcred->acred.machine_cred = acred->machine_cred;
- gcred->acred.principal = acred->principal;
-
- dprintk("RPC: allocated %s cred %p for uid %d gid %d\n",
- gcred->acred.machine_cred ? "machine" : "generic",
- gcred,
- from_kuid(&init_user_ns, acred->uid),
- from_kgid(&init_user_ns, acred->gid));
- return &gcred->gc_base;
-}
-
-static void
-generic_free_cred(struct rpc_cred *cred)
-{
- struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
-
- dprintk("RPC: generic_free_cred %p\n", gcred);
- if (gcred->acred.group_info != NULL)
- put_group_info(gcred->acred.group_info);
- kfree(gcred);
-}
-
-static void
-generic_free_cred_callback(struct rcu_head *head)
-{
- struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu);
- generic_free_cred(cred);
-}
-
-static void
-generic_destroy_cred(struct rpc_cred *cred)
-{
- call_rcu(&cred->cr_rcu, generic_free_cred_callback);
-}
-
-static int
-machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags)
-{
- if (!gcred->acred.machine_cred ||
- gcred->acred.principal != acred->principal ||
- !uid_eq(gcred->acred.uid, acred->uid) ||
- !gid_eq(gcred->acred.gid, acred->gid))
- return 0;
- return 1;
-}
-
-/*
- * Match credentials against current process creds.
- */
-static int
-generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
-{
- struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
- int i;
-
- if (acred->machine_cred)
- return machine_cred_match(acred, gcred, flags);
-
- if (!uid_eq(gcred->acred.uid, acred->uid) ||
- !gid_eq(gcred->acred.gid, acred->gid) ||
- gcred->acred.machine_cred != 0)
- goto out_nomatch;
-
- /* Optimisation in the case where pointers are identical... */
- if (gcred->acred.group_info == acred->group_info)
- goto out_match;
-
- /* Slow path... */
- if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
- goto out_nomatch;
- for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
- if (!gid_eq(gcred->acred.group_info->gid[i],
- acred->group_info->gid[i]))
- goto out_nomatch;
- }
-out_match:
- return 1;
-out_nomatch:
- return 0;
-}
-
-int __init rpc_init_generic_auth(void)
-{
- return rpcauth_init_credcache(&generic_auth);
-}
-
-void rpc_destroy_generic_auth(void)
-{
- rpcauth_destroy_credcache(&generic_auth);
-}
-
-/*
- * Test the the current time (now) against the underlying credential key expiry
- * minus a timeout and setup notification.
- *
- * The normal case:
- * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
- * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
- * rpc_credops crmatch routine to notify this generic cred when it's key
- * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
- *
- * The error case:
- * If the underlying cred lookup fails, return -EACCES.
- *
- * The 'almost' error case:
- * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
- * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit
- * on the acred ac_flags and return 0.
- */
-static int
-generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
-{
- struct auth_cred *acred = &container_of(cred, struct generic_cred,
- gc_base)->acred;
- struct rpc_cred *tcred;
- int ret = 0;
-
-
- /* Fast track for non crkey_timeout (no key) underlying credentials */
- if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT)
- return 0;
-
- /* Fast track for the normal case */
- if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
- return 0;
-
- /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
- tcred = auth->au_ops->lookup_cred(auth, acred, 0);
- if (IS_ERR(tcred))
- return -EACCES;
-
- /* Test for the almost error case */
- ret = tcred->cr_ops->crkey_timeout(tcred);
- if (ret != 0) {
- set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
- ret = 0;
- } else {
- /* In case underlying cred key has been reset */
- if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
- &acred->ac_flags))
- dprintk("RPC: UID %d Credential key reset\n",
- from_kuid(&init_user_ns, tcred->cr_uid));
- /* set up fasttrack for the normal case */
- set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
- }
-
- put_rpccred(tcred);
- return ret;
-}
-
-static const struct rpc_authops generic_auth_ops = {
- .owner = THIS_MODULE,
- .au_name = "Generic",
- .hash_cred = generic_hash_cred,
- .lookup_cred = generic_lookup_cred,
- .crcreate = generic_create_cred,
- .key_timeout = generic_key_timeout,
-};
-
-static struct rpc_auth generic_auth = {
- .au_ops = &generic_auth_ops,
- .au_count = REFCOUNT_INIT(1),
-};
-
-static bool generic_key_to_expire(struct rpc_cred *cred)
-{
- struct auth_cred *acred = &container_of(cred, struct generic_cred,
- gc_base)->acred;
- return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
-}
-
-static const struct rpc_credops generic_credops = {
- .cr_name = "Generic cred",
- .crdestroy = generic_destroy_cred,
- .crbind = generic_bind_cred,
- .crmatch = generic_match,
- .crkey_to_expire = generic_key_to_expire,
-};
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ba765473d1f0..dc86713b32b6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -565,7 +565,7 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
struct gss_cred *gss_cred = container_of(cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_new, *gss_msg;
- kuid_t uid = cred->cr_uid;
+ kuid_t uid = cred->cr_cred->fsuid;
gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
if (IS_ERR(gss_new))
@@ -604,7 +604,7 @@ gss_refresh_upcall(struct rpc_task *task)
int err = 0;
dprintk("RPC: %5u %s for uid %u\n",
- task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
+ task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
@@ -637,7 +637,7 @@ gss_refresh_upcall(struct rpc_task *task)
out:
dprintk("RPC: %5u %s for uid %u result %d\n",
task->tk_pid, __func__,
- from_kuid(&init_user_ns, cred->cr_uid), err);
+ from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
return err;
}
@@ -653,7 +653,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
int err;
dprintk("RPC: %s for uid %u\n",
- __func__, from_kuid(&init_user_ns, cred->cr_uid));
+ __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
retry:
err = 0;
/* if gssd is down, just skip upcalling altogether */
@@ -701,7 +701,7 @@ out_intr:
gss_release_msg(gss_msg);
out:
dprintk("RPC: %s for uid %u result %d\n",
- __func__, from_kuid(&init_user_ns, cred->cr_uid), err);
+ __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
return err;
}
@@ -1248,7 +1248,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
if (new) {
struct auth_cred acred = {
- .uid = gss_cred->gc_base.cr_uid,
+ .cred = gss_cred->gc_base.cr_cred,
};
struct gss_cl_ctx *ctx =
rcu_dereference_protected(gss_cred->gc_ctx, 1);
@@ -1343,6 +1343,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
+ put_cred(cred->cr_cred);
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
if (ctx)
gss_put_ctx(ctx);
@@ -1361,7 +1362,7 @@ gss_destroy_cred(struct rpc_cred *cred)
static int
gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
{
- return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits);
+ return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits);
}
/*
@@ -1381,7 +1382,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
int err = -ENOMEM;
dprintk("RPC: %s for uid %d, flavor %d\n",
- __func__, from_kuid(&init_user_ns, acred->uid),
+ __func__, from_kuid(&init_user_ns, acred->cred->fsuid),
auth->au_flavor);
if (!(cred = kzalloc(sizeof(*cred), gfp)))
@@ -1394,9 +1395,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
*/
cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
cred->gc_service = gss_auth->service;
- cred->gc_principal = NULL;
- if (acred->machine_cred)
- cred->gc_principal = acred->principal;
+ cred->gc_principal = acred->principal;
kref_get(&gss_auth->kref);
return &cred->gc_base;
@@ -1518,23 +1517,10 @@ out:
if (gss_cred->gc_principal == NULL)
return 0;
ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
- goto check_expire;
- }
- if (gss_cred->gc_principal != NULL)
- return 0;
- ret = uid_eq(rc->cr_uid, acred->uid);
-
-check_expire:
- if (ret == 0)
- return ret;
-
- /* Notify acred users of GSS context expiration timeout */
- if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
- (gss_key_timeout(rc) != 0)) {
- /* test will now be done from generic cred */
- test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
- /* tell NFS layer that key will expire soon */
- set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+ } else {
+ if (gss_cred->gc_principal != NULL)
+ return 0;
+ ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid);
}
return ret;
}
@@ -1607,9 +1593,8 @@ static int gss_renew_cred(struct rpc_task *task)
gc_base);
struct rpc_auth *auth = oldcred->cr_auth;
struct auth_cred acred = {
- .uid = oldcred->cr_uid,
+ .cred = oldcred->cr_cred,
.principal = gss_cred->gc_principal,
- .machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0),
};
struct rpc_cred *new;
@@ -2110,7 +2095,6 @@ static const struct rpc_credops gss_credops = {
.cr_name = "AUTH_GSS",
.crdestroy = gss_destroy_cred,
.cr_init = gss_cred_init,
- .crbind = rpcauth_generic_bind_cred,
.crmatch = gss_match,
.crmarshal = gss_marshal,
.crrefresh = gss_refresh,
@@ -2125,7 +2109,6 @@ static const struct rpc_credops gss_credops = {
static const struct rpc_credops gss_nullops = {
.cr_name = "AUTH_GSS",
.crdestroy = gss_destroy_nullcred,
- .crbind = rpcauth_generic_bind_cred,
.crmatch = gss_match,
.crmarshal = gss_marshal,
.crrefresh = gss_refresh_null,
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 16ac0f4cb7d8..379318dff534 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -244,7 +244,7 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
/**
* gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
- * @array: array to fill in
+ * @array_ptr: array to fill in
* @size: size of "array"
*
* Returns the number of array items filled in, or a negative errno.
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1ece4bc3eb8d..152790ed309c 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1142,7 +1142,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
struct kvec *resv = &rqstp->rq_res.head[0];
struct rsi *rsip, rsikey;
int ret;
- struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+ struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
memset(&rsikey, 0, sizeof(rsikey));
ret = gss_read_verf(gc, argv, authp,
@@ -1253,7 +1253,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
uint64_t handle;
int status;
int ret;
- struct net *net = rqstp->rq_xprt->xpt_net;
+ struct net *net = SVC_NET(rqstp);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
memset(&ud, 0, sizeof(ud));
@@ -1444,7 +1444,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
__be32 *rpcstart;
__be32 *reject_stat = resv->iov_base + resv->iov_len;
int ret;
- struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+ struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",
argv->iov_len);
@@ -1734,7 +1734,7 @@ svcauth_gss_release(struct svc_rqst *rqstp)
struct rpc_gss_wire_cred *gc = &gsd->clcred;
struct xdr_buf *resbuf = &rqstp->rq_res;
int stat = -EINVAL;
- struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+ struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 2694a1bc026b..d0ceac57c06e 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -36,8 +36,6 @@ nul_destroy(struct rpc_auth *auth)
static struct rpc_cred *
nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
- if (flags & RPCAUTH_LOOKUP_RCU)
- return &null_cred;
return get_rpccred(&null_cred);
}
@@ -116,7 +114,6 @@ static
struct rpc_auth null_auth = {
.au_cslack = NUL_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
- .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
.au_count = REFCOUNT_INIT(1),
@@ -126,7 +123,6 @@ static
const struct rpc_credops null_credops = {
.cr_name = "AUTH_NULL",
.crdestroy = nul_destroy_cred,
- .crbind = rpcauth_generic_bind_cred,
.crmatch = nul_match,
.crmarshal = nul_marshal,
.crrefresh = nul_refresh,
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 4c1c7e56288f..387f6b3ffbea 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -11,16 +11,11 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
+#include <linux/mempool.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/auth.h>
#include <linux/user_namespace.h>
-struct unx_cred {
- struct rpc_cred uc_base;
- kgid_t uc_gid;
- kgid_t uc_gids[UNX_NGROUPS];
-};
-#define uc_uid uc_base.cr_uid
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -28,6 +23,7 @@ struct unx_cred {
static struct rpc_auth unix_auth;
static const struct rpc_credops unix_credops;
+static mempool_t *unix_pool;
static struct rpc_auth *
unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
@@ -42,15 +38,6 @@ static void
unx_destroy(struct rpc_auth *auth)
{
dprintk("RPC: destroying UNIX authenticator %p\n", auth);
- rpcauth_clear_credcache(auth->au_credcache);
-}
-
-static int
-unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
-{
- return hash_64(from_kgid(&init_user_ns, acred->gid) |
- ((u64)from_kuid(&init_user_ns, acred->uid) <<
- (sizeof(gid_t) * 8)), hashbits);
}
/*
@@ -59,52 +46,24 @@ unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
static struct rpc_cred *
unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
- return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS);
-}
-
-static struct rpc_cred *
-unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp)
-{
- struct unx_cred *cred;
- unsigned int groups = 0;
- unsigned int i;
+ struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
- from_kuid(&init_user_ns, acred->uid),
- from_kgid(&init_user_ns, acred->gid));
-
- if (!(cred = kmalloc(sizeof(*cred), gfp)))
- return ERR_PTR(-ENOMEM);
+ from_kuid(&init_user_ns, acred->cred->fsuid),
+ from_kgid(&init_user_ns, acred->cred->fsgid));
- rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
- cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
-
- if (acred->group_info != NULL)
- groups = acred->group_info->ngroups;
- if (groups > UNX_NGROUPS)
- groups = UNX_NGROUPS;
-
- cred->uc_gid = acred->gid;
- for (i = 0; i < groups; i++)
- cred->uc_gids[i] = acred->group_info->gid[i];
- if (i < UNX_NGROUPS)
- cred->uc_gids[i] = INVALID_GID;
-
- return &cred->uc_base;
-}
-
-static void
-unx_free_cred(struct unx_cred *unx_cred)
-{
- dprintk("RPC: unx_free_cred %p\n", unx_cred);
- kfree(unx_cred);
+ rpcauth_init_cred(ret, acred, auth, &unix_credops);
+ ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+ return ret;
}
static void
unx_free_cred_callback(struct rcu_head *head)
{
- struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
- unx_free_cred(unx_cred);
+ struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
+ dprintk("RPC: unx_free_cred %p\n", rpc_cred);
+ put_cred(rpc_cred->cr_cred);
+ mempool_free(rpc_cred, unix_pool);
}
static void
@@ -114,30 +73,32 @@ unx_destroy_cred(struct rpc_cred *cred)
}
/*
- * Match credentials against current process creds.
- * The root_override argument takes care of cases where the caller may
- * request root creds (e.g. for NFS swapping).
+ * Match credentials against the current auth_cred.
*/
static int
-unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
+unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
{
- struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
unsigned int groups = 0;
unsigned int i;
+ if (cred->cr_cred == acred->cred)
+ return 1;
- if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
+ if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid))
return 0;
- if (acred->group_info != NULL)
- groups = acred->group_info->ngroups;
+ if (acred->cred && acred->cred->group_info != NULL)
+ groups = acred->cred->group_info->ngroups;
if (groups > UNX_NGROUPS)
groups = UNX_NGROUPS;
+ if (cred->cr_cred->group_info == NULL)
+ return groups == 0;
+ if (groups != cred->cr_cred->group_info->ngroups)
+ return 0;
+
for (i = 0; i < groups ; i++)
- if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
+ if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i]))
return 0;
- if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
- return 0;
return 1;
}
@@ -149,9 +110,10 @@ static __be32 *
unx_marshal(struct rpc_task *task, __be32 *p)
{
struct rpc_clnt *clnt = task->tk_client;
- struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
__be32 *base, *hold;
int i;
+ struct group_info *gi = cred->cr_cred->group_info;
*p++ = htonl(RPC_AUTH_UNIX);
base = p++;
@@ -162,11 +124,12 @@ unx_marshal(struct rpc_task *task, __be32 *p)
*/
p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
- *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
+ *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
+ *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
hold = p++;
- for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++)
- *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
+ if (gi)
+ for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
+ *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
*hold = htonl(p - hold - 1); /* gid array length */
*base = htonl((p - base - 1) << 2); /* cred length */
@@ -213,12 +176,13 @@ unx_validate(struct rpc_task *task, __be32 *p)
int __init rpc_init_authunix(void)
{
- return rpcauth_init_credcache(&unix_auth);
+ unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred));
+ return unix_pool ? 0 : -ENOMEM;
}
void rpc_destroy_authunix(void)
{
- rpcauth_destroy_credcache(&unix_auth);
+ mempool_destroy(unix_pool);
}
const struct rpc_authops authunix_ops = {
@@ -227,16 +191,13 @@ const struct rpc_authops authunix_ops = {
.au_name = "UNIX",
.create = unx_create,
.destroy = unx_destroy,
- .hash_cred = unx_hash_cred,
.lookup_cred = unx_lookup_cred,
- .crcreate = unx_create_cred,
};
static
struct rpc_auth unix_auth = {
.au_cslack = UNX_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
- .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
.au_count = REFCOUNT_INIT(1),
@@ -246,7 +207,6 @@ static
const struct rpc_credops unix_credops = {
.cr_name = "AUTH_UNIX",
.crdestroy = unx_destroy_cred,
- .crbind = rpcauth_generic_bind_cred,
.crmatch = unx_match,
.crmarshal = unx_marshal,
.crrefresh = unx_refresh,
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index fa5ba6ed3197..ec451b8114b0 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -197,7 +197,7 @@ out_free:
/**
* xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
* @xprt: the transport holding the preallocated strucures
- * @max_reqs the maximum number of preallocated structures to destroy
+ * @max_reqs: the maximum number of preallocated structures to destroy
*
* Since these structures may have been allocated by multiple calls
* to xprt_setup_backchannel, we only destroy up to the maximum number
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f96345b1180e..12bb23b8e0c5 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,6 +54,11 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
h->last_refresh = now;
}
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+ struct cache_detail *detail);
+static void cache_fresh_unlocked(struct cache_head *head,
+ struct cache_detail *detail);
+
static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
struct cache_head *key,
int hash)
@@ -100,6 +105,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
if (cache_is_expired(detail, tmp)) {
hlist_del_init_rcu(&tmp->cache_list);
detail->entries --;
+ cache_fresh_locked(tmp, 0, detail);
freeme = tmp;
break;
}
@@ -115,8 +121,10 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
cache_get(new);
spin_unlock(&detail->hash_lock);
- if (freeme)
+ if (freeme) {
+ cache_fresh_unlocked(freeme, detail);
cache_put(freeme, detail);
+ }
return new;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 24cbddc44c88..71d9599b5816 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -627,6 +627,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
+ new->cl_principal = clnt->cl_principal;
return new;
out_err:
@@ -1029,7 +1030,7 @@ rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
task->tk_msg.rpc_argp = msg->rpc_argp;
task->tk_msg.rpc_resp = msg->rpc_resp;
if (msg->rpc_cred != NULL)
- task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred);
+ task->tk_msg.rpc_cred = get_cred(msg->rpc_cred);
}
}
@@ -2521,9 +2522,8 @@ static int rpc_ping(struct rpc_clnt *clnt)
.rpc_proc = &rpcproc_null,
};
int err;
- msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
- err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN);
- put_rpccred(msg.rpc_cred);
+ err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
+ RPC_TASK_NULLCREDS);
return err;
}
@@ -2534,15 +2534,15 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
{
struct rpc_message msg = {
.rpc_proc = &rpcproc_null,
- .rpc_cred = cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_xprt = xprt,
.rpc_message = &msg,
+ .rpc_op_cred = cred,
.callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
.callback_data = data,
- .flags = flags,
+ .flags = flags | RPC_TASK_NULLCREDS,
};
return rpc_run_task(&task_setup_data);
@@ -2593,7 +2593,6 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
void *dummy)
{
struct rpc_cb_add_xprt_calldata *data;
- struct rpc_cred *cred;
struct rpc_task *task;
data = kmalloc(sizeof(*data), GFP_NOFS);
@@ -2602,11 +2601,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
data->xps = xprt_switch_get(xps);
data->xprt = xprt_get(xprt);
- cred = authnull_ops.lookup_cred(NULL, NULL, 0);
- task = rpc_call_null_helper(clnt, xprt, cred,
- RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC,
+ task = rpc_call_null_helper(clnt, xprt, NULL,
+ RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
&rpc_cb_add_xprt_call_ops, data);
- put_rpccred(cred);
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
@@ -2637,7 +2634,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *data)
{
- struct rpc_cred *cred;
struct rpc_task *task;
struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
int status = -EADDRINUSE;
@@ -2649,11 +2645,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
goto out_err;
/* Test the connection */
- cred = authnull_ops.lookup_cred(NULL, NULL, 0);
- task = rpc_call_null_helper(clnt, xprt, cred,
- RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ task = rpc_call_null_helper(clnt, xprt, NULL,
+ RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
NULL, NULL);
- put_rpccred(cred);
if (IS_ERR(task)) {
status = PTR_ERR(task);
goto out_err;
@@ -2667,6 +2661,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
/* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
xtest->add_xprt_test(clnt, xprt, xtest->data);
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+
/* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
return 1;
out_err:
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 4fda18d47e2c..69663681bf9d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1266,7 +1266,7 @@ static const struct rpc_pipe_ops gssd_dummy_pipe_ops = {
* that this file will be there and have a certain format.
*/
static int
-rpc_show_dummy_info(struct seq_file *m, void *v)
+rpc_dummy_info_show(struct seq_file *m, void *v)
{
seq_printf(m, "RPC server: %s\n", utsname()->nodename);
seq_printf(m, "service: foo (1) version 0\n");
@@ -1275,25 +1275,12 @@ rpc_show_dummy_info(struct seq_file *m, void *v)
seq_printf(m, "port: 0\n");
return 0;
}
-
-static int
-rpc_dummy_info_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rpc_show_dummy_info, NULL);
-}
-
-static const struct file_operations rpc_dummy_info_operations = {
- .owner = THIS_MODULE,
- .open = rpc_dummy_info_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
static const struct rpc_filelist gssd_dummy_info_file[] = {
[0] = {
.name = "info",
- .i_fop = &rpc_dummy_info_operations,
+ .i_fop = &rpc_dummy_info_fops,
.mode = S_IFREG | 0400,
},
};
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c7872bc13860..41a971ac1c63 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -752,7 +752,7 @@ void rpcb_getport_async(struct rpc_task *task)
goto bailout_nofree;
}
- map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
+ map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
if (!map) {
status = -ENOMEM;
dprintk("RPC: %5u %s: no memory available\n",
@@ -770,7 +770,13 @@ void rpcb_getport_async(struct rpc_task *task)
case RPCBVERS_4:
case RPCBVERS_3:
map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
- map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC);
+ map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
+ if (!map->r_addr) {
+ status = -ENOMEM;
+ dprintk("RPC: %5u %s: no memory available\n",
+ task->tk_pid, __func__);
+ goto bailout_free_args;
+ }
map->r_owner = "";
break;
case RPCBVERS_2:
@@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task)
rpc_put_task(child);
return;
+bailout_free_args:
+ kfree(map);
bailout_release_client:
rpc_release_client(rpcb_clnt);
bailout_nofree:
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 57ca5bead1cb..adc3c40cc733 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -997,6 +997,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
+ task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
+
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
@@ -1054,6 +1056,7 @@ static void rpc_free_task(struct rpc_task *task)
{
unsigned short tk_flags = task->tk_flags;
+ put_rpccred(task->tk_op_cred);
rpc_release_calldata(task->tk_ops, task->tk_calldata);
if (tk_flags & RPC_TASK_DYNAMIC) {
@@ -1071,7 +1074,7 @@ static void rpc_release_resources_task(struct rpc_task *task)
{
xprt_release(task);
if (task->tk_msg.rpc_cred) {
- put_rpccred(task->tk_msg.rpc_cred);
+ put_cred(task->tk_msg.rpc_cred);
task->tk_msg.rpc_cred = NULL;
}
rpc_task_release_client(task);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d13e05f1a990..e87ddb9f7feb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1145,6 +1145,17 @@ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ..
#endif
/*
+ * Set up the response header for TCP, which has a 4B record length field.
+ */
+static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
+{
+ struct kvec *resv = &rqstp->rq_res.head[0];
+
+ /* tcp needs a space for the record length... */
+ svc_putnl(resv, 0);
+}
+
+/*
* Common routine for processing the RPC request.
*/
static int
@@ -1172,7 +1183,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
clear_bit(RQ_DROPME, &rqstp->rq_flags);
/* Setup reply header */
- rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
+ if (rqstp->rq_prot == IPPROTO_TCP)
+ svc_tcp_prep_reply_hdr(rqstp);
svc_putu32(resv, rqstp->rq_xid);
@@ -1244,7 +1256,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
* for lower versions. RPC_PROG_MISMATCH seems to be the closest
* fit.
*/
- if (versp->vs_need_cong_ctrl &&
+ if (versp->vs_need_cong_ctrl && rqstp->rq_xprt &&
!test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
goto err_bad_vers;
@@ -1336,7 +1348,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
return 0;
close:
- if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
+ if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
svc_close_xprt(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
return 0;
@@ -1459,10 +1471,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
dprintk("svc: %s(%p)\n", __func__, req);
/* Build the svc_rqst used by the common processing routine */
- rqstp->rq_xprt = serv->sv_bc_xprt;
rqstp->rq_xid = req->rq_xid;
rqstp->rq_prot = req->rq_xprt->prot;
rqstp->rq_server = serv;
+ rqstp->rq_bc_net = req->rq_xprt->xprt_net;
rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
@@ -1499,9 +1511,9 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
- return 0;
+ error = -EINVAL;
+ goto out;
}
-
/* Finally, send the reply synchronously */
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 51d36230b6e3..4eb8fbf2508d 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -296,9 +296,9 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
request_module("svc%s", xprt_name);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags);
}
- if (err)
+ if (err < 0)
dprintk("svc: transport %s not found, err %d\n",
- xprt_name, err);
+ xprt_name, -err);
return err;
}
EXPORT_SYMBOL_GPL(svc_create_xprt);
@@ -468,10 +468,11 @@ out:
*/
void svc_reserve(struct svc_rqst *rqstp, int space)
{
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+
space += rqstp->rq_res.head[0].iov_len;
- if (space < rqstp->rq_reserved) {
- struct svc_xprt *xprt = rqstp->rq_xprt;
+ if (xprt && space < rqstp->rq_reserved) {
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 986f3ed7d1a2..a6a060925e5d 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -70,13 +70,6 @@ static void svc_sock_free(struct svc_xprt *);
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
- struct net *, struct sockaddr *,
- int, int);
-static void svc_bc_sock_free(struct svc_xprt *xprt);
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];
@@ -549,7 +542,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
/* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
- svsk->sk_sk->sk_stamp = skb->tstamp;
+ sock_write_timestamp(svsk->sk_sk, skb->tstamp);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
len = skb->len;
@@ -617,10 +610,6 @@ svc_udp_sendto(struct svc_rqst *rqstp)
return error;
}
-static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
-}
-
static int svc_udp_has_wspace(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
@@ -664,7 +653,6 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_release_rqst = svc_release_udp_skb,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
- .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
.xpo_secure_port = svc_sock_secure_port,
@@ -1170,17 +1158,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
return sent;
}
-/*
- * Setup response header. TCP has a 4B record length field.
- */
-static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
- struct kvec *resv = &rqstp->rq_res.head[0];
-
- /* tcp needs a space for the record length... */
- svc_putnl(resv, 0);
-}
-
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -1189,58 +1166,6 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
- struct net *, struct sockaddr *,
- int, int);
-static void svc_bc_sock_free(struct svc_xprt *xprt);
-
-static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
- struct net *net,
- struct sockaddr *sa, int salen,
- int flags)
-{
- return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
-}
-
-static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
-{
-}
-
-static const struct svc_xprt_ops svc_tcp_bc_ops = {
- .xpo_create = svc_bc_tcp_create,
- .xpo_detach = svc_bc_tcp_sock_detach,
- .xpo_free = svc_bc_sock_free,
- .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
- .xpo_secure_port = svc_sock_secure_port,
-};
-
-static struct svc_xprt_class svc_tcp_bc_class = {
- .xcl_name = "tcp-bc",
- .xcl_owner = THIS_MODULE,
- .xcl_ops = &svc_tcp_bc_ops,
- .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
-};
-
-static void svc_init_bc_xprt_sock(void)
-{
- svc_reg_xprt_class(&svc_tcp_bc_class);
-}
-
-static void svc_cleanup_bc_xprt_sock(void)
-{
- svc_unreg_xprt_class(&svc_tcp_bc_class);
-}
-#else /* CONFIG_SUNRPC_BACKCHANNEL */
-static void svc_init_bc_xprt_sock(void)
-{
-}
-
-static void svc_cleanup_bc_xprt_sock(void)
-{
-}
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-
static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom,
@@ -1248,7 +1173,6 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_release_rqst = svc_release_skb,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
- .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
@@ -1267,14 +1191,12 @@ void svc_init_xprt_sock(void)
{
svc_reg_xprt_class(&svc_tcp_class);
svc_reg_xprt_class(&svc_udp_class);
- svc_init_bc_xprt_sock();
}
void svc_cleanup_xprt_sock(void)
{
svc_unreg_xprt_class(&svc_tcp_class);
svc_unreg_xprt_class(&svc_udp_class);
- svc_cleanup_bc_xprt_sock();
}
static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
@@ -1595,45 +1517,3 @@ static void svc_sock_free(struct svc_xprt *xprt)
sock_release(svsk->sk_sock);
kfree(svsk);
}
-
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Create a back channel svc_xprt which shares the fore channel socket.
- */
-static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
- int protocol,
- struct net *net,
- struct sockaddr *sin, int len,
- int flags)
-{
- struct svc_sock *svsk;
- struct svc_xprt *xprt;
-
- if (protocol != IPPROTO_TCP) {
- printk(KERN_WARNING "svc: only TCP sockets"
- " supported on shared back channel\n");
- return ERR_PTR(-EINVAL);
- }
-
- svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
- if (!svsk)
- return ERR_PTR(-ENOMEM);
-
- xprt = &svsk->sk_xprt;
- svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
- set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
-
- serv->sv_bc_xprt = xprt;
-
- return xprt;
-}
-
-/*
- * Free a back channel svc_sock.
- */
-static void svc_bc_sock_free(struct svc_xprt *xprt)
-{
- if (xprt)
- kfree(container_of(xprt, struct svc_sock, sk_xprt));
-}
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index e2d64c7138c3..8394124126f8 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -383,7 +383,7 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
/**
* xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
* @xpi: pointer to rpc_xprt_iter
- * @xps: pointer to a new rpc_xprt_switch or NULL
+ * @newswitch: pointer to a new rpc_xprt_switch or NULL
*
* Swaps out the existing xpi->xpi_xpswitch with a new value.
*/
@@ -401,7 +401,7 @@ struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi,
/**
* xprt_iter_destroy - Destroys the xprt iterator
- * @xpi pointer to rpc_xprt_iter
+ * @xpi: pointer to rpc_xprt_iter
*/
void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
{
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 8bf19e142b6b..8ed0377d7a18 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,8 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-rpcrdma-y := transport.o rpc_rdma.o verbs.o \
- fmr_ops.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
module.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index e5b367a3e517..0de9b3e63770 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -5,7 +5,6 @@
* Support for backward direction RPCs on RPC/RDMA.
*/
-#include <linux/module.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
@@ -20,29 +19,16 @@
#undef RPCRDMA_BACKCHANNEL_DEBUG
-static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
- struct rpc_rqst *rqst)
-{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
- struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-
- spin_lock(&buf->rb_reqslock);
- list_del(&req->rl_all);
- spin_unlock(&buf->rb_reqslock);
-
- rpcrdma_destroy_req(req);
-}
-
static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
unsigned int count)
{
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpcrdma_req *req;
struct rpc_rqst *rqst;
unsigned int i;
for (i = 0; i < (count << 1); i++) {
struct rpcrdma_regbuf *rb;
- struct rpcrdma_req *req;
size_t size;
req = rpcrdma_create_req(r_xprt);
@@ -68,7 +54,7 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
return 0;
out_fail:
- rpcrdma_bc_free_rqst(r_xprt, rqst);
+ rpcrdma_req_destroy(req);
return -ENOMEM;
}
@@ -101,7 +87,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
goto out_free;
r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
- request_module("svcrdma");
trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
@@ -114,26 +99,6 @@ out_err:
}
/**
- * xprt_rdma_bc_up - Create transport endpoint for backchannel service
- * @serv: server endpoint
- * @net: network namespace
- *
- * The "xprt" is an implied argument: it supplies the name of the
- * backchannel transport class.
- *
- * Returns zero on success, negative errno on failure
- */
-int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
-{
- int ret;
-
- ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0);
- if (ret < 0)
- return ret;
- return 0;
-}
-
-/**
* xprt_rdma_bc_maxpayload - Return maximum backchannel message size
* @xprt: transport
*
@@ -193,21 +158,21 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
*/
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
{
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
int rc;
- if (!xprt_connected(rqst->rq_xprt))
- goto drop_connection;
+ if (!xprt_connected(xprt))
+ return -ENOTCONN;
- if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
+ if (!xprt_request_get_cong(xprt, rqst))
return -EBADSLT;
rc = rpcrdma_bc_marshal_reply(rqst);
if (rc < 0)
goto failed_marshal;
- rpcrdma_post_recvs(r_xprt, true);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
return 0;
@@ -216,7 +181,7 @@ failed_marshal:
if (rc != -ENOTCONN)
return rc;
drop_connection:
- xprt_disconnect_done(rqst->rq_xprt);
+ xprt_rdma_close(xprt);
return -ENOTCONN;
}
@@ -227,7 +192,6 @@ drop_connection:
*/
void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
{
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpc_rqst *rqst, *tmp;
spin_lock(&xprt->bc_pa_lock);
@@ -235,7 +199,7 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
- rpcrdma_bc_free_rqst(r_xprt, rqst);
+ rpcrdma_req_destroy(rpcr_to_rdmar(rqst));
spin_lock(&xprt->bc_pa_lock);
}
@@ -251,9 +215,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpc_xprt *xprt = rqst->rq_xprt;
- dprintk("RPC: %s: freeing rqst %p (req %p)\n",
- __func__, rqst, req);
-
rpcrdma_recv_buffer_put(req->rl_reply);
req->rl_reply = NULL;
@@ -339,7 +300,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
out_overflow:
pr_warn("RPC/RDMA backchannel overflow\n");
- xprt_disconnect_done(xprt);
+ xprt_force_disconnect(xprt);
/* This receive buffer gets reposted automatically
* when the connection is re-established.
*/
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
deleted file mode 100644
index fd8fea59fe92..000000000000
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ /dev/null
@@ -1,337 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2015, 2017 Oracle. All rights reserved.
- * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
- */
-
-/* Lightweight memory registration using Fast Memory Regions (FMR).
- * Referred to sometimes as MTHCAFMR mode.
- *
- * FMR uses synchronous memory registration and deregistration.
- * FMR registration is known to be fast, but FMR deregistration
- * can take tens of usecs to complete.
- */
-
-/* Normal operation
- *
- * A Memory Region is prepared for RDMA READ or WRITE using the
- * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
- * finished, the Memory Region is unmapped using the ib_unmap_fmr
- * verb (fmr_op_unmap).
- */
-
-#include <linux/sunrpc/svc_rdma.h>
-
-#include "xprt_rdma.h"
-#include <trace/events/rpcrdma.h>
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_TRANS
-#endif
-
-/* Maximum scatter/gather per FMR */
-#define RPCRDMA_MAX_FMR_SGES (64)
-
-/* Access mode of externally registered pages */
-enum {
- RPCRDMA_FMR_ACCESS_FLAGS = IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ,
-};
-
-bool
-fmr_is_supported(struct rpcrdma_ia *ia)
-{
- if (!ia->ri_device->ops.alloc_fmr) {
- pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
- ia->ri_device->name);
- return false;
- }
- return true;
-}
-
-static void
-__fmr_unmap(struct rpcrdma_mr *mr)
-{
- LIST_HEAD(l);
- int rc;
-
- list_add(&mr->fmr.fm_mr->list, &l);
- rc = ib_unmap_fmr(&l);
- list_del(&mr->fmr.fm_mr->list);
- if (rc)
- pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
- mr, rc);
-}
-
-/* Release an MR.
- */
-static void
-fmr_op_release_mr(struct rpcrdma_mr *mr)
-{
- int rc;
-
- kfree(mr->fmr.fm_physaddrs);
- kfree(mr->mr_sg);
-
- /* In case this one was left mapped, try to unmap it
- * to prevent dealloc_fmr from failing with EBUSY
- */
- __fmr_unmap(mr);
-
- rc = ib_dealloc_fmr(mr->fmr.fm_mr);
- if (rc)
- pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
- mr, rc);
-
- kfree(mr);
-}
-
-/* MRs are dynamically allocated, so simply clean up and release the MR.
- * A replacement MR will subsequently be allocated on demand.
- */
-static void
-fmr_mr_recycle_worker(struct work_struct *work)
-{
- struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
- trace_xprtrdma_mr_recycle(mr);
-
- trace_xprtrdma_mr_unmap(mr);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
-
- spin_lock(&r_xprt->rx_buf.rb_mrlock);
- list_del(&mr->mr_all);
- r_xprt->rx_stats.mrs_recycled++;
- spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- fmr_op_release_mr(mr);
-}
-
-static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
- static struct ib_fmr_attr fmr_attr = {
- .max_pages = RPCRDMA_MAX_FMR_SGES,
- .max_maps = 1,
- .page_shift = PAGE_SHIFT
- };
-
- mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(u64), GFP_KERNEL);
- if (!mr->fmr.fm_physaddrs)
- goto out_free;
-
- mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(*mr->mr_sg), GFP_KERNEL);
- if (!mr->mr_sg)
- goto out_free;
-
- sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
-
- mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
- &fmr_attr);
- if (IS_ERR(mr->fmr.fm_mr))
- goto out_fmr_err;
-
- INIT_LIST_HEAD(&mr->mr_list);
- INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker);
- return 0;
-
-out_fmr_err:
- dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
- PTR_ERR(mr->fmr.fm_mr));
-
-out_free:
- kfree(mr->mr_sg);
- kfree(mr->fmr.fm_physaddrs);
- return -ENOMEM;
-}
-
-/* On success, sets:
- * ep->rep_attr.cap.max_send_wr
- * ep->rep_attr.cap.max_recv_wr
- * cdata->max_requests
- * ia->ri_max_segs
- */
-static int
-fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
- struct rpcrdma_create_data_internal *cdata)
-{
- int max_qp_wr;
-
- max_qp_wr = ia->ri_device->attrs.max_qp_wr;
- max_qp_wr -= RPCRDMA_BACKWARD_WRS;
- max_qp_wr -= 1;
- if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
- return -ENOMEM;
- if (cdata->max_requests > max_qp_wr)
- cdata->max_requests = max_qp_wr;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests;
- ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
- ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
- ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
- ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
- ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
-
- ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
- RPCRDMA_MAX_FMR_SGES);
- ia->ri_max_segs += 2; /* segments for head and tail buffers */
- return 0;
-}
-
-/* FMR mode conveys up to 64 pages of payload per chunk segment.
- */
-static size_t
-fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
-{
- return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
-}
-
-/* Use the ib_map_phys_fmr() verb to register a memory region
- * for remote access via RDMA READ or RDMA WRITE.
- */
-static struct rpcrdma_mr_seg *
-fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mr **out)
-{
- struct rpcrdma_mr_seg *seg1 = seg;
- int len, pageoff, i, rc;
- struct rpcrdma_mr *mr;
- u64 *dma_pages;
-
- mr = rpcrdma_mr_get(r_xprt);
- if (!mr)
- return ERR_PTR(-EAGAIN);
-
- pageoff = offset_in_page(seg1->mr_offset);
- seg1->mr_offset -= pageoff; /* start of page */
- seg1->mr_len += pageoff;
- len = -pageoff;
- if (nsegs > RPCRDMA_MAX_FMR_SGES)
- nsegs = RPCRDMA_MAX_FMR_SGES;
- for (i = 0; i < nsegs;) {
- if (seg->mr_page)
- sg_set_page(&mr->mr_sg[i],
- seg->mr_page,
- seg->mr_len,
- offset_in_page(seg->mr_offset));
- else
- sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
- seg->mr_len);
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
- break;
- }
- mr->mr_dir = rpcrdma_data_dir(writing);
-
- mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
- mr->mr_sg, i, mr->mr_dir);
- if (!mr->mr_nents)
- goto out_dmamap_err;
- trace_xprtrdma_mr_map(mr);
-
- for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
- dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
- rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
- dma_pages[0]);
- if (rc)
- goto out_maperr;
-
- mr->mr_handle = mr->fmr.fm_mr->rkey;
- mr->mr_length = len;
- mr->mr_offset = dma_pages[0] + pageoff;
-
- *out = mr;
- return seg;
-
-out_dmamap_err:
- pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mr->mr_sg, i);
- rpcrdma_mr_put(mr);
- return ERR_PTR(-EIO);
-
-out_maperr:
- pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
- len, (unsigned long long)dma_pages[0],
- pageoff, mr->mr_nents, rc);
- rpcrdma_mr_unmap_and_put(mr);
- return ERR_PTR(-EIO);
-}
-
-/* Post Send WR containing the RPC Call message.
- */
-static int
-fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
-{
- return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL);
-}
-
-/* Invalidate all memory regions that were registered for "req".
- *
- * Sleeps until it is safe for the host CPU to access the
- * previously mapped memory regions.
- *
- * Caller ensures that @mrs is not empty before the call. This
- * function empties the list.
- */
-static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
-{
- struct rpcrdma_mr *mr;
- LIST_HEAD(unmap_list);
- int rc;
-
- /* ORDER: Invalidate all of the req's MRs first
- *
- * ib_unmap_fmr() is slow, so use a single call instead
- * of one call per mapped FMR.
- */
- list_for_each_entry(mr, mrs, mr_list) {
- dprintk("RPC: %s: unmapping fmr %p\n",
- __func__, &mr->fmr);
- trace_xprtrdma_mr_localinv(mr);
- list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
- }
- r_xprt->rx_stats.local_inv_needed++;
- rc = ib_unmap_fmr(&unmap_list);
- if (rc)
- goto out_release;
-
- /* ORDER: Now DMA unmap all of the req's MRs, and return
- * them to the free MW list.
- */
- while (!list_empty(mrs)) {
- mr = rpcrdma_mr_pop(mrs);
- list_del(&mr->fmr.fm_mr->list);
- rpcrdma_mr_unmap_and_put(mr);
- }
-
- return;
-
-out_release:
- pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
-
- while (!list_empty(mrs)) {
- mr = rpcrdma_mr_pop(mrs);
- list_del(&mr->fmr.fm_mr->list);
- rpcrdma_mr_recycle(mr);
- }
-}
-
-const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
- .ro_map = fmr_op_map,
- .ro_send = fmr_op_send,
- .ro_unmap_sync = fmr_op_unmap_sync,
- .ro_open = fmr_op_open,
- .ro_maxpages = fmr_op_maxpages,
- .ro_init_mr = fmr_op_init_mr,
- .ro_release_mr = fmr_op_release_mr,
- .ro_displayname = "fmr",
- .ro_send_w_inv_ok = 0,
-};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index fc6378cc0c1c..6a561056b538 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -15,21 +15,21 @@
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frwr_op_map). When the RDMA operation is finished, this
+ * Work Request (frwr_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
- * (frwr_op_unmap_sync).
+ * (frwr_unmap_sync).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
* prevent provider work queue overflows). This greatly reduces HCA
* interrupt workload.
*
- * As an optimization, frwr_op_unmap marks MRs INVALID before the
+ * As an optimization, frwr_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
* rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall.
*
- * But this means that frwr_op_map() can occasionally encounter an MR
+ * But this means that frwr_map() can occasionally encounter an MR
* that is INVALID but the LOCAL_INV WR has not completed. Work Queue
* ordering prevents a subsequent FAST_REG WR from executing against
* that MR while it is still being invalidated.
@@ -57,14 +57,14 @@
* FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
* state, and the pending WR was flushed.
*
- * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
+ * When frwr_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
* recovered MRs are placed back on the rb_mrs list when recovery is
- * complete. frwr_op_map allocates another MR for the current RPC while
+ * complete. frwr_map allocates another MR for the current RPC while
* the broken MR is reset.
*
- * To ensure that frwr_op_map doesn't encounter an MR that is marked
+ * To ensure that frwr_map doesn't encounter an MR that is marked
* INVALID but that is about to be flushed due to a previous transport
* disconnect, the transport connect worker attempts to drain all
* pending send queue WRs before the transport is reconnected.
@@ -80,8 +80,13 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-bool
-frwr_is_supported(struct rpcrdma_ia *ia)
+/**
+ * frwr_is_supported - Check if device supports FRWR
+ * @ia: interface adapter to check
+ *
+ * Returns true if device supports FRWR, otherwise false
+ */
+bool frwr_is_supported(struct rpcrdma_ia *ia)
{
struct ib_device_attr *attrs = &ia->ri_device->attrs;
@@ -97,15 +102,18 @@ out_not_supported:
return false;
}
-static void
-frwr_op_release_mr(struct rpcrdma_mr *mr)
+/**
+ * frwr_release_mr - Destroy one MR
+ * @mr: MR allocated by frwr_init_mr
+ *
+ */
+void frwr_release_mr(struct rpcrdma_mr *mr)
{
int rc;
rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
- pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- mr, rc);
+ trace_xprtrdma_frwr_dereg(mr, rc);
kfree(mr->mr_sg);
kfree(mr);
}
@@ -117,60 +125,78 @@ static void
frwr_mr_recycle_worker(struct work_struct *work)
{
struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
- enum rpcrdma_frwr_state state = mr->frwr.fr_state;
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
trace_xprtrdma_mr_recycle(mr);
- if (state != FRWR_FLUSHED_LI) {
+ if (mr->mr_dir != DMA_NONE) {
trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ mr->mr_dir = DMA_NONE;
}
spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mr->mr_all);
r_xprt->rx_stats.mrs_recycled++;
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- frwr_op_release_mr(mr);
+
+ frwr_release_mr(mr);
}
-static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
+/**
+ * frwr_init_mr - Initialize one MR
+ * @ia: interface adapter
+ * @mr: generic MR to prepare for FRWR
+ *
+ * Returns zero if successful. Otherwise a negative errno
+ * is returned.
+ */
+int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
unsigned int depth = ia->ri_max_frwr_depth;
- struct rpcrdma_frwr *frwr = &mr->frwr;
+ struct scatterlist *sg;
+ struct ib_mr *frmr;
int rc;
- frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
- if (IS_ERR(frwr->fr_mr))
+ frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+ if (IS_ERR(frmr))
goto out_mr_err;
- mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
- if (!mr->mr_sg)
+ sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
+ if (!sg)
goto out_list_err;
+ mr->frwr.fr_mr = frmr;
+ mr->frwr.fr_state = FRWR_IS_INVALID;
+ mr->mr_dir = DMA_NONE;
INIT_LIST_HEAD(&mr->mr_list);
INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
- sg_init_table(mr->mr_sg, depth);
- init_completion(&frwr->fr_linv_done);
+ init_completion(&mr->frwr.fr_linv_done);
+
+ sg_init_table(sg, depth);
+ mr->mr_sg = sg;
return 0;
out_mr_err:
- rc = PTR_ERR(frwr->fr_mr);
- dprintk("RPC: %s: ib_alloc_mr status %i\n",
- __func__, rc);
+ rc = PTR_ERR(frmr);
+ trace_xprtrdma_frwr_alloc(mr, rc);
return rc;
out_list_err:
- rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n",
__func__);
- ib_dereg_mr(frwr->fr_mr);
- return rc;
+ ib_dereg_mr(frmr);
+ return -ENOMEM;
}
-/* On success, sets:
+/**
+ * frwr_open - Prepare an endpoint for use with FRWR
+ * @ia: interface adapter this endpoint will use
+ * @ep: endpoint to prepare
+ * @cdata: transport parameters
+ *
+ * On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
* cdata->max_requests
@@ -179,10 +205,11 @@ out_list_err:
* And these FRWR-related fields:
* ia->ri_max_frwr_depth
* ia->ri_mrtype
+ *
+ * On failure, a negative errno is returned.
*/
-static int
-frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
- struct rpcrdma_create_data_internal *cdata)
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+ struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr *attrs = &ia->ri_device->attrs;
int max_qp_wr, depth, delta;
@@ -191,10 +218,17 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
- ia->ri_max_frwr_depth =
- min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- attrs->max_fast_reg_page_list_len);
- dprintk("RPC: %s: device's max FR page list len = %u\n",
+ /* Quirk: Some devices advertise a large max_fast_reg_page_list_len
+ * capability, but perform optimally when the MRs are not larger
+ * than a page.
+ */
+ if (attrs->max_sge_rd > 1)
+ ia->ri_max_frwr_depth = attrs->max_sge_rd;
+ else
+ ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
+ if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
+ ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
+ dprintk("RPC: %s: max FR page list depth = %u\n",
__func__, ia->ri_max_frwr_depth);
/* Add room for frwr register and invalidate WRs.
@@ -242,20 +276,28 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frwr_depth);
- ia->ri_max_segs += 2; /* segments for head and tail buffers */
+ /* Reply chunks require segments for head and tail buffers */
+ ia->ri_max_segs += 2;
+ if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS)
+ ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS;
return 0;
}
-/* FRWR mode conveys a list of pages per chunk segment. The
+/**
+ * frwr_maxpages - Compute size of largest payload
+ * @r_xprt: transport
+ *
+ * Returns maximum size of an RPC message, in pages.
+ *
+ * FRWR mode conveys a list of pages per chunk segment. The
* maximum length of that list is the FRWR page list depth.
*/
-static size_t
-frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
+ (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
}
static void
@@ -332,12 +374,25 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
trace_xprtrdma_wc_li_wake(wc, frwr);
}
-/* Post a REG_MR Work Request to register a memory region
+/**
+ * frwr_map - Register a memory region
+ * @r_xprt: controlling transport
+ * @seg: memory region co-ordinates
+ * @nsegs: number of segments remaining
+ * @writing: true when RDMA Write will be used
+ * @xid: XID of RPC using the registered memory
+ * @out: initialized MR
+ *
+ * Prepare a REG_MR Work Request to register a memory region
* for remote access via RDMA READ or RDMA WRITE.
+ *
+ * Returns the next segment or a negative errno pointer.
+ * On success, the prepared MR is planted in @out.
*/
-static struct rpcrdma_mr_seg *
-frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mr **out)
+struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_mr_seg *seg,
+ int nsegs, bool writing, u32 xid,
+ struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
@@ -384,13 +439,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
- trace_xprtrdma_mr_map(mr);
ibmr = frwr->fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
+ ibmr->iova &= 0x00000000ffffffff;
+ ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32;
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
@@ -404,32 +460,35 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_handle = ibmr->rkey;
mr->mr_length = ibmr->length;
mr->mr_offset = ibmr->iova;
+ trace_xprtrdma_mr_map(mr);
*out = mr;
return seg;
out_dmamap_err:
- pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mr->mr_sg, i);
frwr->fr_state = FRWR_IS_INVALID;
+ trace_xprtrdma_frwr_sgerr(mr, i);
rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_mapmr_err:
- pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
- frwr->fr_mr, n, mr->mr_nents);
+ trace_xprtrdma_frwr_maperr(mr, n);
rpcrdma_mr_recycle(mr);
return ERR_PTR(-EIO);
}
-/* Post Send WR containing the RPC Call message.
+/**
+ * frwr_send - post Send WR containing the RPC Call message
+ * @ia: interface adapter
+ * @req: Prepared RPC Call
*
- * For FRMR, chain any FastReg WRs to the Send WR. Only a
+ * For FRWR, chain any FastReg WRs to the Send WR. Only a
* single ib_post_send call is needed to register memory
* and then post the Send WR.
+ *
+ * Returns the result of ib_post_send.
*/
-static int
-frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
@@ -451,15 +510,18 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
}
/* If ib_post_send fails, the next ->send_request for
- * @req will queue these MWs for recovery.
+ * @req will queue these MRs for recovery.
*/
return ib_post_send(ia->ri_id->qp, post_wr, NULL);
}
-/* Handle a remotely invalidated mr on the @mrs list
+/**
+ * frwr_reminv - handle a remotely invalidated mr on the @mrs list
+ * @rep: Received reply
+ * @mrs: list of MRs to check
+ *
*/
-static void
-frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
struct rpcrdma_mr *mr;
@@ -473,7 +535,10 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
}
}
-/* Invalidate all memory regions that were registered for "req".
+/**
+ * frwr_unmap_sync - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport
+ * @mrs: list of MRs to process
*
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
@@ -481,8 +546,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
* Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
-static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
struct ib_send_wr *first, **prev, *last;
const struct ib_send_wr *bad_wr;
@@ -561,20 +625,7 @@ out_release:
mr = container_of(frwr, struct rpcrdma_mr, frwr);
bad_wr = bad_wr->next;
- list_del(&mr->mr_list);
- frwr_op_release_mr(mr);
+ list_del_init(&mr->mr_list);
+ rpcrdma_mr_recycle(mr);
}
}
-
-const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
- .ro_map = frwr_op_map,
- .ro_send = frwr_op_send,
- .ro_reminv = frwr_op_reminv,
- .ro_unmap_sync = frwr_op_unmap_sync,
- .ro_open = frwr_op_open,
- .ro_maxpages = frwr_op_maxpages,
- .ro_init_mr = frwr_op_init_mr,
- .ro_release_mr = frwr_op_release_mr,
- .ro_displayname = "frwr",
- .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK,
-};
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 9f53e0240035..d18614e02b4e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -218,11 +218,12 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = offset_in_page(xdrbuf->page_base);
while (len) {
- if (unlikely(!*ppages)) {
- /* XXX: Certain upper layer operations do
- * not provide receive buffer pages.
- */
- *ppages = alloc_page(GFP_ATOMIC);
+ /* ACL likes to be lazy in allocating pages - ACLs
+ * are small by default but can get huge.
+ */
+ if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
+ if (!*ppages)
+ *ppages = alloc_page(GFP_ATOMIC);
if (!*ppages)
return -ENOBUFS;
}
@@ -356,8 +357,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
return nsegs;
do {
- seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- false, &mr);
+ seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE;
- trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
+ trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
r_xprt->rx_stats.read_chunk_count++;
nsegs -= mr->mr_nents;
} while (nsegs);
@@ -414,8 +414,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
- seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mr);
+ seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
@@ -423,7 +422,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
+ trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.write_chunk_count++;
r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
@@ -472,8 +471,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
- seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mr);
+ seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
@@ -481,7 +479,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
+ trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.reply_chunk_count++;
r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
@@ -667,7 +665,7 @@ out_mapping_overflow:
out_mapping_err:
rpcrdma_unmap_sendctx(sc);
- pr_err("rpcrdma: Send mapping error\n");
+ trace_xprtrdma_dma_maperr(sge[sge_no].addr);
return false;
}
@@ -1188,17 +1186,20 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
break;
- dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n",
- rqst->rq_task->tk_pid, __func__,
- be32_to_cpup(p), be32_to_cpu(*(p + 1)));
+ dprintk("RPC: %s: server reports "
+ "version error (%u-%u), xid %08x\n", __func__,
+ be32_to_cpup(p), be32_to_cpu(*(p + 1)),
+ be32_to_cpu(rep->rr_xid));
break;
case err_chunk:
- dprintk("RPC: %5u: %s: server reports header decoding error\n",
- rqst->rq_task->tk_pid, __func__);
+ dprintk("RPC: %s: server reports "
+ "header decoding error, xid %08x\n", __func__,
+ be32_to_cpu(rep->rr_xid));
break;
default:
- dprintk("RPC: %5u: %s: server reports unrecognized error %d\n",
- rqst->rq_task->tk_pid, __func__, be32_to_cpup(p));
+ dprintk("RPC: %s: server reports "
+ "unrecognized error %d, xid %08x\n", __func__,
+ be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
}
r_xprt->rx_stats.bad_reply_count++;
@@ -1248,7 +1249,6 @@ out:
out_badheader:
trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
- status = -EIO;
goto out;
}
@@ -1262,8 +1262,7 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* RPC has relinquished all its Send Queue entries.
*/
if (!list_empty(&req->rl_registered))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
- &req->rl_registered);
+ frwr_unmap_sync(r_xprt, &req->rl_registered);
/* Ensure that any DMA mapped pages associated with
* the Send of the RPC Call have been unmapped before
@@ -1292,7 +1291,7 @@ void rpcrdma_deferred_completion(struct work_struct *work)
trace_xprtrdma_defer_cmp(rep);
if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
- r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
+ frwr_reminv(rep, &req->rl_registered);
rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep);
}
@@ -1312,11 +1311,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;
- --buf->rb_posted_receives;
-
- if (rep->rr_hdrbuf.head[0].iov_len == 0)
- goto out_badstatus;
-
/* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base);
@@ -1356,36 +1350,30 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
}
req = rpcr_to_rdmar(rqst);
+ if (req->rl_reply) {
+ trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
+ rpcrdma_recv_buffer_put(req->rl_reply);
+ }
req->rl_reply = rep;
rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
-
- rpcrdma_post_recvs(r_xprt, false);
- queue_work(rpcrdma_receive_wq, &rep->rr_work);
+ queue_work(buf->rb_completion_wq, &rep->rr_work);
return;
out_badversion:
trace_xprtrdma_reply_vers(rep);
- goto repost;
+ goto out;
-/* The RPC transaction has already been terminated, or the header
- * is corrupt.
- */
out_norqst:
spin_unlock(&xprt->queue_lock);
trace_xprtrdma_reply_rqst(rep);
- goto repost;
+ goto out;
out_shortreply:
trace_xprtrdma_reply_short(rep);
-/* If no pending RPC transaction was matched, post a replacement
- * receive buffer before returning.
- */
-repost:
- rpcrdma_post_recvs(r_xprt, false);
-out_badstatus:
+out:
rpcrdma_recv_buffer_put(rep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 134bef6a451e..abdb3004a1e3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -235,9 +235,6 @@ void svc_rdma_cleanup(void)
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- svc_unreg_xprt_class(&svc_rdma_bc_class);
-#endif
svc_unreg_xprt_class(&svc_rdma_class);
}
@@ -259,8 +256,5 @@ int svc_rdma_init(void)
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- svc_reg_xprt_class(&svc_rdma_bc_class);
-#endif
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index f3c147d70286..b908f2ca08fd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -200,11 +200,10 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
svc_rdma_send_ctxt_put(rdma, ctxt);
goto drop_connection;
}
- return rc;
+ return 0;
drop_connection:
dprintk("svcrdma: failed to send bc call\n");
- xprt_disconnect_done(xprt);
return -ENOTCONN;
}
@@ -225,8 +224,11 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
ret = -ENOTCONN;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
ret = rpcrdma_bc_send_request(rdma, rqst);
+ if (ret == -ENOTCONN)
+ svc_close_xprt(sxprt);
+ }
mutex_unlock(&sxprt->xpt_mutex);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index b24d5b8f2fee..828b149eaaef 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
return p;
}
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one R_key to invalidate.
+ *
+ * If there is exactly one distinct R_key in the received transport
+ * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
+ *
+ * Perform this operation while the received transport header is
+ * still in the CPU cache.
+ */
+static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ __be32 inv_rkey, *p;
+ u32 i, segcount;
+
+ ctxt->rc_inv_rkey = 0;
+
+ if (!rdma->sc_snd_w_inv)
+ return;
+
+ inv_rkey = xdr_zero;
+ p = ctxt->rc_recv_buf;
+ p += rpcrdma_fixed_maxsz;
+
+ /* Read list */
+ while (*p++ != xdr_zero) {
+ p++; /* position */
+ if (inv_rkey == xdr_zero)
+ inv_rkey = *p;
+ else if (inv_rkey != *p)
+ return;
+ p += 4;
+ }
+
+ /* Write list */
+ while (*p++ != xdr_zero) {
+ segcount = be32_to_cpup(p++);
+ for (i = 0; i < segcount; i++) {
+ if (inv_rkey == xdr_zero)
+ inv_rkey = *p;
+ else if (inv_rkey != *p)
+ return;
+ p += 4;
+ }
+ }
+
+ /* Reply chunk */
+ if (*p++ != xdr_zero) {
+ segcount = be32_to_cpup(p++);
+ for (i = 0; i < segcount; i++) {
+ if (inv_rkey == xdr_zero)
+ inv_rkey = *p;
+ else if (inv_rkey != *p)
+ return;
+ p += 4;
+ }
+ }
+
+ ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
+}
+
/* On entry, xdr->head[0].iov_base points to first byte in the
* RPC-over-RDMA header.
*
@@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
}
+ svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
p += rpcrdma_fixed_maxsz;
if (*p != xdr_zero)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 8602a5f1b515..cf51b8f9b15f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
*reply = NULL;
}
-/* RPC-over-RDMA Version One private extension: Remote Invalidation.
- * Responder's choice: requester signals it can handle Send With
- * Invalidate, and responder chooses one rkey to invalidate.
- *
- * Find a candidate rkey to invalidate when sending a reply. Picks the
- * first R_key it finds in the chunk lists.
- *
- * Returns zero if RPC's chunk lists are empty.
- */
-static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
- __be32 *wr_lst, __be32 *rp_ch)
-{
- __be32 *p;
-
- p = rdma_argp + rpcrdma_fixed_maxsz;
- if (*p != xdr_zero)
- p += 2;
- else if (wr_lst && be32_to_cpup(wr_lst + 1))
- p = wr_lst + 2;
- else if (rp_ch && be32_to_cpup(rp_ch + 1))
- p = rp_ch + 2;
- else
- return 0;
- return be32_to_cpup(p);
-}
-
static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
struct page *page,
@@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
*
* RDMA Send is the last step of transmitting an RPC reply. Pages
* involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the ctxt's page array. These pages are
+ * of the rqstp and into the sctxt's page array. These pages are
* DMA unmapped by each Write completion, but the subsequent Send
* completion finally releases these pages.
*
@@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* - The Reply's transport header will never be larger than a page.
*/
static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt,
- __be32 *rdma_argp,
+ struct svc_rdma_send_ctxt *sctxt,
+ struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp,
__be32 *wr_lst, __be32 *rp_ch)
{
int ret;
if (!rp_ch) {
- ret = svc_rdma_map_reply_msg(rdma, ctxt,
+ ret = svc_rdma_map_reply_msg(rdma, sctxt,
&rqstp->rq_res, wr_lst);
if (ret < 0)
return ret;
}
- svc_rdma_save_io_pages(rqstp, ctxt);
+ svc_rdma_save_io_pages(rqstp, sctxt);
- ctxt->sc_send_wr.opcode = IB_WR_SEND;
- if (rdma->sc_snd_w_inv) {
- ctxt->sc_send_wr.ex.invalidate_rkey =
- svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
- if (ctxt->sc_send_wr.ex.invalidate_rkey)
- ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+ if (rctxt->rc_inv_rkey) {
+ sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+ sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+ } else {
+ sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
dprintk("svcrdma: posting Send WR with %u sge(s)\n",
- ctxt->sc_send_wr.num_sge);
- return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+ sctxt->sc_send_wr.num_sge);
+ return svc_rdma_send(rdma, &sctxt->sc_send_wr);
}
/* Given the client-provided Write and Reply chunks, the server was not
@@ -741,10 +714,6 @@ static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
return 0;
}
-void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
-{
-}
-
/**
* svc_rdma_sendto - Transmit an RPC reply
* @rqstp: processed RPC request, reply XDR already in ::rq_res
@@ -809,7 +778,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
}
svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
- ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
+ ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
wr_lst, rp_ch);
if (ret < 0)
goto err1;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2f7ec8912f49..924c17d46903 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -85,7 +85,6 @@ static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_release_rqst = svc_rdma_release_rqst,
.xpo_detach = svc_rdma_detach,
.xpo_free = svc_rdma_free,
- .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
.xpo_secure_port = svc_rdma_secure_port,
@@ -100,64 +99,6 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
- struct sockaddr *, int, int);
-static void svc_rdma_bc_detach(struct svc_xprt *);
-static void svc_rdma_bc_free(struct svc_xprt *);
-
-static const struct svc_xprt_ops svc_rdma_bc_ops = {
- .xpo_create = svc_rdma_bc_create,
- .xpo_detach = svc_rdma_bc_detach,
- .xpo_free = svc_rdma_bc_free,
- .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
- .xpo_secure_port = svc_rdma_secure_port,
-};
-
-struct svc_xprt_class svc_rdma_bc_class = {
- .xcl_name = "rdma-bc",
- .xcl_owner = THIS_MODULE,
- .xcl_ops = &svc_rdma_bc_ops,
- .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
-};
-
-static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
- struct net *net,
- struct sockaddr *sa, int salen,
- int flags)
-{
- struct svcxprt_rdma *cma_xprt;
- struct svc_xprt *xprt;
-
- cma_xprt = svc_rdma_create_xprt(serv, net);
- if (!cma_xprt)
- return ERR_PTR(-ENOMEM);
- xprt = &cma_xprt->sc_xprt;
-
- svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
- set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
- serv->sv_bc_xprt = xprt;
-
- dprintk("svcrdma: %s(%p)\n", __func__, xprt);
- return xprt;
-}
-
-static void svc_rdma_bc_detach(struct svc_xprt *xprt)
-{
- dprintk("svcrdma: %s(%p)\n", __func__, xprt);
-}
-
-static void svc_rdma_bc_free(struct svc_xprt *xprt)
-{
- struct svcxprt_rdma *rdma =
- container_of(xprt, struct svcxprt_rdma, sc_xprt);
-
- dprintk("svcrdma: %s(%p)\n", __func__, xprt);
- if (xprt)
- kfree(rdma);
-}
-#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-
/* QP event handler */
static void qp_event_handler(struct ib_event *event, void *context)
{
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ae2a83828953..fbc171ebfe91 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -268,7 +268,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- trace_xprtrdma_inject_dsc(r_xprt);
+ trace_xprtrdma_op_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id);
}
@@ -284,7 +284,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- trace_xprtrdma_destroy(r_xprt);
+ trace_xprtrdma_op_destroy(r_xprt);
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
@@ -318,17 +318,12 @@ xprt_setup_rdma(struct xprt_create *args)
struct sockaddr *sap;
int rc;
- if (args->addrlen > sizeof(xprt->addr)) {
- dprintk("RPC: %s: address too large\n", __func__);
+ if (args->addrlen > sizeof(xprt->addr))
return ERR_PTR(-EBADF);
- }
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
- if (xprt == NULL) {
- dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
- __func__);
+ if (!xprt)
return ERR_PTR(-ENOMEM);
- }
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
@@ -399,7 +394,7 @@ xprt_setup_rdma(struct xprt_create *args)
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker);
- xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
+ xprt->max_payload = frwr_maxpages(new_xprt);
if (xprt->max_payload == 0)
goto out4;
xprt->max_payload <<= PAGE_SHIFT;
@@ -423,7 +418,7 @@ out3:
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
- trace_xprtrdma_destroy(new_xprt);
+ trace_xprtrdma_op_destroy(new_xprt);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
return ERR_PTR(rc);
@@ -433,29 +428,33 @@ out1:
* xprt_rdma_close - close a transport connection
* @xprt: transport context
*
- * Called during transport shutdown, reconnect, or device removal.
+ * Called during autoclose or device removal.
+ *
* Caller holds @xprt's send lock to prevent activity on this
* transport while the connection is torn down.
*/
-static void
-xprt_rdma_close(struct rpc_xprt *xprt)
+void xprt_rdma_close(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
+ might_sleep();
+
+ trace_xprtrdma_op_close(r_xprt);
+
+ /* Prevent marshaling and sending of new requests */
+ xprt_clear_connected(xprt);
if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
- xprt_clear_connected(xprt);
rpcrdma_ia_remove(ia);
- return;
+ goto out;
}
+
if (ep->rep_connected == -ENODEV)
return;
if (ep->rep_connected > 0)
xprt->reestablish_timeout = 0;
- xprt_disconnect_done(xprt);
rpcrdma_ep_disconnect(ep, ia);
/* Prepare @xprt for the next connection by reinitializing
@@ -463,6 +462,10 @@ xprt_rdma_close(struct rpc_xprt *xprt)
*/
r_xprt->rx_buf.rb_credits = 1;
xprt->cwnd = RPC_CWNDSHIFT;
+
+out:
+ ++xprt->connect_cookie;
+ xprt_disconnect_done(xprt);
}
/**
@@ -525,6 +528,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ trace_xprtrdma_op_connect(r_xprt);
if (r_xprt->rx_ep.rep_connected != 0) {
/* Reconnect */
schedule_delayed_work(&r_xprt->rx_connect_worker,
@@ -659,11 +663,11 @@ xprt_rdma_allocate(struct rpc_task *task)
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
- trace_xprtrdma_allocate(task, req);
+ trace_xprtrdma_op_allocate(task, req);
return 0;
out_fail:
- trace_xprtrdma_allocate(task, NULL);
+ trace_xprtrdma_op_allocate(task, NULL);
return -ENOMEM;
}
@@ -682,7 +686,7 @@ xprt_rdma_free(struct rpc_task *task)
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req);
- trace_xprtrdma_rpc_done(task, req);
+ trace_xprtrdma_op_free(task, req);
}
/**
@@ -696,8 +700,10 @@ xprt_rdma_free(struct rpc_task *task)
* %-ENOTCONN if the caller should reconnect and call again
* %-EAGAIN if the caller should call again
* %-ENOBUFS if the caller should call again after a delay
- * %-EIO if a permanent error occurred and the request was not
- * sent. Do not try to send this message again.
+ * %-EMSGSIZE if encoding ran out of buffer space. The request
+ * was not sent. Do not try to send this message again.
+ * %-EIO if an I/O error occurred. The request was not sent.
+ * Do not try to send this message again.
*/
static int
xprt_rdma_send_request(struct rpc_rqst *rqst)
@@ -713,7 +719,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
if (!xprt_connected(xprt))
- goto drop_connection;
+ return -ENOTCONN;
if (!xprt_request_get_cong(xprt, rqst))
return -EBADSLT;
@@ -745,8 +751,8 @@ failed_marshal:
if (rc != -ENOTCONN)
return rc;
drop_connection:
- xprt_disconnect_done(xprt);
- return -ENOTCONN; /* implies disconnect */
+ xprt_rdma_close(xprt);
+ return -ENOTCONN;
}
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@@ -827,7 +833,6 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
.inject_disconnect = xprt_rdma_inject_disconnect,
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
.bc_setup = xprt_rdma_bc_setup,
- .bc_up = xprt_rdma_bc_up,
.bc_maxpayload = xprt_rdma_bc_maxpayload,
.bc_free_rqst = xprt_rdma_bc_free_rqst,
.bc_destroy = xprt_rdma_bc_destroy,
@@ -844,58 +849,31 @@ static struct xprt_class xprt_rdma = {
void xprt_rdma_cleanup(void)
{
- int rc;
-
- dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
sunrpc_table_header = NULL;
}
#endif
- rc = xprt_unregister_transport(&xprt_rdma);
- if (rc)
- dprintk("RPC: %s: xprt_unregister returned %i\n",
- __func__, rc);
-
- rpcrdma_destroy_wq();
- rc = xprt_unregister_transport(&xprt_rdma_bc);
- if (rc)
- dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
- __func__, rc);
+ xprt_unregister_transport(&xprt_rdma);
+ xprt_unregister_transport(&xprt_rdma_bc);
}
int xprt_rdma_init(void)
{
int rc;
- rc = rpcrdma_alloc_wq();
- if (rc)
- return rc;
-
rc = xprt_register_transport(&xprt_rdma);
- if (rc) {
- rpcrdma_destroy_wq();
+ if (rc)
return rc;
- }
rc = xprt_register_transport(&xprt_rdma_bc);
if (rc) {
xprt_unregister_transport(&xprt_rdma);
- rpcrdma_destroy_wq();
return rc;
}
- dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
-
- dprintk("Defaults:\n");
- dprintk("\tSlots %d\n"
- "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
- xprt_rdma_slot_table_entries,
- xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
- dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
-
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3ddba94c939f..7749a2bf6887 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -78,53 +78,25 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
+static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
-struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
-
-int
-rpcrdma_alloc_wq(void)
-{
- struct workqueue_struct *recv_wq;
-
- recv_wq = alloc_workqueue("xprtrdma_receive",
- WQ_MEM_RECLAIM | WQ_HIGHPRI,
- 0);
- if (!recv_wq)
- return -ENOMEM;
-
- rpcrdma_receive_wq = recv_wq;
- return 0;
-}
-
-void
-rpcrdma_destroy_wq(void)
-{
- struct workqueue_struct *wq;
-
- if (rpcrdma_receive_wq) {
- wq = rpcrdma_receive_wq;
- rpcrdma_receive_wq = NULL;
- destroy_workqueue(wq);
- }
-}
-
-/**
- * rpcrdma_disconnect_worker - Force a disconnect
- * @work: endpoint to be disconnected
- *
- * Provider callbacks can possibly run in an IRQ context. This function
- * is invoked in a worker thread to guarantee that disconnect wake-up
- * calls are always done in process context.
+/* Wait for outstanding transport work to finish.
*/
-static void
-rpcrdma_disconnect_worker(struct work_struct *work)
+static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
- struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
- rep_disconnect_worker.work);
- struct rpcrdma_xprt *r_xprt =
- container_of(ep, struct rpcrdma_xprt, rx_ep);
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- xprt_force_disconnect(&r_xprt->rx_xprt);
+ /* Flush Receives, then wait for deferred Reply work
+ * to complete.
+ */
+ ib_drain_qp(ia->ri_id->qp);
+ drain_workqueue(buf->rb_completion_wq);
+
+ /* Deferred Reply processing might have scheduled
+ * local invalidations.
+ */
+ ib_drain_sq(ia->ri_id->qp);
}
/**
@@ -143,15 +115,6 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context)
rx_ep);
trace_xprtrdma_qp_event(r_xprt, event);
- pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
- ib_event_msg(event->event), event->device->name,
- rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
-
- if (ep->rep_connected == 1) {
- ep->rep_connected = -EIO;
- schedule_delayed_work(&ep->rep_disconnect_worker, 0);
- wake_up_all(&ep->rep_connect_wait);
- }
}
/**
@@ -189,11 +152,13 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
rr_cqe);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- /* WARNING: Only wr_id and status are reliable at this point */
+ /* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_receive(wc);
+ --r_xprt->rx_ep.rep_receive_count;
if (wc->status != IB_WC_SUCCESS)
- goto out_fail;
+ goto out_flushed;
/* status == SUCCESS means all fields in wc are trustworthy */
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
@@ -204,17 +169,16 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rdmab_addr(rep->rr_rdmabuf),
wc->byte_len, DMA_FROM_DEVICE);
-out_schedule:
+ rpcrdma_post_recvs(r_xprt, false);
rpcrdma_reply_handler(rep);
return;
-out_fail:
+out_flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
- rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0);
- goto out_schedule;
+ rpcrdma_recv_buffer_put(rep);
}
static void
@@ -316,7 +280,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->rep_connected = -EAGAIN;
goto disconnected;
case RDMA_CM_EVENT_DISCONNECTED:
- ++xprt->connect_cookie;
ep->rep_connected = -ECONNABORTED;
disconnected:
xprt_force_disconnect(xprt);
@@ -326,10 +289,9 @@ disconnected:
break;
}
- dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__,
+ dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__,
rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
- ia->ri_device->name, ia->ri_ops->ro_displayname,
- rdma_event_msg(event->event));
+ ia->ri_device->name, rdma_event_msg(event->event));
return 0;
}
@@ -347,22 +309,15 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
xprt, RDMA_PS_TCP, IB_QPT_RC);
- if (IS_ERR(id)) {
- rc = PTR_ERR(id);
- dprintk("RPC: %s: rdma_create_id() failed %i\n",
- __func__, rc);
+ if (IS_ERR(id))
return id;
- }
ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL,
(struct sockaddr *)&xprt->rx_xprt.addr,
RDMA_RESOLVE_TIMEOUT);
- if (rc) {
- dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
- __func__, rc);
+ if (rc)
goto out;
- }
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
trace_xprtrdma_conn_tout(xprt);
@@ -375,11 +330,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
- if (rc) {
- dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
- __func__, rc);
+ if (rc)
goto out;
- }
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) {
trace_xprtrdma_conn_tout(xprt);
@@ -429,16 +381,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
switch (xprt_rdma_memreg_strategy) {
case RPCRDMA_FRWR:
- if (frwr_is_supported(ia)) {
- ia->ri_ops = &rpcrdma_frwr_memreg_ops;
- break;
- }
- /*FALLTHROUGH*/
- case RPCRDMA_MTHCAFMR:
- if (fmr_is_supported(ia)) {
- ia->ri_ops = &rpcrdma_fmr_memreg_ops;
+ if (frwr_is_supported(ia))
break;
- }
/*FALLTHROUGH*/
default:
pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
@@ -481,7 +425,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
* connection is already gone.
*/
if (ia->ri_id->qp) {
- ib_drain_qp(ia->ri_id->qp);
+ rpcrdma_xprt_drain(r_xprt);
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
}
@@ -552,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
}
ia->ri_max_send_sges = max_sge;
- rc = ia->ri_ops->ro_open(ia, ep, cdata);
+ rc = frwr_open(ia, ep, cdata);
if (rc)
return rc;
@@ -579,16 +523,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
cdata->max_requests >> 2);
ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
- INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
- rpcrdma_disconnect_worker);
+ ep->rep_receive_count = 0;
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
1, IB_POLL_WORKQUEUE);
if (IS_ERR(sendcq)) {
rc = PTR_ERR(sendcq);
- dprintk("RPC: %s: failed to create send CQ: %i\n",
- __func__, rc);
goto out1;
}
@@ -597,8 +538,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
0, IB_POLL_WORKQUEUE);
if (IS_ERR(recvcq)) {
rc = PTR_ERR(recvcq);
- dprintk("RPC: %s: failed to create recv CQ: %i\n",
- __func__, rc);
goto out2;
}
@@ -611,7 +550,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Prepare RDMA-CM private message */
pmsg->cp_magic = rpcrdma_cmp_magic;
pmsg->cp_version = RPCRDMA_CMP_VERSION;
- pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
+ pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
ep->rep_remote_cma.private_data = pmsg;
@@ -653,8 +592,6 @@ out1:
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- cancel_delayed_work_sync(&ep->rep_disconnect_worker);
-
if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
@@ -740,11 +677,8 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
}
err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
- if (err) {
- dprintk("RPC: %s: rdma_create_qp returned %d\n",
- __func__, err);
+ if (err)
goto out_destroy;
- }
/* Atomically replace the transport's ID and QP. */
rc = 0;
@@ -775,8 +709,6 @@ retry:
dprintk("RPC: %s: connecting...\n", __func__);
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
if (rc) {
- dprintk("RPC: %s: rdma_create_qp failed %i\n",
- __func__, rc);
rc = -ENETUNREACH;
goto out_noupdate;
}
@@ -798,11 +730,8 @@ retry:
rpcrdma_post_recvs(r_xprt, true);
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
- if (rc) {
- dprintk("RPC: %s: rdma_connect() failed with %i\n",
- __func__, rc);
+ if (rc)
goto out;
- }
wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
if (ep->rep_connected <= 0) {
@@ -822,8 +751,10 @@ out_noupdate:
return rc;
}
-/*
- * rpcrdma_ep_disconnect
+/**
+ * rpcrdma_ep_disconnect - Disconnect underlying transport
+ * @ep: endpoint to disconnect
+ * @ia: associated interface adapter
*
* This is separate from destroy to facilitate the ability
* to reconnect without recreating the endpoint.
@@ -834,19 +765,20 @@ out_noupdate:
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
+ struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
+ rx_ep);
int rc;
+ /* returns without wait if ID is not connected */
rc = rdma_disconnect(ia->ri_id);
if (!rc)
- /* returns without wait if not connected */
wait_event_interruptible(ep->rep_connect_wait,
ep->rep_connected != 1);
else
ep->rep_connected = rc;
- trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
- rx_ep), rc);
+ trace_xprtrdma_disconnect(r_xprt, rc);
- ib_drain_qp(ia->ri_id->qp);
+ rpcrdma_xprt_drain(r_xprt);
}
/* Fixed-size circular FIFO queue. This implementation is wait-free and
@@ -1034,7 +966,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
if (!mr)
break;
- rc = ia->ri_ops->ro_init_mr(ia, mr);
+ rc = frwr_init_mr(ia, mr);
if (rc) {
kfree(mr);
break;
@@ -1089,9 +1021,9 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
req->rl_buffer = buffer;
INIT_LIST_HEAD(&req->rl_registered);
- spin_lock(&buffer->rb_reqslock);
+ spin_lock(&buffer->rb_lock);
list_add(&req->rl_all, &buffer->rb_allreqs);
- spin_unlock(&buffer->rb_reqslock);
+ spin_unlock(&buffer->rb_lock);
return req;
}
@@ -1134,8 +1066,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
out_free:
kfree(rep);
out:
- dprintk("RPC: %s: reply buffer %d alloc failed\n",
- __func__, rc);
return rc;
}
@@ -1159,7 +1089,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs);
- spin_lock_init(&buf->rb_reqslock);
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
@@ -1174,13 +1103,19 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
}
buf->rb_credits = 1;
- buf->rb_posted_receives = 0;
INIT_LIST_HEAD(&buf->rb_recv_bufs);
rc = rpcrdma_sendctxs_create(r_xprt);
if (rc)
goto out;
+ buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI,
+ 0,
+ r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
+ if (!buf->rb_completion_wq)
+ goto out;
+
return 0;
out:
rpcrdma_buffer_destroy(buf);
@@ -1194,9 +1129,18 @@ rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
kfree(rep);
}
+/**
+ * rpcrdma_req_destroy - Destroy an rpcrdma_req object
+ * @req: unused object to be destroyed
+ *
+ * This function assumes that the caller prevents concurrent device
+ * unload and transport tear-down.
+ */
void
-rpcrdma_destroy_req(struct rpcrdma_req *req)
+rpcrdma_req_destroy(struct rpcrdma_req *req)
{
+ list_del(&req->rl_all);
+
rpcrdma_free_regbuf(req->rl_recvbuf);
rpcrdma_free_regbuf(req->rl_sendbuf);
rpcrdma_free_regbuf(req->rl_rdmabuf);
@@ -1208,7 +1152,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf);
- struct rpcrdma_ia *ia = rdmab_to_ia(buf);
struct rpcrdma_mr *mr;
unsigned int count;
@@ -1224,7 +1167,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
if (!list_empty(&mr->mr_list))
list_del(&mr->mr_list);
- ia->ri_ops->ro_release_mr(mr);
+ frwr_release_mr(mr);
count++;
spin_lock(&buf->rb_mrlock);
}
@@ -1234,11 +1177,24 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
dprintk("RPC: %s: released %u MRs\n", __func__, count);
}
+/**
+ * rpcrdma_buffer_destroy - Release all hw resources
+ * @buf: root control block for resources
+ *
+ * ORDERING: relies on a prior ib_drain_qp :
+ * - No more Send or Receive completions can occur
+ * - All MRs, reps, and reqs are returned to their free lists
+ */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
cancel_delayed_work_sync(&buf->rb_refresh_worker);
+ if (buf->rb_completion_wq) {
+ destroy_workqueue(buf->rb_completion_wq);
+ buf->rb_completion_wq = NULL;
+ }
+
rpcrdma_sendctxs_destroy(buf);
while (!list_empty(&buf->rb_recv_bufs)) {
@@ -1250,19 +1206,14 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
rpcrdma_destroy_rep(rep);
}
- spin_lock(&buf->rb_reqslock);
- while (!list_empty(&buf->rb_allreqs)) {
+ while (!list_empty(&buf->rb_send_bufs)) {
struct rpcrdma_req *req;
- req = list_first_entry(&buf->rb_allreqs,
- struct rpcrdma_req, rl_all);
- list_del(&req->rl_all);
-
- spin_unlock(&buf->rb_reqslock);
- rpcrdma_destroy_req(req);
- spin_lock(&buf->rb_reqslock);
+ req = list_first_entry(&buf->rb_send_bufs,
+ struct rpcrdma_req, rl_list);
+ list_del(&req->rl_list);
+ rpcrdma_req_destroy(req);
}
- spin_unlock(&buf->rb_reqslock);
rpcrdma_mrs_destroy(buf);
}
@@ -1329,9 +1280,12 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- trace_xprtrdma_mr_unmap(mr);
- ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ if (mr->mr_dir != DMA_NONE) {
+ trace_xprtrdma_mr_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ mr->mr_dir = DMA_NONE;
+ }
__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
}
@@ -1410,7 +1364,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
* receiving the payload of RDMA RECV operations. During Long Calls
- * or Replies they may be registered externally via ro_map.
+ * or Replies they may be registered externally via frwr_map.
*/
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
@@ -1446,8 +1400,10 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
(void *)rb->rg_base,
rdmab_length(rb),
rb->rg_direction);
- if (ib_dma_mapping_error(device, rdmab_addr(rb)))
+ if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
+ trace_xprtrdma_dma_maperr(rdmab_addr(rb));
return false;
+ }
rb->rg_device = device;
rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
@@ -1479,10 +1435,14 @@ rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
kfree(rb);
}
-/*
- * Prepost any receive buffer, then post send.
+/**
+ * rpcrdma_ep_post - Post WRs to a transport's Send Queue
+ * @ia: transport's device information
+ * @ep: transport's RDMA endpoint information
+ * @req: rpcrdma_req containing the Send WR to post
*
- * Receive buffer is donated to hardware, reclaimed upon recv completion.
+ * Returns 0 if the post was successful, otherwise -ENOTCONN
+ * is returned.
*/
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
@@ -1501,32 +1461,27 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
--ep->rep_send_count;
}
- rc = ia->ri_ops->ro_send(ia, req);
+ rc = frwr_send(ia, req);
trace_xprtrdma_post_send(req, rc);
if (rc)
return -ENOTCONN;
return 0;
}
-/**
- * rpcrdma_post_recvs - Maybe post some Receive buffers
- * @r_xprt: controlling transport
- * @temp: when true, allocate temp rpcrdma_rep objects
- *
- */
-void
+static void
rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct ib_recv_wr *wr, *bad_wr;
int needed, count, rc;
rc = 0;
count = 0;
needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
- if (buf->rb_posted_receives > needed)
+ if (ep->rep_receive_count > needed)
goto out;
- needed -= buf->rb_posted_receives;
+ needed -= ep->rep_receive_count;
count = 0;
wr = NULL;
@@ -1574,7 +1529,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
--count;
}
}
- buf->rb_posted_receives += count;
+ ep->rep_receive_count += count;
out:
trace_xprtrdma_post_recvs(r_xprt, count, rc);
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a13ccb643ce0..5a18472f2c9c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -66,7 +66,6 @@
* Interface Adapter -- one per transport instance
*/
struct rpcrdma_ia {
- const struct rpcrdma_memreg_ops *ri_ops;
struct ib_device *ri_device;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
@@ -81,8 +80,6 @@ struct rpcrdma_ia {
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
- struct ib_qp_attr ri_qp_attr;
- struct ib_qp_init_attr ri_qp_init_attr;
};
enum {
@@ -101,7 +98,7 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- struct delayed_work rep_disconnect_worker;
+ int rep_receive_count;
};
/* Pre-allocate extra Work Requests for handling backward receives
@@ -262,20 +259,12 @@ struct rpcrdma_frwr {
};
};
-struct rpcrdma_fmr {
- struct ib_fmr *fm_mr;
- u64 *fm_physaddrs;
-};
-
struct rpcrdma_mr {
struct list_head mr_list;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
- union {
- struct rpcrdma_fmr fmr;
- struct rpcrdma_frwr frwr;
- };
+ struct rpcrdma_frwr frwr;
struct rpcrdma_xprt *mr_xprt;
u32 mr_handle;
u32 mr_length;
@@ -401,20 +390,18 @@ struct rpcrdma_buffer {
spinlock_t rb_lock; /* protect buf lists */
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
+ struct list_head rb_allreqs;
+
unsigned long rb_flags;
u32 rb_max_requests;
u32 rb_credits; /* most recent credit grant */
- int rb_posted_receives;
u32 rb_bc_srv_max_requests;
- spinlock_t rb_reqslock; /* protect rb_allreqs */
- struct list_head rb_allreqs;
-
u32 rb_bc_max_requests;
+ struct workqueue_struct *rb_completion_wq;
struct delayed_work rb_refresh_worker;
};
-#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
/* rb_flags */
enum {
@@ -465,35 +452,6 @@ struct rpcrdma_stats {
};
/*
- * Per-registration mode operations
- */
-struct rpcrdma_xprt;
-struct rpcrdma_memreg_ops {
- struct rpcrdma_mr_seg *
- (*ro_map)(struct rpcrdma_xprt *,
- struct rpcrdma_mr_seg *, int, bool,
- struct rpcrdma_mr **);
- int (*ro_send)(struct rpcrdma_ia *ia,
- struct rpcrdma_req *req);
- void (*ro_reminv)(struct rpcrdma_rep *rep,
- struct list_head *mrs);
- void (*ro_unmap_sync)(struct rpcrdma_xprt *,
- struct list_head *);
- int (*ro_open)(struct rpcrdma_ia *,
- struct rpcrdma_ep *,
- struct rpcrdma_create_data_internal *);
- size_t (*ro_maxpages)(struct rpcrdma_xprt *);
- int (*ro_init_mr)(struct rpcrdma_ia *,
- struct rpcrdma_mr *);
- void (*ro_release_mr)(struct rpcrdma_mr *mr);
- const char *ro_displayname;
- const int ro_send_w_inv_ok;
-};
-
-extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
-extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
-
-/*
* RPCRDMA transport -- encapsulates the structures above for
* integration with RPC.
*
@@ -544,10 +502,6 @@ extern unsigned int xprt_rdma_memreg_strategy;
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *);
-bool frwr_is_supported(struct rpcrdma_ia *);
-bool fmr_is_supported(struct rpcrdma_ia *);
-
-extern struct workqueue_struct *rpcrdma_receive_wq;
/*
* Endpoint calls - xprtrdma/verbs.c
@@ -560,13 +514,12 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
-void rpcrdma_destroy_req(struct rpcrdma_req *);
+void rpcrdma_req_destroy(struct rpcrdma_req *req);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
@@ -604,9 +557,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
return __rpcrdma_dma_map_regbuf(ia, rb);
}
-int rpcrdma_alloc_wq(void);
-void rpcrdma_destroy_wq(void);
-
/*
* Wrappers for chunk registration, shared by read/write chunk code.
*/
@@ -617,6 +567,23 @@ rpcrdma_data_dir(bool writing)
return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
}
+/* Memory registration calls xprtrdma/frwr_ops.c
+ */
+bool frwr_is_supported(struct rpcrdma_ia *);
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+ struct rpcrdma_create_data_internal *cdata);
+int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
+void frwr_release_mr(struct rpcrdma_mr *mr);
+size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
+struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
+ struct rpcrdma_mr_seg *seg,
+ int nsegs, bool writing, u32 xid,
+ struct rpcrdma_mr **mr);
+int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
+void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
+ struct list_head *mrs);
+
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
@@ -653,6 +620,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_close(struct rpc_xprt *xprt);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
@@ -661,7 +629,6 @@ void xprt_rdma_cleanup(void);
*/
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
-int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index f0b3700cec95..13559e6a460b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -68,8 +68,6 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-
#define XS_TCP_LINGER_TO (15U * HZ)
static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
@@ -159,8 +157,6 @@ static struct ctl_table sunrpc_table[] = {
{ },
};
-#endif
-
/*
* Wait duration for a reply from the RPC portmapper.
*/
@@ -1400,17 +1396,6 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt)
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
-{
- int ret;
-
- ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
- SVC_SOCK_ANONYMOUS);
- if (ret < 0)
- return ret;
- return 0;
-}
-
static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
{
return PAGE_SIZE;
@@ -1600,6 +1585,7 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
/**
* xs_udp_timer - called when a retransmit timeout occurs on a UDP transport
+ * @xprt: controlling transport
* @task: task that timed out
*
* Adjust the congestion window after a retransmit timeout has occurred.
@@ -2257,6 +2243,7 @@ out:
/**
* xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
+ * @work: queued work item
*
* Invoked by a work queue tasklet.
*/
@@ -2665,7 +2652,6 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.inject_disconnect = xs_inject_disconnect,
#ifdef CONFIG_SUNRPC_BACKCHANNEL
.bc_setup = xprt_setup_bc,
- .bc_up = xs_tcp_bc_up,
.bc_maxpayload = xs_tcp_bc_maxpayload,
.bc_free_rqst = xprt_free_bc_rqst,
.bc_destroy = xprt_destroy_bc,
@@ -3107,10 +3093,8 @@ static struct xprt_class xs_bc_tcp_transport = {
*/
int init_socket_xprt(void)
{
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
-#endif
xprt_register_transport(&xs_local_transport);
xprt_register_transport(&xs_udp_transport);
@@ -3126,12 +3110,10 @@ int init_socket_xprt(void)
*/
void cleanup_socket_xprt(void)
{
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
sunrpc_table_header = NULL;
}
-#endif
xprt_unregister_transport(&xs_local_transport);
xprt_unregister_transport(&xs_udp_transport);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index fb2c0d8f359f..d27f30a9a01d 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -319,7 +319,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
bearer_disable(net, b);
- kfree(b);
errstr = "failed to create discoverer";
goto rejected;
}
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 21f6ccc89401..40f5cae623a7 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -904,6 +904,8 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock)
hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI,
TIPC_NL_PUBL_GET);
+ if (!hdr)
+ return -EMSGSIZE;
nest = nla_nest_start(args, TIPC_NLA_SOCK);
if (!nest) {