diff options
Diffstat (limited to 'net')
40 files changed, 1096 insertions, 450 deletions
diff --git a/net/Kconfig b/net/Kconfig index e24fa0873f32..e330594d3709 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -213,6 +213,7 @@ source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" +source "net/dns_resolver/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index 41d420070a38..ea60fbce9b1b 100644 --- a/net/Makefile +++ b/net/Makefile @@ -67,3 +67,4 @@ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o endif obj-$(CONFIG_WIMAX) += wimax/ +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ diff --git a/net/core/dev.c b/net/core/dev.c index 3721fbb9a83c..b9b22a3c4c8f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2058,16 +2058,16 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { int queue_index; - struct sock *sk = skb->sk; + const struct net_device_ops *ops = dev->netdev_ops; - queue_index = sk_tx_queue_get(sk); - if (queue_index < 0) { - const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } else { + struct sock *sk = skb->sk; + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { - if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { queue_index = 0; if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 26396ff67cf9..c83b421341c0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2706,7 +2706,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) } else if (skb_gro_len(p) != pinfo->gso_size) return -E2BIG; - headroom = NET_SKB_PAD + NET_IP_ALIGN; + headroom = skb_headroom(p); nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); if (unlikely(!nskb)) return -ENOMEM; diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig new file mode 100644 index 000000000000..50d49f7e0472 --- /dev/null +++ b/net/dns_resolver/Kconfig @@ -0,0 +1,27 @@ +# +# Configuration for DNS Resolver +# +config DNS_RESOLVER + tristate "DNS Resolver support" + depends on NET && KEYS + help + Saying Y here will include support for the DNS Resolver key type + which can be used to make upcalls to perform DNS lookups in + userspace. + + DNS Resolver is used to query DNS server for information. Examples + being resolving a UNC hostname element to an IP address for CIFS or + performing a DNS query for AFSDB records so that AFS can locate a + cell's volume location database servers. + + DNS Resolver is used by the CIFS and AFS modules, and would support + SMB2 later. DNS Resolver is supported by the userspace upcall + helper "/sbin/dns.resolver" via /etc/request-key.conf. + + See <file:Documentation/networking/dns_resolver.txt> for further + information. + + To compile this as a module, choose M here: the module will be called + dnsresolver. + + If unsure, say N. diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile new file mode 100644 index 000000000000..c0ef4e71dc49 --- /dev/null +++ b/net/dns_resolver/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Linux DNS Resolver. +# + +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o + +dns_resolver-objs := dns_key.o dns_query.o diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c new file mode 100644 index 000000000000..739435a6af39 --- /dev/null +++ b/net/dns_resolver/dns_key.c @@ -0,0 +1,293 @@ +/* Key type used to cache DNS lookups made by the kernel + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/keyctl.h> +#include <linux/err.h> +#include <linux/seq_file.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> +#include "internal.h" + +MODULE_DESCRIPTION("DNS Resolver"); +MODULE_AUTHOR("Wang Lei"); +MODULE_LICENSE("GPL"); + +unsigned dns_resolver_debug; +module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(debug, "DNS Resolver debugging mask"); + +const struct cred *dns_resolver_cache; + +#define DNS_ERRORNO_OPTION "dnserror" + +/* + * Instantiate a user defined key for dns_resolver. + * + * The data must be a NUL-terminated string, with the NUL char accounted in + * datalen. + * + * If the data contains a '#' characters, then we take the clause after each + * one to be an option of the form 'key=value'. The actual data of interest is + * the string leading up to the first '#'. For instance: + * + * "ip1,ip2,...#foo=bar" + */ +static int +dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +{ + struct user_key_payload *upayload; + unsigned long derrno; + int ret; + size_t result_len = 0; + const char *data = _data, *end, *opt; + + kenter("%%%d,%s,'%s',%zu", + key->serial, key->description, data, datalen); + + if (datalen <= 1 || !data || data[datalen - 1] != '\0') + return -EINVAL; + datalen--; + + /* deal with any options embedded in the data */ + end = data + datalen; + opt = memchr(data, '#', datalen); + if (!opt) { + /* no options: the entire data is the result */ + kdebug("no options"); + result_len = datalen; + } else { + const char *next_opt; + + result_len = opt - data; + opt++; + kdebug("options: '%s'", opt); + do { + const char *eq; + int opt_len, opt_nlen, opt_vlen, tmp; + + next_opt = memchr(opt, '#', end - opt) ?: end; + opt_len = next_opt - opt; + if (!opt_len) { + printk(KERN_WARNING + "Empty option to dns_resolver key %d\n", + key->serial); + return -EINVAL; + } + + eq = memchr(opt, '=', opt_len) ?: end; + opt_nlen = eq - opt; + eq++; + opt_vlen = next_opt - eq; /* will be -1 if no value */ + + tmp = opt_vlen >= 0 ? opt_vlen : 0; + kdebug("option '%*.*s' val '%*.*s'", + opt_nlen, opt_nlen, opt, tmp, tmp, eq); + + /* see if it's an error number representing a DNS error + * that's to be recorded as the result in this key */ + if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 && + memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) { + kdebug("dns error number option"); + if (opt_vlen <= 0) + goto bad_option_value; + + ret = strict_strtoul(eq, 10, &derrno); + if (ret < 0) + goto bad_option_value; + + if (derrno < 1 || derrno > 511) + goto bad_option_value; + + kdebug("dns error no. = %lu", derrno); + key->type_data.x[0] = -derrno; + continue; + } + + bad_option_value: + printk(KERN_WARNING + "Option '%*.*s' to dns_resolver key %d:" + " bad/missing value\n", + opt_nlen, opt_nlen, opt, key->serial); + return -EINVAL; + } while (opt = next_opt + 1, opt < end); + } + + /* don't cache the result if we're caching an error saying there's no + * result */ + if (key->type_data.x[0]) { + kleave(" = 0 [h_error %ld]", key->type_data.x[0]); + return 0; + } + + kdebug("store result"); + ret = key_payload_reserve(key, result_len); + if (ret < 0) + return -EINVAL; + + upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); + if (!upayload) { + kleave(" = -ENOMEM"); + return -ENOMEM; + } + + upayload->datalen = result_len; + memcpy(upayload->data, data, result_len); + upayload->data[result_len] = '\0'; + rcu_assign_pointer(key->payload.data, upayload); + + kleave(" = 0"); + return 0; +} + +/* + * The description is of the form "[<type>:]<domain_name>" + * + * The domain name may be a simple name or an absolute domain name (which + * should end with a period). The domain name is case-independent. + */ +static int +dns_resolver_match(const struct key *key, const void *description) +{ + int slen, dlen, ret = 0; + const char *src = key->description, *dsp = description; + + kenter("%s,%s", src, dsp); + + if (!src || !dsp) + goto no_match; + + if (strcasecmp(src, dsp) == 0) + goto matched; + + slen = strlen(src); + dlen = strlen(dsp); + if (slen <= 0 || dlen <= 0) + goto no_match; + if (src[slen - 1] == '.') + slen--; + if (dsp[dlen - 1] == '.') + dlen--; + if (slen != dlen || strncasecmp(src, dsp, slen) != 0) + goto no_match; + +matched: + ret = 1; +no_match: + kleave(" = %d", ret); + return ret; +} + +/* + * Describe a DNS key + */ +static void dns_resolver_describe(const struct key *key, struct seq_file *m) +{ + int err = key->type_data.x[0]; + + seq_puts(m, key->description); + if (err) + seq_printf(m, ": %d", err); + else + seq_printf(m, ": %u", key->datalen); +} + +struct key_type key_type_dns_resolver = { + .name = "dns_resolver", + .instantiate = dns_resolver_instantiate, + .match = dns_resolver_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = dns_resolver_describe, + .read = user_read, +}; + +static int __init init_dns_resolver(void) +{ + struct cred *cred; + struct key *keyring; + int ret; + + printk(KERN_NOTICE "Registering the %s key type\n", + key_type_dns_resolver.name); + + /* create an override credential set with a special thread keyring in + * which DNS requests are cached + * + * this is used to prevent malicious redirections from being installed + * with add_key(). + */ + cred = prepare_kernel_cred(NULL); + if (!cred) + return -ENOMEM; + + keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto failed_put_cred; + } + + ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); + if (ret < 0) + goto failed_put_key; + + ret = register_key_type(&key_type_dns_resolver); + if (ret < 0) + goto failed_put_key; + + /* instruct request_key() to use this special keyring as a cache for + * the results it looks up */ + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + dns_resolver_cache = cred; + + kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); + return 0; + +failed_put_key: + key_put(keyring); +failed_put_cred: + put_cred(cred); + return ret; +} + +static void __exit exit_dns_resolver(void) +{ + key_revoke(dns_resolver_cache->thread_keyring); + unregister_key_type(&key_type_dns_resolver); + put_cred(dns_resolver_cache); + printk(KERN_NOTICE "Unregistered %s key type\n", + key_type_dns_resolver.name); +} + +module_init(init_dns_resolver) +module_exit(exit_dns_resolver) +MODULE_LICENSE("GPL"); diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c new file mode 100644 index 000000000000..c32be292c7e3 --- /dev/null +++ b/net/dns_resolver/dns_query.c @@ -0,0 +1,165 @@ +/* Upcall routine, designed to work as a key type and working through + * /sbin/request-key to contact userspace when handling DNS queries. + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * The upcall wrapper used to make an arbitrary DNS query. + * + * This function requires the appropriate userspace tool dns.upcall to be + * installed and something like the following lines should be added to the + * /etc/request-key.conf file: + * + * create dns_resolver * * /sbin/dns.upcall %k + * + * For example to use this module to query AFSDB RR: + * + * create dns_resolver afsdb:* * /sbin/dns.afsdb %k + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/dns_resolver.h> +#include <linux/err.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> + +#include "internal.h" + +/** + * dns_query - Query the DNS + * @type: Query type (or NULL for straight host->IP lookup) + * @name: Name to look up + * @namelen: Length of name + * @options: Request options (or NULL if no options) + * @_result: Where to place the returned data. + * @_expiry: Where to store the result expiry time (or NULL) + * + * The data will be returned in the pointer at *result, and the caller is + * responsible for freeing it. + * + * The description should be of the form "[<query_type>:]<domain_name>", and + * the options need to be appropriate for the query type requested. If no + * query_type is given, then the query is a straight hostname to IP address + * lookup. + * + * The DNS resolution lookup is performed by upcalling to userspace by way of + * requesting a key of type dns_resolver. + * + * Returns the size of the result on success, -ve error code otherwise. + */ +int dns_query(const char *type, const char *name, size_t namelen, + const char *options, char **_result, time_t *_expiry) +{ + struct key *rkey; + struct user_key_payload *upayload; + const struct cred *saved_cred; + size_t typelen, desclen; + char *desc, *cp; + int ret, len; + + kenter("%s,%*.*s,%zu,%s", + type, (int)namelen, (int)namelen, name, namelen, options); + + if (!name || namelen == 0 || !_result) + return -EINVAL; + + /* construct the query key description as "[<type>:]<name>" */ + typelen = 0; + desclen = 0; + if (type) { + typelen = strlen(type); + if (typelen < 1) + return -EINVAL; + desclen += typelen + 1; + } + + if (!namelen) + namelen = strlen(name); + if (namelen < 3) + return -EINVAL; + desclen += namelen + 1; + + desc = kmalloc(desclen, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + cp = desc; + if (type) { + memcpy(cp, type, typelen); + cp += typelen; + *cp++ = ':'; + } + memcpy(cp, name, namelen); + cp += namelen; + *cp = '\0'; + + if (!options) + options = ""; + kdebug("call request_key(,%s,%s)", desc, options); + + /* make the upcall, using special credentials to prevent the use of + * add_key() to preinstall malicious redirections + */ + saved_cred = override_creds(dns_resolver_cache); + rkey = request_key(&key_type_dns_resolver, desc, options); + revert_creds(saved_cred); + kfree(desc); + if (IS_ERR(rkey)) { + ret = PTR_ERR(rkey); + goto out; + } + + down_read(&rkey->sem); + rkey->perm |= KEY_USR_VIEW; + + ret = key_validate(rkey); + if (ret < 0) + goto put; + + /* If the DNS server gave an error, return that to the caller */ + ret = rkey->type_data.x[0]; + if (ret) + goto put; + + upayload = rcu_dereference_protected(rkey->payload.data, + lockdep_is_held(&rkey->sem)); + len = upayload->datalen; + + ret = -ENOMEM; + *_result = kmalloc(len + 1, GFP_KERNEL); + if (!*_result) + goto put; + + memcpy(*_result, upayload->data, len + 1); + if (_expiry) + *_expiry = rkey->expiry; + + ret = len; +put: + up_read(&rkey->sem); + key_put(rkey); +out: + kleave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(dns_query); diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h new file mode 100644 index 000000000000..189ca9e9b785 --- /dev/null +++ b/net/dns_resolver/internal.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 Wang Lei + * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved. + * + * Internal DNS Rsolver stuff + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +/* + * dns_key.c + */ +extern const struct cred *dns_resolver_cache; + +/* + * debug tracing + */ +extern unsigned dns_resolver_debug; + +#define kdebug(FMT, ...) \ +do { \ + if (unlikely(dns_resolver_debug)) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", \ + current->comm, ##__VA_ARGS__); \ +} while (0) + +#define kenter(FMT, ...) kdebug("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) kdebug("<== %s()"FMT"", __func__, ##__VA_ARGS__) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f0550941df7b..721a8a37b45c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!inet->inet_saddr) inet->inet_saddr = rt->rt_src; /* Update source address */ - if (!inet->inet_rcv_saddr) + if (!inet->inet_rcv_saddr) { inet->inet_rcv_saddr = rt->rt_src; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } inet->inet_daddr = rt->rt_dst; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a43968918350..7d02a9f999fa 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -246,6 +246,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct fib_result res; int no_addr, rpf, accept_local; + bool dev_match; int ret; struct net *net; @@ -273,12 +274,22 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); + dev_match = false; + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } #else if (FIB_RES_DEV(res) == dev) + dev_match = true; #endif - { + if (dev_match) { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; fib_res_put(&res); return ret; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 79d057a939ba..4a8e370862bc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -186,7 +186,9 @@ static inline struct tnode *node_parent_rcu(struct node *node) { struct tnode *ret = node_parent(node); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } /* Same as rcu_assign_pointer @@ -1753,7 +1755,9 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) static struct leaf *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *) rcu_dereference(t->trie); + struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); if (!n) return NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f56b6e6c6aa..6298f75d5e93 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2738,6 +2738,11 @@ slow_output: } EXPORT_SYMBOL_GPL(__ip_route_output_key); +static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) +{ + return NULL; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2746,7 +2751,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, - .check = ipv4_dst_check, + .check = ipv4_blackhole_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..fb23c2e63b52 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) } EXPORT_SYMBOL(udp_lib_unhash); +/* + * inet_rcv_saddr was changed, we must rehash secondary hash + */ +void udp_lib_rehash(struct sock *sk, u16 newhash) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_hslot *hslot, *hslot2, *nhslot2; + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + nhslot2 = udp_hashslot2(udptable, newhash); + udp_sk(sk)->udp_portaddr_hash = newhash; + if (hslot2 != nhslot2) { + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + /* we must lock primary chain too */ + spin_lock_bh(&hslot->lock); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + + spin_unlock_bh(&hslot->lock); + } + } +} +EXPORT_SYMBOL(udp_lib_rehash); + +static void udp_v4_rehash(struct sock *sk) +{ + u16 new_hash = udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + udp_lib_rehash(sk, new_hash); +} + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1843,6 +1886,7 @@ struct proto udp_prot = { .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7d929a22cbc2..ef371aa01ac5 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -105,9 +105,12 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } goto out; } @@ -181,6 +184,8 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 13ef5bc05cf5..578f3c1a16db 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -113,14 +113,6 @@ static void nf_skb_free(struct sk_buff *skb) kfree_skb(NFCT_FRAG6_CB(skb)->orig); } -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf_init_frags.mem); - nf_skb_free(skb); - kfree_skb(skb); -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -282,66 +274,22 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. - */ - if (prev) { - int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; - - if (i > 0) { - offset += i; - if (end <= offset) { - pr_debug("overlap\n"); - goto err; - } - if (!pskb_pull(skb, i)) { - pr_debug("Can't pull\n"); - goto err; - } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } - - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. + /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - while (next && NFCT_FRAG6_CB(next)->offset < end) { - /* overlap is 'i' bytes */ - int i = end - NFCT_FRAG6_CB(next)->offset; - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - pr_debug("Eat head of the overlapped parts.: %d", i); - if (!pskb_pull(next, i)) - goto err; - /* next fragment */ - NFCT_FRAG6_CB(next)->offset += i; - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragmnet is completely overridden with - * new one drop it. - */ - next = next->next; + /* Check for overlap with preceding fragment. */ + if (prev && + (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && NFCT_FRAG6_CB(next)->offset < end) + goto discard_fq; NFCT_FRAG6_CB(skb)->offset = offset; @@ -371,6 +319,8 @@ found: write_unlock(&nf_frags.lock); return 0; +discard_fq: + fq_kill(fq); err: return -1; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 545c4141b755..64cfef1b0a4c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -149,13 +149,6 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_match); -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - void ip6_frag_init(struct inet_frag_queue *q, void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); @@ -346,58 +339,22 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. + /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - if (prev) { - int i = (FRAG6_CB(prev)->offset + prev->len) - offset; - if (i > 0) { - offset += i; - if (end <= offset) - goto err; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } + /* Check for overlap with preceding fragment. */ + if (prev && + (FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. - */ - while (next && FRAG6_CB(next)->offset < end) { - int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - FRAG6_CB(next)->offset += i; /* next fragment */ - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; - - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(fq->q.net, free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && FRAG6_CB(next)->offset < end) + goto discard_fq; FRAG6_CB(skb)->offset = offset; @@ -436,6 +393,8 @@ found: write_unlock(&ip6_frags.lock); return -1; +discard_fq: + fq_kill(fq); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1dd1affdead2..5acb3560ff15 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } +static void udp_v6_rehash(struct sock *sk) +{ + u16 new_hash = udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->inet_num); + + udp_lib_rehash(sk, new_hash); +} + static inline int compute_score(struct sock *sk, struct net *net, unsigned short hnum, struct in6_addr *saddr, __be16 sport, @@ -1447,6 +1456,7 @@ struct proto udpv6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index a788f9e9427d..6130f9d9dbe1 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1102,7 +1102,7 @@ int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len) memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */ le16_to_cpus(&val_len); n+=2; - if (val_len > 1016) { + if (val_len >= 1016) { IRDA_DEBUG(2, "%s(), parameter length to long\n", __func__ ); return -RSP_INVALID_COMMAND_FORMAT; } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 6d0bd198af19..be04d46110fe 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -103,6 +103,7 @@ ieee80211_rate_control_ops_get(const char *name) struct rate_control_ops *ops; const char *alg_name; + kparam_block_sysfs_write(ieee80211_default_rc_algo); if (!name) alg_name = ieee80211_default_rc_algo; else @@ -120,6 +121,7 @@ ieee80211_rate_control_ops_get(const char *name) /* try built-in one if specific alg requested but not found */ if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT)) ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); + kparam_unblock_sysfs_write(ieee80211_default_rc_algo); return ops; } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f8ddba48011..4c2f89df5cce 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); skb->ipvs_property = 1; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 33b329bfc2d2..7e9af5b76d9e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -410,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -497,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } - ct = (struct nf_conn *)skb->nfct; - if (ct && ct != &nf_conntrack_untracked) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, &n_cp->dport, 1); - /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 21e1a5e9b9d3..49df6bea6a2d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -static void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) { struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_tuple new_tuple; @@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) * real-server we will see RIP->DIP. */ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - new_tuple.src.u3 = cp->daddr; + if (outin) + new_tuple.src.u3 = cp->daddr; + else + new_tuple.dst.u3 = cp->vaddr; /* * This will also take care of UDP and other protocols. */ - new_tuple.src.u.tcp.port = cp->dport; + if (outin) + new_tuple.src.u.tcp.port = cp->dport; + else + new_tuple.dst.u.tcp.port = cp->vport; nf_conntrack_alter_reply(ct, &new_tuple); } @@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7043b294bb67..8e22bd345e71 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -597,12 +597,6 @@ extern unsigned rxrpc_debug; #define dbgprintk(FMT,...) \ printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) -/* make sure we maintain the format strings, even when debugging is disabled */ -static inline __attribute__((format(printf,1,2))) -void _dbprintk(const char *fmt, ...) -{ -} - #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) @@ -655,11 +649,11 @@ do { \ } while (0) #else -#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) -#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) -#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) -#define _proto(FMT,...) _dbprintk("### "FMT ,##__VA_ARGS__) -#define _net(FMT,...) _dbprintk("@@@ "FMT ,##__VA_ARGS__) +#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) +#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) +#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__) +#define _net(FMT,...) no_printk("@@@ "FMT ,##__VA_ARGS__) #endif /* diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd555637..d344dc481ccc 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1232,6 +1232,18 @@ out: return 0; } +static bool list_has_sctp_addr(const struct list_head *list, + union sctp_addr *ipaddr) +{ + struct sctp_transport *addr; + + list_for_each_entry(addr, list, transports) { + if (sctp_cmp_addr_exact(ipaddr, &addr->ipaddr)) + return true; + } + + return false; +} /* A restart is occurring, check to make sure no new addresses * are being added as we may be under a takeover attack. */ @@ -1240,10 +1252,10 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { - struct sctp_transport *new_addr, *addr; - int found; + struct sctp_transport *new_addr; + int ret = 1; - /* Implementor's Guide - Sectin 5.2.2 + /* Implementor's Guide - Section 5.2.2 * ... * Before responding the endpoint MUST check to see if the * unexpected INIT adds new addresses to the association. If new @@ -1254,31 +1266,19 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, /* Search through all current addresses and make sure * we aren't adding any new ones. */ - new_addr = NULL; - found = 0; - list_for_each_entry(new_addr, &new_asoc->peer.transport_addr_list, - transports) { - found = 0; - list_for_each_entry(addr, &asoc->peer.transport_addr_list, - transports) { - if (sctp_cmp_addr_exact(&new_addr->ipaddr, - &addr->ipaddr)) { - found = 1; - break; - } - } - if (!found) + transports) { + if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, + &new_addr->ipaddr)) { + sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + commands); + ret = 0; break; - } - - /* If a new address was added, ABORT the sender. */ - if (!found && new_addr) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, commands); + } } /* Return success if all addresses were found. */ - return found; + return ret; } /* Populate the verification/tie tags based on overlapping INIT diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 443c161eb8bd..3376d7657185 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. config RPCSEC_GSS_KRB5 - tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL + tristate + depends on SUNRPC && CRYPTO + prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4) + default y select SUNRPC_GSS - select CRYPTO select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CBC @@ -34,7 +35,7 @@ config RPCSEC_GSS_KRB5 available from http://linux-nfs.org/. In addition, user-space Kerberos support should be installed. - If unsure, say N. + If unsure, say Y. config RPCSEC_GSS_SPKM3 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8dc47f1d0001..36cb66022a27 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -19,6 +19,15 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +#define RPC_CREDCACHE_DEFAULT_HASHBITS (4) +struct rpc_cred_cache { + struct hlist_head *hashtable; + unsigned int hashbits; + spinlock_t lock; +}; + +static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS; + static DEFINE_SPINLOCK(rpc_authflavor_lock); static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ @@ -29,6 +38,47 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; +#define MAX_HASHTABLE_BITS (10) +static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) +{ + unsigned long num; + unsigned int nbits; + int ret; + + if (!val) + goto out_inval; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL) + goto out_inval; + nbits = fls(num); + if (num > (1U << nbits)) + nbits++; + if (nbits > MAX_HASHTABLE_BITS || nbits < 2) + goto out_inval; + *(unsigned int *)kp->arg = nbits; + return 0; +out_inval: + return -EINVAL; +} + +static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp) +{ + unsigned int nbits; + + nbits = *(unsigned int *)kp->arg; + return sprintf(buffer, "%u", 1U << nbits); +} + +#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); + +static struct kernel_param_ops param_ops_hashtbl_sz = { + .set = param_set_hashtbl_sz, + .get = param_get_hashtbl_sz, +}; + +module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644); +MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -145,16 +195,23 @@ int rpcauth_init_credcache(struct rpc_auth *auth) { struct rpc_cred_cache *new; - int i; + unsigned int hashsize; new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) - return -ENOMEM; - for (i = 0; i < RPC_CREDCACHE_NR; i++) - INIT_HLIST_HEAD(&new->hashtable[i]); + goto out_nocache; + new->hashbits = auth_hashbits; + hashsize = 1U << new->hashbits; + new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL); + if (!new->hashtable) + goto out_nohashtbl; spin_lock_init(&new->lock); auth->au_credcache = new; return 0; +out_nohashtbl: + kfree(new); +out_nocache: + return -ENOMEM; } EXPORT_SYMBOL_GPL(rpcauth_init_credcache); @@ -183,11 +240,12 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache) LIST_HEAD(free); struct hlist_head *head; struct rpc_cred *cred; + unsigned int hashsize = 1U << cache->hashbits; int i; spin_lock(&rpc_credcache_lock); spin_lock(&cache->lock); - for (i = 0; i < RPC_CREDCACHE_NR; i++) { + for (i = 0; i < hashsize; i++) { head = &cache->hashtable[i]; while (!hlist_empty(head)) { cred = hlist_entry(head->first, struct rpc_cred, cr_hash); @@ -216,6 +274,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) if (cache) { auth->au_credcache = NULL; rpcauth_clear_credcache(cache); + kfree(cache->hashtable); kfree(cache); } } @@ -297,7 +356,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, *entry, *new; unsigned int nr; - nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS); + nr = hash_long(acred->uid, cache->hashbits); rcu_read_lock(); hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { @@ -390,16 +449,16 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, } EXPORT_SYMBOL_GPL(rpcauth_init_cred); -void +struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { - task->tk_msg.rpc_cred = get_rpccred(cred); dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + return get_rpccred(cred); } EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); -static void +static struct rpc_cred * rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; @@ -407,45 +466,43 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) .uid = 0, .gid = 0, }; - struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return auth->au_ops->lookup_cred(auth, &acred, lookupflags); } -static void +static struct rpc_cred * rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; - struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, auth->au_ops->au_name); - ret = rpcauth_lookupcred(auth, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return rpcauth_lookupcred(auth, lookupflags); } -void +static int rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_cred *new; int lookupflags = 0; if (flags & RPC_TASK_ASYNC) lookupflags |= RPCAUTH_LOOKUP_NEW; if (cred != NULL) - cred->cr_ops->crbind(task, cred, lookupflags); + new = cred->cr_ops->crbind(task, cred, lookupflags); else if (flags & RPC_TASK_ROOTCREDS) - rpcauth_bind_root_cred(task, lookupflags); + new = rpcauth_bind_root_cred(task, lookupflags); else - rpcauth_bind_new_cred(task, lookupflags); + new = rpcauth_bind_new_cred(task, lookupflags); + if (IS_ERR(new)) + return PTR_ERR(new); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); + req->rq_cred = new; + return 0; } void @@ -484,22 +541,10 @@ out_nodestroy: } EXPORT_SYMBOL_GPL(put_rpccred); -void -rpcauth_unbindcred(struct rpc_task *task) -{ - struct rpc_cred *cred = task->tk_msg.rpc_cred; - - dprintk("RPC: %5u releasing %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); - - put_rpccred(cred); - task->tk_msg.rpc_cred = NULL; -} - __be32 * rpcauth_marshcred(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u marshaling %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -510,7 +555,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) __be32 * rpcauth_checkverf(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u validating %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -522,7 +567,7 @@ int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -536,7 +581,7 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -550,13 +595,21 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, int rpcauth_refreshcred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; int err; + cred = task->tk_rqstp->rq_cred; + if (cred == NULL) { + err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags); + if (err < 0) + goto out; + cred = task->tk_rqstp->rq_cred; + }; dprintk("RPC: %5u refreshing %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); +out: if (err < 0) task->tk_status = err; return err; @@ -565,7 +618,7 @@ rpcauth_refreshcred(struct rpc_task *task) void rpcauth_invalcred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u invalidating %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -576,7 +629,7 @@ rpcauth_invalcred(struct rpc_task *task) int rpcauth_uptodatecred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; return cred == NULL || test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; @@ -587,14 +640,27 @@ static struct shrinker rpc_cred_shrinker = { .seeks = DEFAULT_SEEKS, }; -void __init rpcauth_init_module(void) +int __init rpcauth_init_module(void) { - rpc_init_authunix(); - rpc_init_generic_auth(); + int err; + + err = rpc_init_authunix(); + if (err < 0) + goto out1; + err = rpc_init_generic_auth(); + if (err < 0) + goto out2; register_shrinker(&rpc_cred_shrinker); + return 0; +out2: + rpc_destroy_authunix(); +out1: + return err; } void __exit rpcauth_remove_module(void) { + rpc_destroy_authunix(); + rpc_destroy_generic_auth(); unregister_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 8f623b0f03dd..43162bb3b78f 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -27,7 +27,6 @@ struct generic_cred { }; static struct rpc_auth generic_auth; -static struct rpc_cred_cache generic_cred_cache; static const struct rpc_credops generic_credops; /* @@ -55,18 +54,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void) } EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); -static void -generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) +static struct rpc_cred *generic_bind_cred(struct rpc_task *task, + struct rpc_cred *cred, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - struct rpc_cred *ret; - ret = auth->au_ops->lookup_cred(auth, acred, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return auth->au_ops->lookup_cred(auth, acred, lookupflags); } /* @@ -159,20 +153,16 @@ out_nomatch: return 0; } -void __init rpc_init_generic_auth(void) +int __init rpc_init_generic_auth(void) { - spin_lock_init(&generic_cred_cache.lock); + return rpcauth_init_credcache(&generic_auth); } void __exit rpc_destroy_generic_auth(void) { - rpcauth_clear_credcache(&generic_cred_cache); + rpcauth_destroy_credcache(&generic_auth); } -static struct rpc_cred_cache generic_cred_cache = { - {{ NULL, },}, -}; - static const struct rpc_authops generic_auth_ops = { .owner = THIS_MODULE, .au_name = "Generic", @@ -183,7 +173,6 @@ static const struct rpc_authops generic_auth_ops = { static struct rpc_auth generic_auth = { .au_ops = &generic_auth_ops, .au_count = ATOMIC_INIT(0), - .au_credcache = &generic_cred_cache, }; static const struct rpc_credops generic_credops = { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8da2a0e68574..dcfc66bab2bb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -373,7 +373,7 @@ gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss static void gss_upcall_callback(struct rpc_task *task) { - struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, + struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; struct inode *inode = &gss_msg->inode->vfs_inode; @@ -502,7 +502,7 @@ static void warn_gssd(void) static inline int gss_refresh_upcall(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, @@ -928,6 +928,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx) { dprintk("RPC: gss_free_ctx\n"); + gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); kfree(ctx); } @@ -942,13 +943,7 @@ gss_free_ctx_callback(struct rcu_head *head) static void gss_free_ctx(struct gss_cl_ctx *ctx) { - struct gss_ctx *gc_gss_ctx; - - gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx); - rcu_assign_pointer(ctx->gc_gss_ctx, NULL); call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); - if (gc_gss_ctx) - gss_delete_sec_context(&gc_gss_ctx); } static void @@ -1064,12 +1059,12 @@ out: static __be32 * gss_marshal(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_cred *cred = req->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 *cred_len; - struct rpc_rqst *req = task->tk_rqstp; u32 maj_stat = 0; struct xdr_netobj mic; struct kvec iov; @@ -1119,7 +1114,7 @@ out_put_ctx: static int gss_renew_cred(struct rpc_task *task) { - struct rpc_cred *oldcred = task->tk_msg.rpc_cred; + struct rpc_cred *oldcred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(oldcred, struct gss_cred, gc_base); @@ -1133,7 +1128,7 @@ static int gss_renew_cred(struct rpc_task *task) new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(new)) return PTR_ERR(new); - task->tk_msg.rpc_cred = new; + task->tk_rqstp->rq_cred = new; put_rpccred(oldcred); return 0; } @@ -1161,7 +1156,7 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred) static int gss_refresh(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; int ret = 0; if (gss_cred_is_negative_entry(cred)) @@ -1172,7 +1167,7 @@ gss_refresh(struct rpc_task *task) ret = gss_renew_cred(task); if (ret < 0) goto out; - cred = task->tk_msg.rpc_cred; + cred = task->tk_rqstp->rq_cred; } if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) @@ -1191,7 +1186,7 @@ gss_refresh_null(struct rpc_task *task) static __be32 * gss_validate(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 seq; struct kvec iov; @@ -1400,7 +1395,7 @@ static int gss_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); @@ -1503,7 +1498,7 @@ static int gss_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 1db618f56ecb..a5c36c01707b 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -75,7 +75,7 @@ nul_marshal(struct rpc_task *task, __be32 *p) static int nul_refresh(struct rpc_task *task) { - set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); return 0; } diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index aac2f8b4ee21..4cb70dc6e7ad 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -29,7 +29,6 @@ struct unx_cred { #endif static struct rpc_auth unix_auth; -static struct rpc_cred_cache unix_cred_cache; static const struct rpc_credops unix_credops; static struct rpc_auth * @@ -141,7 +140,7 @@ static __be32 * unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); + struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); __be32 *base, *hold; int i; @@ -174,7 +173,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) static int unx_refresh(struct rpc_task *task) { - set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); return 0; } @@ -197,15 +196,20 @@ unx_validate(struct rpc_task *task, __be32 *p) printk("RPC: giant verf size: %u\n", size); return NULL; } - task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; + task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); return p; } -void __init rpc_init_authunix(void) +int __init rpc_init_authunix(void) { - spin_lock_init(&unix_cred_cache.lock); + return rpcauth_init_credcache(&unix_auth); +} + +void rpc_destroy_authunix(void) +{ + rpcauth_destroy_credcache(&unix_auth); } const struct rpc_authops authunix_ops = { @@ -219,17 +223,12 @@ const struct rpc_authops authunix_ops = { }; static -struct rpc_cred_cache unix_cred_cache = { -}; - -static struct rpc_auth unix_auth = { .au_cslack = UNX_WRITESLACK, .au_rslack = 2, /* assume AUTH_NULL verf */ .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), - .au_credcache = &unix_cred_cache, }; static diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 58de76c8540c..2b06410e584e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -34,7 +34,6 @@ #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/rpc_pipe_fs.h> -#include <linux/smp_lock.h> #define RPCDBG_FACILITY RPCDBG_CACHE @@ -320,7 +319,7 @@ static struct cache_detail *current_detail; static int current_index; static void do_cache_clean(struct work_struct *work); -static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); +static struct delayed_work cache_cleaner; static void sunrpc_init_cache_detail(struct cache_detail *cd) { @@ -1504,6 +1503,11 @@ static int create_cache_proc_entries(struct cache_detail *cd) } #endif +void __init cache_initialize(void) +{ + INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); +} + int cache_register(struct cache_detail *cd) { int ret; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 756fc324db9e..2388d83b68ff 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -414,6 +414,35 @@ out_no_clnt: EXPORT_SYMBOL_GPL(rpc_clone_client); /* + * Kill all tasks for the given client. + * XXX: kill their descendants as well? + */ +void rpc_killall_tasks(struct rpc_clnt *clnt) +{ + struct rpc_task *rovr; + + + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { + if (!RPC_IS_ACTIVATED(rovr)) + continue; + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { + rovr->tk_flags |= RPC_TASK_KILLED; + rpc_exit(rovr, -EIO); + rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr); + } + } + spin_unlock(&clnt->cl_lock); +} +EXPORT_SYMBOL_GPL(rpc_killall_tasks); + +/* * Properly shut down an RPC client, terminating all outstanding * requests. */ @@ -538,6 +567,49 @@ out: } EXPORT_SYMBOL_GPL(rpc_bind_new_program); +void rpc_task_release_client(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + + if (clnt != NULL) { + /* Remove from client task list */ + spin_lock(&clnt->cl_lock); + list_del(&task->tk_task); + spin_unlock(&clnt->cl_lock); + task->tk_client = NULL; + + rpc_release_client(clnt); + } +} + +static +void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + if (clnt != NULL) { + rpc_task_release_client(task); + task->tk_client = clnt; + kref_get(&clnt->cl_kref); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + /* Add to the client's list of all tasks */ + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } +} + +static void +rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) +{ + if (msg != NULL) { + task->tk_msg.rpc_proc = msg->rpc_proc; + task->tk_msg.rpc_argp = msg->rpc_argp; + task->tk_msg.rpc_resp = msg->rpc_resp; + if (msg->rpc_cred != NULL) + task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred); + } +} + /* * Default callback for async RPC calls */ @@ -562,6 +634,18 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) if (IS_ERR(task)) goto out; + rpc_task_set_client(task, task_setup_data->rpc_client); + rpc_task_set_rpc_message(task, task_setup_data->rpc_message); + + if (task->tk_status != 0) { + int ret = task->tk_status; + rpc_put_task(task); + return ERR_PTR(ret); + } + + if (task->tk_action == NULL) + rpc_call_start(task); + atomic_inc(&task->tk_count); rpc_execute(task); out: @@ -756,12 +840,13 @@ EXPORT_SYMBOL_GPL(rpc_force_rebind); * Restart an (async) RPC call from the call_prepare state. * Usually called from within the exit handler. */ -void +int rpc_restart_call_prepare(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) - return; + return 0; task->tk_action = rpc_prepare_task; + return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); @@ -769,13 +854,13 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); * Restart an (async) RPC call. Usually called from within the * exit handler. */ -void +int rpc_restart_call(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) - return; - + return 0; task->tk_action = call_start; + return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call); @@ -824,11 +909,6 @@ call_reserve(struct rpc_task *task) { dprint_status(task); - if (!rpcauth_uptodatecred(task)) { - task->tk_action = call_refresh; - return; - } - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); @@ -892,7 +972,7 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack; + unsigned int slack = task->tk_client->cl_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -900,7 +980,7 @@ call_allocate(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_bind; + task->tk_action = call_refresh; if (req->rq_buffer) return; @@ -937,6 +1017,47 @@ call_allocate(struct rpc_task *task) rpc_exit(task, -ERESTARTSYS); } +/* + * 2a. Bind and/or refresh the credentials + */ +static void +call_refresh(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_action = call_refreshresult; + task->tk_status = 0; + task->tk_client->cl_stats->rpcauthrefresh++; + rpcauth_refreshcred(task); +} + +/* + * 2b. Process the results of a credential refresh + */ +static void +call_refreshresult(struct rpc_task *task) +{ + int status = task->tk_status; + + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_bind; + if (status >= 0 && rpcauth_uptodatecred(task)) + return; + switch (status) { + case -EACCES: + rpc_exit(task, -EACCES); + return; + case -ENOMEM: + rpc_exit(task, -ENOMEM); + return; + case -ETIMEDOUT: + rpc_delay(task, 3*HZ); + } + task->tk_action = call_refresh; +} + static inline int rpc_task_need_encode(struct rpc_task *task) { @@ -1472,43 +1593,6 @@ out_retry: } } -/* - * 8. Refresh the credentials if rejected by the server - */ -static void -call_refresh(struct rpc_task *task) -{ - dprint_status(task); - - task->tk_action = call_refreshresult; - task->tk_status = 0; - task->tk_client->cl_stats->rpcauthrefresh++; - rpcauth_refreshcred(task); -} - -/* - * 8a. Process the results of a credential refresh - */ -static void -call_refreshresult(struct rpc_task *task) -{ - int status = task->tk_status; - - dprint_status(task); - - task->tk_status = 0; - task->tk_action = call_reserve; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; - if (status == -EACCES) { - rpc_exit(task, -EACCES); - return; - } - task->tk_action = call_refresh; - if (status != -ETIMEDOUT) - rpc_delay(task, 3*HZ); -} - static __be32 * rpc_encode_header(struct rpc_task *task) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4a843b883b89..cace6049e4a5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -246,17 +246,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { - struct rpc_clnt *clnt; - if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) - return; rpc_task_set_debuginfo(task); - /* Add to global list of all tasks */ - clnt = task->tk_client; - if (clnt != NULL) { - spin_lock(&clnt->cl_lock); - list_add_tail(&task->tk_task, &clnt->cl_tasks); - spin_unlock(&clnt->cl_lock); - } + set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); } /* @@ -319,11 +310,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) { - printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n"); - return; - } - __rpc_add_wait_queue(q, task); BUG_ON(task->tk_callback != NULL); @@ -334,8 +320,8 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { - /* Mark the task as being activated if so needed */ - rpc_set_active(task); + /* We shouldn't ever put an inactive task to sleep */ + BUG_ON(!RPC_IS_ACTIVATED(task)); /* * Protect the queue operations. @@ -406,14 +392,6 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* - * Wake up the specified task - */ -static void rpc_wake_up_task(struct rpc_task *task) -{ - rpc_wake_up_queued_task(task->tk_waitqueue, task); -} - -/* * Wake up the next task on a priority queue. */ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) @@ -600,7 +578,15 @@ void rpc_exit_task(struct rpc_task *task) } } } -EXPORT_SYMBOL_GPL(rpc_exit_task); + +void rpc_exit(struct rpc_task *task, int status) +{ + task->tk_status = status; + task->tk_action = rpc_exit_task; + if (RPC_IS_QUEUED(task)) + rpc_wake_up_queued_task(task->tk_waitqueue, task); +} +EXPORT_SYMBOL_GPL(rpc_exit); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { @@ -690,7 +676,6 @@ static void __rpc_execute(struct rpc_task *task) dprintk("RPC: %5u got signal\n", task->tk_pid); task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); - rpc_wake_up_task(task); } rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); @@ -714,8 +699,9 @@ static void __rpc_execute(struct rpc_task *task) void rpc_execute(struct rpc_task *task) { rpc_set_active(task); - rpc_set_running(task); - __rpc_execute(task); + rpc_make_runnable(task); + if (!RPC_IS_ASYNC(task)) + __rpc_execute(task); } static void rpc_async_schedule(struct work_struct *work) @@ -808,26 +794,9 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize workqueue for async tasks */ task->tk_workqueue = task_setup_data->workqueue; - task->tk_client = task_setup_data->rpc_client; - if (task->tk_client != NULL) { - kref_get(&task->tk_client->cl_kref); - if (task->tk_client->cl_softrtry) - task->tk_flags |= RPC_TASK_SOFT; - } - if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; - if (task_setup_data->rpc_message != NULL) { - task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc; - task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp; - task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp; - /* Bind the user cred */ - rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags); - if (task->tk_action == NULL) - rpc_call_start(task); - } - /* starting timestamp */ task->tk_start = ktime_get(); @@ -896,11 +865,8 @@ void rpc_put_task(struct rpc_task *task) if (task->tk_rqstp) xprt_release(task); if (task->tk_msg.rpc_cred) - rpcauth_unbindcred(task); - if (task->tk_client) { - rpc_release_client(task->tk_client); - task->tk_client = NULL; - } + put_rpccred(task->tk_msg.rpc_cred); + rpc_task_release_client(task); if (task->tk_workqueue != NULL) { INIT_WORK(&task->u.tk_work, rpc_async_release); queue_work(task->tk_workqueue, &task->u.tk_work); @@ -913,13 +879,6 @@ static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); - if (!list_empty(&task->tk_task)) { - struct rpc_clnt *clnt = task->tk_client; - /* Remove from client task list */ - spin_lock(&clnt->cl_lock); - list_del(&task->tk_task); - spin_unlock(&clnt->cl_lock); - } BUG_ON (RPC_IS_QUEUED(task)); /* Wake up anyone who is waiting for task completion */ @@ -928,35 +887,6 @@ static void rpc_release_task(struct rpc_task *task) rpc_put_task(task); } -/* - * Kill all tasks for the given client. - * XXX: kill their descendants as well? - */ -void rpc_killall_tasks(struct rpc_clnt *clnt) -{ - struct rpc_task *rovr; - - - if (list_empty(&clnt->cl_tasks)) - return; - dprintk("RPC: killing all tasks for client %p\n", clnt); - /* - * Spin lock all_tasks to prevent changes... - */ - spin_lock(&clnt->cl_lock); - list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { - if (! RPC_IS_ACTIVATED(rovr)) - continue; - if (!(rovr->tk_flags & RPC_TASK_KILLED)) { - rovr->tk_flags |= RPC_TASK_KILLED; - rpc_exit(rovr, -EIO); - rpc_wake_up_task(rovr); - } - } - spin_unlock(&clnt->cl_lock); -} -EXPORT_SYMBOL_GPL(rpc_killall_tasks); - int rpciod_up(void) { return try_module_get(THIS_MODULE) ? 0 : -EINVAL; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f438347d817b..c0d085013a2b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -33,21 +33,27 @@ init_sunrpc(void) if (err) goto out; err = rpc_init_mempool(); - if (err) { - unregister_rpc_pipefs(); - goto out; - } + if (err) + goto out2; + err = rpcauth_init_module(); + if (err) + goto out3; #ifdef RPC_DEBUG rpc_register_sysctl(); #endif #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif + cache_initialize(); cache_register(&ip_map_cache); cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ - rpcauth_init_module(); + return 0; +out3: + rpc_destroy_mempool(); +out2: + unregister_rpc_pipefs(); out: return err; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index dcd0132396ba..970fb00f388c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1032,6 +1032,8 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); @@ -1129,6 +1131,7 @@ static void xprt_destroy(struct kref *kref) rpc_destroy_wait_queue(&xprt->sending); rpc_destroy_wait_queue(&xprt->resend); rpc_destroy_wait_queue(&xprt->backlog); + cancel_work_sync(&xprt->task_cleanup); /* * Tear down transport state and free the rpc_xprt */ diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e5e28d1946a4..2ac3f6e8adff 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, req->rl_nchunks = nchunks; BUG_ON(nchunks == 0); + BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + && (nchunks > 3)); /* * finish off header. If write, marshal discrim and nchunks. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 27015c6d8eb5..5f4c7b3bc711 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: - /* Add room for frmr register and invalidate WRs */ - ep->rep_attr.cap.max_send_wr *= 3; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) - return -EINVAL; + /* Add room for frmr register and invalidate WRs. + * 1. FRMR reg WR for head + * 2. FRMR invalidate WR for head + * 3. FRMR reg WR for pagelist + * 4. FRMR invalidate WR for pagelist + * 5. FRMR reg WR for tail + * 6. FRMR invalidate WR for tail + * 7. The RDMA_SEND WR + */ + ep->rep_attr.cap.max_send_wr *= 7; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { + cdata->max_requests = devattr.max_qp_wr / 7; + if (!cdata->max_requests) + return -EINVAL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; + } break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: @@ -1490,7 +1502,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.opcode = IB_WR_FAST_REG_MR; frmr_wr.send_flags = 0; /* unsignaled */ - frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; + frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7ca65c7005ea..b6309db56226 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1305,10 +1305,11 @@ static void xs_tcp_state_change(struct sock *sk) if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); - dprintk("RPC: state %x conn %d dead %d zapped %d\n", + dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", sk->sk_state, xprt_connected(xprt), sock_flag(sk, SOCK_DEAD), - sock_flag(sk, SOCK_ZAPPED)); + sock_flag(sk, SOCK_ZAPPED), + sk->sk_shutdown); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -1779,10 +1780,25 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra { unsigned int state = transport->inet->sk_state; - if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) - return; - if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) - return; + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: ESTABLISHED/SYN_SENT " + "sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } xs_abort_connection(xprt, transport); } @@ -2577,7 +2593,8 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_bc_tcp_transport); } -static int param_set_uint_minmax(const char *val, struct kernel_param *kp, +static int param_set_uint_minmax(const char *val, + const struct kernel_param *kp, unsigned int min, unsigned int max) { unsigned long num; @@ -2592,34 +2609,37 @@ static int param_set_uint_minmax(const char *val, struct kernel_param *kp, return 0; } -static int param_set_portnr(const char *val, struct kernel_param *kp) +static int param_set_portnr(const char *val, const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, RPC_MIN_RESVPORT, RPC_MAX_RESVPORT); } -static int param_get_portnr(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} +static struct kernel_param_ops param_ops_portnr = { + .set = param_set_portnr, + .get = param_get_uint, +}; + #define param_check_portnr(name, p) \ __param_check(name, p, unsigned int); module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); -static int param_set_slot_table_size(const char *val, struct kernel_param *kp) +static int param_set_slot_table_size(const char *val, + const struct kernel_param *kp) { return param_set_uint_minmax(val, kp, RPC_MIN_SLOT_TABLE, RPC_MAX_SLOT_TABLE); } -static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) -{ - return param_get_uint(buffer, kp); -} +static struct kernel_param_ops param_ops_slot_table_size = { + .set = param_set_slot_table_size, + .get = param_get_uint, +}; + #define param_check_slot_table_size(name, p) \ __param_check(name, p, unsigned int); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4414a18c63b4..0b39b2451ea5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -692,6 +692,7 @@ static int unix_autobind(struct socket *sock) static u32 ordernum = 1; struct unix_address *addr; int err; + unsigned int retries = 0; mutex_lock(&u->readlock); @@ -717,9 +718,17 @@ retry: if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); - /* Sanity yield. It is unusual case, but yet... */ - if (!(ordernum&0xFF)) - yield(); + /* + * __unix_find_socket_byname() may take long time if many names + * are already in use. + */ + cond_resched(); + /* Give up if all names seems to be in use. */ + if (retries++ == 0xFFFFF) { + err = -ENOSPC; + kfree(addr); + goto out; + } goto retry; } addr->hash ^= sk->sk_type; |