From 7f677633379b4abb3281cdbe7e7006f049305c03 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 10 Feb 2017 20:28:24 -0800
Subject: bpf: introduce BPF_F_ALLOW_OVERRIDE flag

If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
to the given cgroup the descendent cgroup will be able to override
effective bpf program that was inherited from this cgroup.
By default it's not passed, therefore override is disallowed.

Examples:
1.
prog X attached to /A with default
prog Y fails to attach to /A/B and /A/B/C
Everything under /A runs prog X

2.
prog X attached to /A with allow_override.
prog Y fails to attach to /A/B with default (non-override)
prog M attached to /A/B with allow_override.
Everything under /A/B runs prog M only.

3.
prog X attached to /A with allow_override.
prog Y fails to attach to /A with default.
The user has to detach first to switch the mode.

In the future this behavior may be extended with a chain of
non-overridable programs.

Also fix the bug where detach from cgroup where nothing is attached
was not throwing error. Return ENOENT in such case.

Add several testcases and adjust libbpf.

Fixes: 3007098494be ("cgroup: add support for eBPF programs")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf-cgroup.h | 13 ++++++-------
 include/uapi/linux/bpf.h   |  7 +++++++
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 92bc89ae7e20..c970a25d2a49 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -21,20 +21,19 @@ struct cgroup_bpf {
 	 */
 	struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
 	struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
+	bool disallow_override[MAX_BPF_ATTACH_TYPE];
 };
 
 void cgroup_bpf_put(struct cgroup *cgrp);
 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
 
-void __cgroup_bpf_update(struct cgroup *cgrp,
-			 struct cgroup *parent,
-			 struct bpf_prog *prog,
-			 enum bpf_attach_type type);
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool overridable);
 
 /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
-void cgroup_bpf_update(struct cgroup *cgrp,
-		       struct bpf_prog *prog,
-		       enum bpf_attach_type type);
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable);
 
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0eb0e87dbe9f..d2b0ac799d03 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -116,6 +116,12 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
+ */
+#define BPF_F_ALLOW_OVERRIDE	(1U << 0)
+
 #define BPF_PSEUDO_MAP_FD	1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -171,6 +177,7 @@ union bpf_attr {
 		__u32		target_fd;	/* container object to attach to */
 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 		__u32		attach_type;
+		__u32		attach_flags;
 	};
 } __attribute__((aligned(8)));
 
-- 
cgit v1.2.3


From 40137906c5f55c252194ef5834130383e639536f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 11 Feb 2017 19:26:47 +0800
Subject: rhashtable: Add nested tables

This patch adds code that handles GFP_ATOMIC kmalloc failure on
insertion.  As we cannot use vmalloc, we solve it by making our
hash table nested.  That is, we allocate single pages at each level
and reach our desired table size by nesting them.

When a nested table is created, only a single page is allocated
at the top-level.  Lower levels are allocated on demand during
insertion.  Therefore for each insertion to succeed, only two
(non-consecutive) pages are needed.

After a nested table is created, a rehash will be scheduled in
order to switch to a vmalloced table as soon as possible.  Also,
the rehash code will never rehash into a nested table.  If we
detect a nested table during a rehash, the rehash will be aborted
and a new rehash will be scheduled.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  78 +++++++++----
 lib/rhashtable.c           | 270 ++++++++++++++++++++++++++++++++++++---------
 2 files changed, 276 insertions(+), 72 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 5c132d3188be..f2e12a845910 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -61,6 +61,7 @@ struct rhlist_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
+ * @nest: Number of bits of first-level nested table.
  * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @locks_mask: Mask to apply before accessing locks[]
@@ -68,10 +69,12 @@ struct rhlist_head {
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
  * @future_tbl: Table under construction during rehashing
+ * @ntbl: Nested table used when out of memory.
  * @buckets: size * hash buckets
  */
 struct bucket_table {
 	unsigned int		size;
+	unsigned int		nest;
 	unsigned int		rehash;
 	u32			hash_rnd;
 	unsigned int		locks_mask;
@@ -81,7 +84,7 @@ struct bucket_table {
 
 	struct bucket_table __rcu *future_tbl;
 
-	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
 /**
@@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+					    unsigned int hash);
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+						   struct bucket_table *tbl,
+						   unsigned int hash);
+
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 
@@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht);
 #define rht_entry(tpos, pos, member) \
 	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
+static inline struct rhash_head __rcu *const *rht_bucket(
+	const struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_var(
+	struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
+static inline struct rhash_head __rcu **rht_bucket_insert(
+	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
+{
+	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
+				     &tbl->buckets[hash];
+}
+
 /**
  * rht_for_each_continue - continue iterating over hash chain
  * @pos:	the &struct rhash_head to use as a loop cursor.
@@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @hash:	the hash value / bucket index
  */
 #define rht_for_each(pos, tbl, hash) \
-	rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
  * rht_for_each_entry_continue - continue iterating over hash chain
@@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * @member:	name of the &struct rhash_head within the hashable struct.
  */
 #define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],	\
+	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
 				    tbl, hash, member)
 
 /**
@@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht);
  * This hash chain list-traversal primitive allows for the looped code to
  * remove the loop cursor from the list.
  */
-#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	    \
-	for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
-	     next = !rht_is_a_nulls(pos) ?				    \
-		       rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
-	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
-	     pos = next,						    \
-	     next = !rht_is_a_nulls(pos) ?				    \
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
+	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
+	     next = !rht_is_a_nulls(pos) ?				      \
+		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
+	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
+	     pos = next,						      \
+	     next = !rht_is_a_nulls(pos) ?				      \
 		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 
 /**
@@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht);
  * traversal is guarded by rcu_read_lock().
  */
 #define rht_for_each_rcu(pos, tbl, hash)				\
-	rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
+	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
 
 /**
  * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
@@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht);
  * the _rcu mutation primitives such as rhashtable_insert() as long as the
  * traversal is guarded by rcu_read_lock().
  */
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
+	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
 					tbl, hash, member)
 
 /**
@@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
 
@@ -697,8 +727,12 @@ slow_path:
 	}
 
 	elasticity = ht->elasticity;
-	pprev = &tbl->buckets[hash];
-	rht_for_each(head, tbl, hash) {
+	pprev = rht_bucket_insert(ht, tbl, hash);
+	data = ERR_PTR(-ENOMEM);
+	if (!pprev)
+		goto out;
+
+	rht_for_each_continue(head, *pprev, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -736,7 +770,7 @@ slow_path:
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+	head = rht_dereference_bucket(*pprev, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -746,7 +780,7 @@ slow_path:
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	rcu_assign_pointer(*pprev, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		struct rhlist_head *list;
 
 		list = container_of(he, struct rhlist_head, rhead);
@@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast(
 
 	spin_lock_bh(lock);
 
-	pprev = &tbl->buckets[hash];
-	rht_for_each(he, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(he, *pprev, tbl, hash) {
 		if (he != obj_old) {
 			pprev = &he->next;
 			continue;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 32d0ad058380..172454e6b979 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -32,6 +32,11 @@
 #define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU	32UL
 
+union nested_table {
+	union nested_table __rcu *table;
+	struct rhash_head __rcu *bucket;
+};
+
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
@@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	/* Never allocate more than 0.5 locks per bucket */
 	size = min_t(unsigned int, size, tbl->size >> 1);
 
+	if (tbl->nest)
+		size = min(size, 1U << tbl->nest);
+
 	if (sizeof(spinlock_t) != 0) {
 		tbl->locks = NULL;
 #ifdef CONFIG_NUMA
@@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	return 0;
 }
 
+static void nested_table_free(union nested_table *ntbl, unsigned int size)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	const unsigned int len = 1 << shift;
+	unsigned int i;
+
+	ntbl = rcu_dereference_raw(ntbl->table);
+	if (!ntbl)
+		return;
+
+	if (size > len) {
+		size >>= shift;
+		for (i = 0; i < len; i++)
+			nested_table_free(ntbl + i, size);
+	}
+
+	kfree(ntbl);
+}
+
+static void nested_bucket_table_free(const struct bucket_table *tbl)
+{
+	unsigned int size = tbl->size >> tbl->nest;
+	unsigned int len = 1 << tbl->nest;
+	union nested_table *ntbl;
+	unsigned int i;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+
+	for (i = 0; i < len; i++)
+		nested_table_free(ntbl + i, size);
+
+	kfree(ntbl);
+}
+
 static void bucket_table_free(const struct bucket_table *tbl)
 {
+	if (tbl->nest)
+		nested_bucket_table_free(tbl);
+
 	if (tbl)
 		kvfree(tbl->locks);
 
@@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 	bucket_table_free(container_of(head, struct bucket_table, rcu));
 }
 
+static union nested_table *nested_table_alloc(struct rhashtable *ht,
+					      union nested_table __rcu **prev,
+					      unsigned int shifted,
+					      unsigned int nhash)
+{
+	union nested_table *ntbl;
+	int i;
+
+	ntbl = rcu_dereference(*prev);
+	if (ntbl)
+		return ntbl;
+
+	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+
+	if (ntbl && shifted) {
+		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
+			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
+					    (i << shifted) | nhash);
+	}
+
+	rcu_assign_pointer(*prev, ntbl);
+
+	return ntbl;
+}
+
+static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
+						      size_t nbuckets,
+						      gfp_t gfp)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	struct bucket_table *tbl;
+	size_t size;
+
+	if (nbuckets < (1 << (shift + 1)))
+		return NULL;
+
+	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
+
+	tbl = kzalloc(size, gfp);
+	if (!tbl)
+		return NULL;
+
+	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
+				0, 0)) {
+		kfree(tbl);
+		return NULL;
+	}
+
+	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
+
+	return tbl;
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       size_t nbuckets,
 					       gfp_t gfp)
@@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
 	if (tbl == NULL && gfp == GFP_KERNEL)
 		tbl = vzalloc(size);
+
+	size = nbuckets;
+
+	if (tbl == NULL && gfp != GFP_KERNEL) {
+		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
+		nbuckets = 0;
+	}
 	if (tbl == NULL)
 		return NULL;
 
-	tbl->size = nbuckets;
+	tbl->size = size;
 
 	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
 		bucket_table_free(tbl);
@@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
 		rht_dereference_rcu(old_tbl->future_tbl, ht));
-	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
-	int err = -ENOENT;
+	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
+	int err = -EAGAIN;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
 	unsigned int new_hash;
 
+	if (new_tbl->nest)
+		goto out;
+
+	err = -ENOENT;
+
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
 		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -202,19 +312,26 @@ out:
 	return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht,
+static int rhashtable_rehash_chain(struct rhashtable *ht,
 				    unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
+	int err;
 
 	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
 	spin_lock_bh(old_bucket_lock);
-	while (!rhashtable_rehash_one(ht, old_hash))
+	while (!(err = rhashtable_rehash_one(ht, old_hash)))
 		;
-	old_tbl->rehash++;
+
+	if (err == -ENOENT) {
+		old_tbl->rehash++;
+		err = 0;
+	}
 	spin_unlock_bh(old_bucket_lock);
+
+	return err;
 }
 
 static int rhashtable_rehash_attach(struct rhashtable *ht,
@@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
 	unsigned int old_hash;
+	int err;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
 		return 0;
 
-	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
-		rhashtable_rehash_chain(ht, old_hash);
+	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
+		err = rhashtable_rehash_chain(ht, old_hash);
+		if (err)
+			return err;
+	}
 
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
@@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
-/**
- * rhashtable_expand - Expand hash table while allowing concurrent lookups
- * @ht:		the hash table to expand
- *
- * A secondary bucket array is allocated and the hash entries are migrated.
- *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
- *
- * The caller must ensure that no concurrent resizing occurs by holding
- * ht->mutex.
- *
- * It is valid to have concurrent insertions and deletions protected by per
- * bucket locks or concurrent RCU protected lookups and traversals.
- */
-static int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_rehash_alloc(struct rhashtable *ht,
+				   struct bucket_table *old_tbl,
+				   unsigned int size)
 {
-	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	old_tbl = rhashtable_last_table(ht, old_tbl);
-
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
+	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht)
  */
 static int rhashtable_shrink(struct rhashtable *ht)
 {
-	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	unsigned int nelems = atomic_read(&ht->nelems);
 	unsigned int size = 0;
-	int err;
-
-	ASSERT_RHT_MUTEX(ht);
 
 	if (nelems)
 		size = roundup_pow_of_two(nelems * 3 / 2);
@@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht)
 	if (rht_dereference(old_tbl->future_tbl, ht))
 		return -EEXIST;
 
-	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
-	if (new_tbl == NULL)
-		return -ENOMEM;
-
-	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
-	if (err)
-		bucket_table_free(new_tbl);
-
-	return err;
+	return rhashtable_rehash_alloc(ht, old_tbl, size);
 }
 
 static void rht_deferred_worker(struct work_struct *work)
@@ -366,11 +461,14 @@ static void rht_deferred_worker(struct work_struct *work)
 	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
-		rhashtable_expand(ht);
+		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
 	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
-		rhashtable_shrink(ht);
+		err = rhashtable_shrink(ht);
+	else if (tbl->nest)
+		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
 
-	err = rhashtable_rehash_table(ht);
+	if (!err)
+		err = rhashtable_rehash_table(ht);
 
 	mutex_unlock(&ht->mutex);
 
@@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 	int elasticity;
 
 	elasticity = ht->elasticity;
-	pprev = &tbl->buckets[hash];
-	rht_for_each(head, tbl, hash) {
+	pprev = rht_bucket_var(tbl, hash);
+	rht_for_each_continue(head, *pprev, tbl, hash) {
 		struct rhlist_head *list;
 		struct rhlist_head *plist;
 
@@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 						  struct rhash_head *obj,
 						  void *data)
 {
+	struct rhash_head __rcu **pprev;
 	struct bucket_table *new_tbl;
 	struct rhash_head *head;
 
@@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		return ERR_PTR(-EAGAIN);
 
-	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
+	pprev = rht_bucket_insert(ht, tbl, hash);
+	if (!pprev)
+		return ERR_PTR(-ENOMEM);
+
+	head = rht_dereference_bucket(*pprev, tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (ht->rhlist) {
@@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(tbl->buckets[hash], obj);
+	rcu_assign_pointer(*pprev, obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void (*free_fn)(void *ptr, void *arg),
 				 void *arg)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl;
 	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
@@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 		for (i = 0; i < tbl->size; i++) {
 			struct rhash_head *pos, *next;
 
-			for (pos = rht_dereference(tbl->buckets[i], ht),
+			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
 			     next = !rht_is_a_nulls(pos) ?
 					rht_dereference(pos->next, ht) : NULL;
 			     !rht_is_a_nulls(pos);
@@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht)
 	return rhashtable_free_and_destroy(ht, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
+
+struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
+					    unsigned int hash)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	static struct rhash_head __rcu *rhnull =
+		(struct rhash_head __rcu *)NULLS_MARKER(0);
+	unsigned int index = hash & ((1 << tbl->nest) - 1);
+	unsigned int size = tbl->size >> tbl->nest;
+	unsigned int subhash = hash;
+	union nested_table *ntbl;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+	ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+	subhash >>= tbl->nest;
+
+	while (ntbl && size > (1 << shift)) {
+		index = subhash & ((1 << shift) - 1);
+		ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
+		size >>= shift;
+		subhash >>= shift;
+	}
+
+	if (!ntbl)
+		return &rhnull;
+
+	return &ntbl[subhash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested);
+
+struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
+						   struct bucket_table *tbl,
+						   unsigned int hash)
+{
+	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
+	unsigned int index = hash & ((1 << tbl->nest) - 1);
+	unsigned int size = tbl->size >> tbl->nest;
+	union nested_table *ntbl;
+	unsigned int shifted;
+	unsigned int nhash;
+
+	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
+	hash >>= tbl->nest;
+	nhash = index;
+	shifted = tbl->nest;
+	ntbl = nested_table_alloc(ht, &ntbl[index].table,
+				  size <= (1 << shift) ? shifted : 0, nhash);
+
+	while (ntbl && size > (1 << shift)) {
+		index = hash & ((1 << shift) - 1);
+		size >>= shift;
+		hash >>= shift;
+		nhash |= index << shifted;
+		shifted += shift;
+		ntbl = nested_table_alloc(ht, &ntbl[index].table,
+					  size <= (1 << shift) ? shifted : 0,
+					  nhash);
+	}
+
+	if (!ntbl)
+		return NULL;
+
+	return &ntbl[hash].bucket;
+
+}
+EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
-- 
cgit v1.2.3


From a725eb15db80643a160310ed6bcfd6c5a6c907f2 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Wed, 15 Feb 2017 05:23:26 +0300
Subject: uapi: fix linux/if_pppol2tp.h userspace compilation errors

Because of <linux/libc-compat.h> interface limitations, <netinet/in.h>
provided by libc cannot be included after <linux/in.h>, therefore any
header that includes <netinet/in.h> cannot be included after <linux/in.h>.

Change uapi/linux/l2tp.h, the last uapi header that includes
<netinet/in.h>, to include <linux/in.h> and <linux/in6.h> instead of
<netinet/in.h> and use __SOCK_SIZE__ instead of sizeof(struct sockaddr)
the same way as uapi/linux/in.h does, to fix linux/if_pppol2tp.h userspace
compilation errors like this:

In file included from /usr/include/linux/l2tp.h:12:0,
                 from /usr/include/linux/if_pppol2tp.h:21,
/usr/include/netinet/in.h:31:8: error: redefinition of 'struct in_addr'

Fixes: 47c3e7783be4 ("net: l2tp: deprecate PPPOL2TP_MSG_* in favour of L2TP_MSG_*")
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 85ddb74fcd1c..b23c1914a182 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -9,9 +9,8 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>
-#ifndef __KERNEL__
-#include <netinet/in.h>
-#endif
+#include <linux/in.h>
+#include <linux/in6.h>
 
 #define IPPROTO_L2TP		115
 
@@ -31,7 +30,7 @@ struct sockaddr_l2tpip {
 	__u32		l2tp_conn_id;	/* Connection ID of tunnel */
 
 	/* Pad to size of `struct sockaddr'. */
-	unsigned char	__pad[sizeof(struct sockaddr) -
+	unsigned char	__pad[__SOCK_SIZE__ -
 			      sizeof(__kernel_sa_family_t) -
 			      sizeof(__be16) - sizeof(struct in_addr) -
 			      sizeof(__u32)];
-- 
cgit v1.2.3


From bf3f14d6342cfb37eab8f0cddd0e4d4063fd9fc9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 15 Feb 2017 22:29:51 -0500
Subject: rhashtable: Revert nested table changes.

This reverts commits:

6a25478077d987edc5e2f880590a2bc5fcab4441
9dbbfb0ab6680c6a85609041011484e6658e7d3c
40137906c5f55c252194ef5834130383e639536f

It's too risky to put in this late in the release
cycle.  We'll put these changes into the next merge
window instead.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/gfs2/glock.c            |  28 ++---
 include/linux/rhashtable.h |  78 ++++---------
 lib/rhashtable.c           | 270 +++++++++------------------------------------
 net/tipc/net.c             |   4 -
 net/tipc/socket.c          |  30 ++---
 5 files changed, 94 insertions(+), 316 deletions(-)

(limited to 'include')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 70e94170af85..94f50cac91c6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1420,32 +1420,26 @@ static struct shrinker glock_shrinker = {
  * @sdp: the filesystem
  * @bucket: the bucket
  *
- * Note that the function can be called multiple times on the same
- * object.  So the user must ensure that the function can cope with
- * that.
  */
 
 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 {
 	struct gfs2_glock *gl;
-	struct rhashtable_iter iter;
-
-	rhashtable_walk_enter(&gl_hash_table, &iter);
-
-	do {
-		gl = ERR_PTR(rhashtable_walk_start(&iter));
-		if (gl)
-			continue;
+	struct rhash_head *pos;
+	const struct bucket_table *tbl;
+	int i;
 
-		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
+	rcu_read_lock();
+	tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
+	for (i = 0; i < tbl->size; i++) {
+		rht_for_each_entry_rcu(gl, pos, tbl, i, gl_node) {
 			if ((gl->gl_name.ln_sbd == sdp) &&
 			    lockref_get_not_dead(&gl->gl_lockref))
 				examiner(gl);
-
-		rhashtable_walk_stop(&iter);
-	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
-
-	rhashtable_walk_exit(&iter);
+		}
+	}
+	rcu_read_unlock();
+	cond_resched();
 }
 
 /**
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index f2e12a845910..5c132d3188be 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -61,7 +61,6 @@ struct rhlist_head {
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
- * @nest: Number of bits of first-level nested table.
  * @rehash: Current bucket being rehashed
  * @hash_rnd: Random seed to fold into hash
  * @locks_mask: Mask to apply before accessing locks[]
@@ -69,12 +68,10 @@ struct rhlist_head {
  * @walkers: List of active walkers
  * @rcu: RCU structure for freeing the table
  * @future_tbl: Table under construction during rehashing
- * @ntbl: Nested table used when out of memory.
  * @buckets: size * hash buckets
  */
 struct bucket_table {
 	unsigned int		size;
-	unsigned int		nest;
 	unsigned int		rehash;
 	u32			hash_rnd;
 	unsigned int		locks_mask;
@@ -84,7 +81,7 @@ struct bucket_table {
 
 	struct bucket_table __rcu *future_tbl;
 
-	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
+	struct rhash_head __rcu	*buckets[] ____cacheline_aligned_in_smp;
 };
 
 /**
@@ -377,12 +374,6 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
-					    unsigned int hash);
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
-						   struct bucket_table *tbl,
-						   unsigned int hash);
-
 #define rht_dereference(p, ht) \
 	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 
@@ -398,27 +389,6 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 #define rht_entry(tpos, pos, member) \
 	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 
-static inline struct rhash_head __rcu *const *rht_bucket(
-	const struct bucket_table *tbl, unsigned int hash)
-{
-	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
-				     &tbl->buckets[hash];
-}
-
-static inline struct rhash_head __rcu **rht_bucket_var(
-	struct bucket_table *tbl, unsigned int hash)
-{
-	return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) :
-				     &tbl->buckets[hash];
-}
-
-static inline struct rhash_head __rcu **rht_bucket_insert(
-	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
-{
-	return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) :
-				     &tbl->buckets[hash];
-}
-
 /**
  * rht_for_each_continue - continue iterating over hash chain
  * @pos:	the &struct rhash_head to use as a loop cursor.
@@ -438,7 +408,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * @hash:	the hash value / bucket index
  */
 #define rht_for_each(pos, tbl, hash) \
-	rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
+	rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
 
 /**
  * rht_for_each_entry_continue - continue iterating over hash chain
@@ -463,7 +433,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * @member:	name of the &struct rhash_head within the hashable struct.
  */
 #define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
-	rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash),	\
+	rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],	\
 				    tbl, hash, member)
 
 /**
@@ -478,13 +448,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * This hash chain list-traversal primitive allows for the looped code to
  * remove the loop cursor from the list.
  */
-#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
-	for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
-	     next = !rht_is_a_nulls(pos) ?				      \
-		       rht_dereference_bucket(pos->next, tbl, hash) : NULL;   \
-	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
-	     pos = next,						      \
-	     next = !rht_is_a_nulls(pos) ?				      \
+#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	    \
+	for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
+	     next = !rht_is_a_nulls(pos) ?				    \
+		       rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
+	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
+	     pos = next,						    \
+	     next = !rht_is_a_nulls(pos) ?				    \
 		       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 
 /**
@@ -515,7 +485,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * traversal is guarded by rcu_read_lock().
  */
 #define rht_for_each_rcu(pos, tbl, hash)				\
-	rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
+	rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
 
 /**
  * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
@@ -548,8 +518,8 @@ static inline struct rhash_head __rcu **rht_bucket_insert(
  * the _rcu mutation primitives such as rhashtable_insert() as long as the
  * traversal is guarded by rcu_read_lock().
  */
-#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		   \
-	rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
+#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		\
+	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
 					tbl, hash, member)
 
 /**
@@ -595,7 +565,7 @@ static inline struct rhash_head *__rhashtable_lookup(
 		.ht = ht,
 		.key = key,
 	};
-	struct bucket_table *tbl;
+	const struct bucket_table *tbl;
 	struct rhash_head *he;
 	unsigned int hash;
 
@@ -727,12 +697,8 @@ slow_path:
 	}
 
 	elasticity = ht->elasticity;
-	pprev = rht_bucket_insert(ht, tbl, hash);
-	data = ERR_PTR(-ENOMEM);
-	if (!pprev)
-		goto out;
-
-	rht_for_each_continue(head, *pprev, tbl, hash) {
+	pprev = &tbl->buckets[hash];
+	rht_for_each(head, tbl, hash) {
 		struct rhlist_head *plist;
 		struct rhlist_head *list;
 
@@ -770,7 +736,7 @@ slow_path:
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		goto slow_path;
 
-	head = rht_dereference_bucket(*pprev, tbl, hash);
+	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (rhlist) {
@@ -780,7 +746,7 @@ slow_path:
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(*pprev, obj);
+	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -989,8 +955,8 @@ static inline int __rhashtable_remove_fast_one(
 
 	spin_lock_bh(lock);
 
-	pprev = rht_bucket_var(tbl, hash);
-	rht_for_each_continue(he, *pprev, tbl, hash) {
+	pprev = &tbl->buckets[hash];
+	rht_for_each(he, tbl, hash) {
 		struct rhlist_head *list;
 
 		list = container_of(he, struct rhlist_head, rhead);
@@ -1141,8 +1107,8 @@ static inline int __rhashtable_replace_fast(
 
 	spin_lock_bh(lock);
 
-	pprev = rht_bucket_var(tbl, hash);
-	rht_for_each_continue(he, *pprev, tbl, hash) {
+	pprev = &tbl->buckets[hash];
+	rht_for_each(he, tbl, hash) {
 		if (he != obj_old) {
 			pprev = &he->next;
 			continue;
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 172454e6b979..32d0ad058380 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -32,11 +32,6 @@
 #define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU	32UL
 
-union nested_table {
-	union nested_table __rcu *table;
-	struct rhash_head __rcu *bucket;
-};
-
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
 		       const struct rhash_head *he)
@@ -81,9 +76,6 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	/* Never allocate more than 0.5 locks per bucket */
 	size = min_t(unsigned int, size, tbl->size >> 1);
 
-	if (tbl->nest)
-		size = min(size, 1U << tbl->nest);
-
 	if (sizeof(spinlock_t) != 0) {
 		tbl->locks = NULL;
 #ifdef CONFIG_NUMA
@@ -107,45 +99,8 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	return 0;
 }
 
-static void nested_table_free(union nested_table *ntbl, unsigned int size)
-{
-	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-	const unsigned int len = 1 << shift;
-	unsigned int i;
-
-	ntbl = rcu_dereference_raw(ntbl->table);
-	if (!ntbl)
-		return;
-
-	if (size > len) {
-		size >>= shift;
-		for (i = 0; i < len; i++)
-			nested_table_free(ntbl + i, size);
-	}
-
-	kfree(ntbl);
-}
-
-static void nested_bucket_table_free(const struct bucket_table *tbl)
-{
-	unsigned int size = tbl->size >> tbl->nest;
-	unsigned int len = 1 << tbl->nest;
-	union nested_table *ntbl;
-	unsigned int i;
-
-	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
-
-	for (i = 0; i < len; i++)
-		nested_table_free(ntbl + i, size);
-
-	kfree(ntbl);
-}
-
 static void bucket_table_free(const struct bucket_table *tbl)
 {
-	if (tbl->nest)
-		nested_bucket_table_free(tbl);
-
 	if (tbl)
 		kvfree(tbl->locks);
 
@@ -157,59 +112,6 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 	bucket_table_free(container_of(head, struct bucket_table, rcu));
 }
 
-static union nested_table *nested_table_alloc(struct rhashtable *ht,
-					      union nested_table __rcu **prev,
-					      unsigned int shifted,
-					      unsigned int nhash)
-{
-	union nested_table *ntbl;
-	int i;
-
-	ntbl = rcu_dereference(*prev);
-	if (ntbl)
-		return ntbl;
-
-	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
-
-	if (ntbl && shifted) {
-		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
-			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
-					    (i << shifted) | nhash);
-	}
-
-	rcu_assign_pointer(*prev, ntbl);
-
-	return ntbl;
-}
-
-static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
-						      size_t nbuckets,
-						      gfp_t gfp)
-{
-	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-	struct bucket_table *tbl;
-	size_t size;
-
-	if (nbuckets < (1 << (shift + 1)))
-		return NULL;
-
-	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
-
-	tbl = kzalloc(size, gfp);
-	if (!tbl)
-		return NULL;
-
-	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
-				0, 0)) {
-		kfree(tbl);
-		return NULL;
-	}
-
-	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
-
-	return tbl;
-}
-
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       size_t nbuckets,
 					       gfp_t gfp)
@@ -224,17 +126,10 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
 	if (tbl == NULL && gfp == GFP_KERNEL)
 		tbl = vzalloc(size);
-
-	size = nbuckets;
-
-	if (tbl == NULL && gfp != GFP_KERNEL) {
-		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
-		nbuckets = 0;
-	}
 	if (tbl == NULL)
 		return NULL;
 
-	tbl->size = size;
+	tbl->size = nbuckets;
 
 	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
 		bucket_table_free(tbl);
@@ -269,17 +164,12 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
 		rht_dereference_rcu(old_tbl->future_tbl, ht));
-	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
-	int err = -EAGAIN;
+	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
+	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
 	unsigned int new_hash;
 
-	if (new_tbl->nest)
-		goto out;
-
-	err = -ENOENT;
-
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
 		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
@@ -312,26 +202,19 @@ out:
 	return err;
 }
 
-static int rhashtable_rehash_chain(struct rhashtable *ht,
+static void rhashtable_rehash_chain(struct rhashtable *ht,
 				    unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
-	int err;
 
 	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
 
 	spin_lock_bh(old_bucket_lock);
-	while (!(err = rhashtable_rehash_one(ht, old_hash)))
+	while (!rhashtable_rehash_one(ht, old_hash))
 		;
-
-	if (err == -ENOENT) {
-		old_tbl->rehash++;
-		err = 0;
-	}
+	old_tbl->rehash++;
 	spin_unlock_bh(old_bucket_lock);
-
-	return err;
 }
 
 static int rhashtable_rehash_attach(struct rhashtable *ht,
@@ -363,17 +246,13 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
 	unsigned int old_hash;
-	int err;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
 		return 0;
 
-	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
-		err = rhashtable_rehash_chain(ht, old_hash);
-		if (err)
-			return err;
-	}
+	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
+		rhashtable_rehash_chain(ht, old_hash);
 
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
@@ -392,16 +271,31 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
-static int rhashtable_rehash_alloc(struct rhashtable *ht,
-				   struct bucket_table *old_tbl,
-				   unsigned int size)
+/**
+ * rhashtable_expand - Expand hash table while allowing concurrent lookups
+ * @ht:		the hash table to expand
+ *
+ * A secondary bucket array is allocated and the hash entries are migrated.
+ *
+ * This function may only be called in a context where it is safe to call
+ * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
+ *
+ * The caller must ensure that no concurrent resizing occurs by holding
+ * ht->mutex.
+ *
+ * It is valid to have concurrent insertions and deletions protected by per
+ * bucket locks or concurrent RCU protected lookups and traversals.
+ */
+static int rhashtable_expand(struct rhashtable *ht)
 {
-	struct bucket_table *new_tbl;
+	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+	old_tbl = rhashtable_last_table(ht, old_tbl);
+
+	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -430,9 +324,12 @@ static int rhashtable_rehash_alloc(struct rhashtable *ht,
  */
 static int rhashtable_shrink(struct rhashtable *ht)
 {
-	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	unsigned int nelems = atomic_read(&ht->nelems);
 	unsigned int size = 0;
+	int err;
+
+	ASSERT_RHT_MUTEX(ht);
 
 	if (nelems)
 		size = roundup_pow_of_two(nelems * 3 / 2);
@@ -445,7 +342,15 @@ static int rhashtable_shrink(struct rhashtable *ht)
 	if (rht_dereference(old_tbl->future_tbl, ht))
 		return -EEXIST;
 
-	return rhashtable_rehash_alloc(ht, old_tbl, size);
+	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
+	if (new_tbl == NULL)
+		return -ENOMEM;
+
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
+
+	return err;
 }
 
 static void rht_deferred_worker(struct work_struct *work)
@@ -461,14 +366,11 @@ static void rht_deferred_worker(struct work_struct *work)
 	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
-		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
+		rhashtable_expand(ht);
 	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
-		err = rhashtable_shrink(ht);
-	else if (tbl->nest)
-		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
+		rhashtable_shrink(ht);
 
-	if (!err)
-		err = rhashtable_rehash_table(ht);
+	err = rhashtable_rehash_table(ht);
 
 	mutex_unlock(&ht->mutex);
 
@@ -537,8 +439,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 	int elasticity;
 
 	elasticity = ht->elasticity;
-	pprev = rht_bucket_var(tbl, hash);
-	rht_for_each_continue(head, *pprev, tbl, hash) {
+	pprev = &tbl->buckets[hash];
+	rht_for_each(head, tbl, hash) {
 		struct rhlist_head *list;
 		struct rhlist_head *plist;
 
@@ -575,7 +477,6 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 						  struct rhash_head *obj,
 						  void *data)
 {
-	struct rhash_head __rcu **pprev;
 	struct bucket_table *new_tbl;
 	struct rhash_head *head;
 
@@ -598,11 +499,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 	if (unlikely(rht_grow_above_100(ht, tbl)))
 		return ERR_PTR(-EAGAIN);
 
-	pprev = rht_bucket_insert(ht, tbl, hash);
-	if (!pprev)
-		return ERR_PTR(-ENOMEM);
-
-	head = rht_dereference_bucket(*pprev, tbl, hash);
+	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
 	if (ht->rhlist) {
@@ -612,7 +509,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 		RCU_INIT_POINTER(list->next, NULL);
 	}
 
-	rcu_assign_pointer(*pprev, obj);
+	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
 	if (rht_grow_above_75(ht, tbl))
@@ -1078,7 +975,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void (*free_fn)(void *ptr, void *arg),
 				 void *arg)
 {
-	struct bucket_table *tbl;
+	const struct bucket_table *tbl;
 	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
@@ -1089,7 +986,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 		for (i = 0; i < tbl->size; i++) {
 			struct rhash_head *pos, *next;
 
-			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
+			for (pos = rht_dereference(tbl->buckets[i], ht),
 			     next = !rht_is_a_nulls(pos) ?
 					rht_dereference(pos->next, ht) : NULL;
 			     !rht_is_a_nulls(pos);
@@ -1110,70 +1007,3 @@ void rhashtable_destroy(struct rhashtable *ht)
 	return rhashtable_free_and_destroy(ht, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
-
-struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
-					    unsigned int hash)
-{
-	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-	static struct rhash_head __rcu *rhnull =
-		(struct rhash_head __rcu *)NULLS_MARKER(0);
-	unsigned int index = hash & ((1 << tbl->nest) - 1);
-	unsigned int size = tbl->size >> tbl->nest;
-	unsigned int subhash = hash;
-	union nested_table *ntbl;
-
-	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
-	ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
-	subhash >>= tbl->nest;
-
-	while (ntbl && size > (1 << shift)) {
-		index = subhash & ((1 << shift) - 1);
-		ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash);
-		size >>= shift;
-		subhash >>= shift;
-	}
-
-	if (!ntbl)
-		return &rhnull;
-
-	return &ntbl[subhash].bucket;
-
-}
-EXPORT_SYMBOL_GPL(rht_bucket_nested);
-
-struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
-						   struct bucket_table *tbl,
-						   unsigned int hash)
-{
-	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-	unsigned int index = hash & ((1 << tbl->nest) - 1);
-	unsigned int size = tbl->size >> tbl->nest;
-	union nested_table *ntbl;
-	unsigned int shifted;
-	unsigned int nhash;
-
-	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
-	hash >>= tbl->nest;
-	nhash = index;
-	shifted = tbl->nest;
-	ntbl = nested_table_alloc(ht, &ntbl[index].table,
-				  size <= (1 << shift) ? shifted : 0, nhash);
-
-	while (ntbl && size > (1 << shift)) {
-		index = hash & ((1 << shift) - 1);
-		size >>= shift;
-		hash >>= shift;
-		nhash |= index << shifted;
-		shifted += shift;
-		ntbl = nested_table_alloc(ht, &ntbl[index].table,
-					  size <= (1 << shift) ? shifted : 0,
-					  nhash);
-	}
-
-	if (!ntbl)
-		return NULL;
-
-	return &ntbl[hash].bucket;
-
-}
-EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index ab8a2d5d1e32..28bf4feeb81c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -110,10 +110,6 @@ int tipc_net_start(struct net *net, u32 addr)
 	char addr_string[16];
 
 	tn->own_addr = addr;
-
-	/* Ensure that the new address is visible before we reinit. */
-	smp_mb();
-
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 370a5912bcb5..800caaa699a1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -384,6 +384,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 	INIT_LIST_HEAD(&tsk->publications);
 	msg = &tsk->phdr;
 	tn = net_generic(sock_net(sk), tipc_net_id);
+	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
+		      NAMED_H_SIZE, 0);
 
 	/* Finish initializing socket data structures */
 	sock->ops = ops;
@@ -393,13 +395,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 		pr_warn("Socket create failed; port number exhausted\n");
 		return -EINVAL;
 	}
-
-	/* Ensure tsk is visible before we read own_addr. */
-	smp_mb();
-
-	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
-		      NAMED_H_SIZE, 0);
-
 	msg_set_origport(msg, tsk->portid);
 	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
 	sk->sk_shutdown = 0;
@@ -2274,27 +2269,24 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 void tipc_sk_reinit(struct net *net)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct rhashtable_iter iter;
+	const struct bucket_table *tbl;
+	struct rhash_head *pos;
 	struct tipc_sock *tsk;
 	struct tipc_msg *msg;
+	int i;
 
-	rhashtable_walk_enter(&tn->sk_rht, &iter);
-
-	do {
-		tsk = ERR_PTR(rhashtable_walk_start(&iter));
-		if (tsk)
-			continue;
-
-		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
+	rcu_read_lock();
+	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
+	for (i = 0; i < tbl->size; i++) {
+		rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
 			spin_lock_bh(&tsk->sk.sk_lock.slock);
 			msg = &tsk->phdr;
 			msg_set_prevnode(msg, tn->own_addr);
 			msg_set_orignode(msg, tn->own_addr);
 			spin_unlock_bh(&tsk->sk.sk_lock.slock);
 		}
-
-		rhashtable_walk_stop(&iter);
-	} while (tsk == ERR_PTR(-EAGAIN));
+	}
+	rcu_read_unlock();
 }
 
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
-- 
cgit v1.2.3