From 23cb2d04db54535df65edbbebbca89f2590f08bd Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 26 Oct 2018 16:54:43 -0500
Subject: ASoC: fix oops w/ for_each_rtd_codec_dai_rollback() macro

A kernel oops happens on an error case (usual missing BE mixer
configuration required by Intel SST driver). Git bisect points to this
macro and an operator precedence issue.

	for (; ((i--) >= 0) && ((dai) = rtd->codec_dais[i]);)

The initial code replaced by this macro was
	while (--i >= 0) {
		codec_dai = rtd->codec_dais[i];

Fix the C operator precedence difference by reverting to pre-decrement

Fixes: 0b7990e3897 ('ASoC: add for_each_rtd_codec_dai() macro')
Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index f1dab1f4b194..70c10a8f3e90 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1192,7 +1192,7 @@ struct snd_soc_pcm_runtime {
 	     ((i) < rtd->num_codecs) && ((dai) = rtd->codec_dais[i]); \
 	     (i)++)
 #define for_each_rtd_codec_dai_rollback(rtd, i, dai)		\
-	for (; ((i--) >= 0) && ((dai) = rtd->codec_dais[i]);)
+	for (; ((--i) >= 0) && ((dai) = rtd->codec_dais[i]);)
 
 
 /* mixer control */
-- 
cgit v1.2.3


From dafb7f9aef2fd44991ff1691721ff765a23be27b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 2 Nov 2018 06:36:32 -0400
Subject: v4l2-controls: add a missing include

As warned by "make headers_check", the definition for the linux-specific
integer types is missing:

	./usr/include/linux/v4l2-controls.h:1105: found __[us]{8,16,32,64} type without #include <linux/types.h>

Fixes: c27bb30e7b6d ("media: v4l: Add definitions for MPEG-2 slice format and metadata")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 51b095898f4b..86a54916206f 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -47,6 +47,8 @@
  *  videodev2.h.
  */
 
+#include <linux/types.h>
+
 #ifndef __LINUX_V4L2_CONTROLS_H
 #define __LINUX_V4L2_CONTROLS_H
 
-- 
cgit v1.2.3


From 4c0608f4a0e76dfb82d3accd20081f4bf47ed143 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 30 Oct 2018 09:45:55 -0400
Subject: XArray: Regularise xa_reserve

The xa_reserve() function was a little unusual in that it attempted to
be callable for all kinds of locking scenarios.  Make it look like the
other APIs with __xa_reserve, xa_reserve_bh and xa_reserve_irq variants.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst | 13 +++++++
 include/linux/xarray.h            | 80 ++++++++++++++++++++++++++++++++++++++-
 lib/test_xarray.c                 |  6 +++
 lib/xarray.c                      | 18 ++++-----
 4 files changed, 105 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index a4e705108f42..65c77a81b689 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -105,6 +105,15 @@ may result in the entry being marked at some, but not all of the other
 indices.  Storing into one index may result in the entry retrieved by
 some, but not all of the other indices changing.
 
+Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
+will not need to allocate memory.  The :c:func:`xa_reserve` function
+will store a reserved entry at the indicated index.  Users of the normal
+API will see this entry as containing ``NULL``.  If you do not need to
+use the reserved entry, you can call :c:func:`xa_release` to remove the
+unused entry.  If another user has stored to the entry in the meantime,
+:c:func:`xa_release` will do nothing; if instead you want the entry to
+become ``NULL``, you should use :c:func:`xa_erase`.
+
 Finally, you can remove all entries from an XArray by calling
 :c:func:`xa_destroy`.  If the XArray entries are pointers, you may wish
 to free the entries first.  You can do this by iterating over all present
@@ -167,6 +176,9 @@ Takes xa_lock internally:
  * :c:func:`xa_alloc`
  * :c:func:`xa_alloc_bh`
  * :c:func:`xa_alloc_irq`
+ * :c:func:`xa_reserve`
+ * :c:func:`xa_reserve_bh`
+ * :c:func:`xa_reserve_irq`
  * :c:func:`xa_destroy`
  * :c:func:`xa_set_mark`
  * :c:func:`xa_clear_mark`
@@ -177,6 +189,7 @@ Assumes xa_lock held on entry:
  * :c:func:`__xa_erase`
  * :c:func:`__xa_cmpxchg`
  * :c:func:`__xa_alloc`
+ * :c:func:`__xa_reserve`
  * :c:func:`__xa_set_mark`
  * :c:func:`__xa_clear_mark`
 
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index d9514928ddac..c2cb0426c60c 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -291,7 +291,6 @@ void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *xa_cmpxchg(struct xarray *, unsigned long index,
 			void *old, void *entry, gfp_t);
-int xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -455,6 +454,7 @@ void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 		void *entry, gfp_t);
 int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t);
+int __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
@@ -621,6 +621,84 @@ static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry,
 	return err;
 }
 
+/**
+ * xa_reserve() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * Ensures there is somewhere to store an entry at @index in the array.
+ * If there is already something stored at @index, this function does
+ * nothing.  If there was nothing there, the entry is marked as reserved.
+ * Loading from a reserved entry returns a %NULL pointer.
+ *
+ * If you do not use the entry that you have reserved, call xa_release()
+ * or xa_erase() to free any unnecessary memory.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.
+ * May sleep if the @gfp flags permit.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock(xa);
+
+	return ret;
+}
+
+/**
+ * xa_reserve_bh() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * A softirq-disabling version of xa_reserve().
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock_bh(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock_bh(xa);
+
+	return ret;
+}
+
+/**
+ * xa_reserve_irq() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * An interrupt-disabling version of xa_reserve().
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock_irq(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock_irq(xa);
+
+	return ret;
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 126127658b49..e5294b20b52f 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -373,6 +373,12 @@ static noinline void check_reserve(struct xarray *xa)
 	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
+	/* And so does xa_insert */
+	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != 0);
+	xa_erase_index(xa, 12345678);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
 	/* Can iterate through a reserved entry */
 	xa_store_index(xa, 5, GFP_KERNEL);
 	xa_reserve(xa, 6, GFP_KERNEL);
diff --git a/lib/xarray.c b/lib/xarray.c
index e7be4e47c6a9..9cab8cfef8a8 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1488,7 +1488,7 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 EXPORT_SYMBOL(__xa_cmpxchg);
 
 /**
- * xa_reserve() - Reserve this index in the XArray.
+ * __xa_reserve() - Reserve this index in the XArray.
  * @xa: XArray.
  * @index: Index into array.
  * @gfp: Memory allocation flags.
@@ -1496,33 +1496,29 @@ EXPORT_SYMBOL(__xa_cmpxchg);
  * Ensures there is somewhere to store an entry at @index in the array.
  * If there is already something stored at @index, this function does
  * nothing.  If there was nothing there, the entry is marked as reserved.
- * Loads from @index will continue to see a %NULL pointer until a
- * subsequent store to @index.
+ * Loading from a reserved entry returns a %NULL pointer.
  *
  * If you do not use the entry that you have reserved, call xa_release()
  * or xa_erase() to free any unnecessary memory.
  *
- * Context: Process context.  Takes and releases the xa_lock, IRQ or BH safe
- * if specified in XArray flags.  May sleep if the @gfp flags permit.
+ * Context: Any context.  Expects the xa_lock to be held on entry.  May
+ * release the lock, sleep and reacquire the lock if the @gfp flags permit.
  * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
  */
-int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+int __xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	XA_STATE(xas, xa, index);
-	unsigned int lock_type = xa_lock_type(xa);
 	void *curr;
 
 	do {
-		xas_lock_type(&xas, lock_type);
 		curr = xas_load(&xas);
 		if (!curr)
 			xas_store(&xas, XA_ZERO_ENTRY);
-		xas_unlock_type(&xas, lock_type);
-	} while (xas_nomem(&xas, gfp));
+	} while (__xas_nomem(&xas, gfp));
 
 	return xas_error(&xas);
 }
-EXPORT_SYMBOL(xa_reserve);
+EXPORT_SYMBOL(__xa_reserve);
 
 #ifdef CONFIG_XARRAY_MULTI
 static void xas_set_range(struct xa_state *xas, unsigned long first,
-- 
cgit v1.2.3


From c5beb07e7a06b24f4f27304f6282b5dbd929543b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 31 Oct 2018 14:39:28 -0400
Subject: XArray: Unify xa_cmpxchg and __xa_cmpxchg

xa_cmpxchg() was one of the largest functions in the xarray
implementation.  By turning it into a wrapper and having the callers
take the lock (like several other functions), we save 160 bytes on a
tinyconfig build and reduce the duplication in xarray.c.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 113 ++++++++++++++++++++++++++++++-------------------
 lib/xarray.c           |  41 ------------------
 2 files changed, 69 insertions(+), 85 deletions(-)

(limited to 'include')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index c2cb0426c60c..8e59d4fbd55e 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -289,8 +289,6 @@ struct xarray {
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
-void *xa_cmpxchg(struct xarray *, unsigned long index,
-			void *old, void *entry, gfp_t);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -359,48 +357,6 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index)
 	return xa_store(xa, index, NULL, 0);
 }
 
-/**
- * xa_insert() - Store this entry in the XArray unless another entry is
- *			already present.
- * @xa: XArray.
- * @index: Index into array.
- * @entry: New entry.
- * @gfp: Memory allocation flags.
- *
- * If you would rather see the existing entry in the array, use xa_cmpxchg().
- * This function is for users who don't care what the entry is, only that
- * one is present.
- *
- * Context: Process context.  Takes and releases the xa_lock.
- *	    May sleep if the @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
- * -ENOMEM if memory could not be allocated.
- */
-static inline int xa_insert(struct xarray *xa, unsigned long index,
-		void *entry, gfp_t gfp)
-{
-	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
-	if (!curr)
-		return 0;
-	if (xa_is_err(curr))
-		return xa_err(curr);
-	return -EEXIST;
-}
-
-/**
- * xa_release() - Release a reserved entry.
- * @xa: XArray.
- * @index: Index of entry.
- *
- * After calling xa_reserve(), you can call this function to release the
- * reservation.  If the entry at @index has been stored to, this function
- * will do nothing.
- */
-static inline void xa_release(struct xarray *xa, unsigned long index)
-{
-	xa_cmpxchg(xa, index, NULL, NULL, 0);
-}
-
 /**
  * xa_for_each() - Iterate over a portion of an XArray.
  * @xa: XArray.
@@ -534,6 +490,61 @@ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index)
 	return entry;
 }
 
+/**
+ * xa_cmpxchg() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * If the entry at @index is the same as @old, replace it with @entry.
+ * If the return value is equal to @old, then the exchange was successful.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.  May sleep
+ * if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index,
+			void *old, void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock(xa);
+	curr = __xa_cmpxchg(xa, index, old, entry, gfp);
+	xa_unlock(xa);
+
+	return curr;
+}
+
+/**
+ * xa_insert() - Store this entry in the XArray unless another entry is
+ *			already present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * If you would rather see the existing entry in the array, use xa_cmpxchg().
+ * This function is for users who don't care what the entry is, only that
+ * one is present.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.
+ *	    May sleep if the @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+static inline int xa_insert(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
+	if (!curr)
+		return 0;
+	if (xa_is_err(curr))
+		return xa_err(curr);
+	return -EEXIST;
+}
+
 /**
  * xa_alloc() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
@@ -699,6 +710,20 @@ int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
 	return ret;
 }
 
+/**
+ * xa_release() - Release a reserved entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * After calling xa_reserve(), you can call this function to release the
+ * reservation.  If the entry at @index has been stored to, this function
+ * will do nothing.
+ */
+static inline void xa_release(struct xarray *xa, unsigned long index)
+{
+	xa_cmpxchg(xa, index, NULL, NULL, 0);
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
diff --git a/lib/xarray.c b/lib/xarray.c
index 9cab8cfef8a8..77671d4a7910 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1406,47 +1406,6 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 }
 EXPORT_SYMBOL(__xa_store);
 
-/**
- * xa_cmpxchg() - Conditionally replace an entry in the XArray.
- * @xa: XArray.
- * @index: Index into array.
- * @old: Old value to test against.
- * @entry: New value to place in array.
- * @gfp: Memory allocation flags.
- *
- * If the entry at @index is the same as @old, replace it with @entry.
- * If the return value is equal to @old, then the exchange was successful.
- *
- * Context: Process context.  Takes and releases the xa_lock.  May sleep
- * if the @gfp flags permit.
- * Return: The old value at this index or xa_err() if an error happened.
- */
-void *xa_cmpxchg(struct xarray *xa, unsigned long index,
-			void *old, void *entry, gfp_t gfp)
-{
-	XA_STATE(xas, xa, index);
-	void *curr;
-
-	if (WARN_ON_ONCE(xa_is_internal(entry)))
-		return XA_ERROR(-EINVAL);
-
-	do {
-		xas_lock(&xas);
-		curr = xas_load(&xas);
-		if (curr == XA_ZERO_ENTRY)
-			curr = NULL;
-		if (curr == old) {
-			xas_store(&xas, entry);
-			if (xa_track_free(xa) && entry)
-				xas_clear_mark(&xas, XA_FREE_MARK);
-		}
-		xas_unlock(&xas);
-	} while (xas_nomem(&xas, gfp));
-
-	return xas_result(&xas, curr);
-}
-EXPORT_SYMBOL(xa_cmpxchg);
-
 /**
  * __xa_cmpxchg() - Store this entry in the XArray.
  * @xa: XArray.
-- 
cgit v1.2.3


From 9c16bb88905456a9b1299338041f05fa7699971b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 15:48:49 -0500
Subject: XArray: Turn xa_erase into an exported function

Make xa_erase() take the spinlock and then call __xa_erase(), but make
it out of line since it's such a common function.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 18 +-----------------
 lib/xarray.c           | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 8e59d4fbd55e..4c839c17a99b 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -289,6 +289,7 @@ struct xarray {
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
+void *xa_erase(struct xarray *, unsigned long index);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -340,23 +341,6 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
 	return xa->xa_flags & XA_FLAGS_MARK(mark);
 }
 
-/**
- * xa_erase() - Erase this entry from the XArray.
- * @xa: XArray.
- * @index: Index of entry.
- *
- * This function is the equivalent of calling xa_store() with %NULL as
- * the third argument.  The XArray does not need to allocate memory, so
- * the user does not need to provide GFP flags.
- *
- * Context: Process context.  Takes and releases the xa_lock.
- * Return: The entry which used to be at this index.
- */
-static inline void *xa_erase(struct xarray *xa, unsigned long index)
-{
-	return xa_store(xa, index, NULL, 0);
-}
-
 /**
  * xa_for_each() - Iterate over a portion of an XArray.
  * @xa: XArray.
diff --git a/lib/xarray.c b/lib/xarray.c
index 77671d4a7910..b55aa8c1c20f 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1336,6 +1336,30 @@ void *__xa_erase(struct xarray *xa, unsigned long index)
 }
 EXPORT_SYMBOL(__xa_erase);
 
+/**
+ * xa_erase() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * This function is the equivalent of calling xa_store() with %NULL as
+ * the third argument.  The XArray does not need to allocate memory, so
+ * the user does not need to provide GFP flags.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.
+ * Return: The entry which used to be at this index.
+ */
+void *xa_erase(struct xarray *xa, unsigned long index)
+{
+	void *entry;
+
+	xa_lock(xa);
+	entry = __xa_erase(xa, index);
+	xa_unlock(xa);
+
+	return entry;
+}
+EXPORT_SYMBOL(xa_erase);
+
 /**
  * xa_store() - Store this entry in the XArray.
  * @xa: XArray.
-- 
cgit v1.2.3


From 84e5acb76dacb8ebd648a86a53907ce0dd616534 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 14:41:29 -0400
Subject: XArray: Add xa_store_bh() and xa_store_irq()

These convenience wrappers disable interrupts while taking the spinlock.
A number of drivers would otherwise have to open-code these functions.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  5 +++-
 include/linux/xarray.h            | 52 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 65c77a81b689..8a6e2087de77 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -167,6 +167,8 @@ Takes RCU read lock:
 
 Takes xa_lock internally:
  * :c:func:`xa_store`
+ * :c:func:`xa_store_bh`
+ * :c:func:`xa_store_irq`
  * :c:func:`xa_insert`
  * :c:func:`xa_erase`
  * :c:func:`xa_erase_bh`
@@ -247,7 +249,8 @@ Sharing the XArray with interrupt context is also possible, either
 using :c:func:`xa_lock_irqsave` in both the interrupt handler and process
 context, or :c:func:`xa_lock_irq` in process context and :c:func:`xa_lock`
 in the interrupt handler.  Some of the more common patterns have helper
-functions such as :c:func:`xa_erase_bh` and :c:func:`xa_erase_irq`.
+functions such as :c:func:`xa_store_bh`, :c:func:`xa_store_irq`,
+:c:func:`xa_erase_bh` and :c:func:`xa_erase_irq`.
 
 Sometimes you need to protect access to the XArray with a mutex because
 that lock sits above another mutex in the locking hierarchy.  That does
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 4c839c17a99b..52d9732e4ec4 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -426,6 +426,58 @@ static inline int __xa_insert(struct xarray *xa, unsigned long index,
 	return -EEXIST;
 }
 
+/**
+ * xa_store_bh() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_store() except it disables softirqs
+ * while holding the array lock.
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_store_bh(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_bh(xa);
+	curr = __xa_store(xa, index, entry, gfp);
+	xa_unlock_bh(xa);
+
+	return curr;
+}
+
+/**
+ * xa_store_irq() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_store() except it disables interrupts
+ * while holding the array lock.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_irq(xa);
+	curr = __xa_store(xa, index, entry, gfp);
+	xa_unlock_irq(xa);
+
+	return curr;
+}
+
 /**
  * xa_erase_bh() - Erase this entry from the XArray.
  * @xa: XArray.
-- 
cgit v1.2.3


From 804dfaf01bcc9daa4298c608ba9018abf616ec48 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 16:37:15 -0500
Subject: XArray: Fix Documentation

Minor fixes.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  6 +++++-
 include/linux/xarray.h            |  4 ++--
 lib/xarray.c                      | 10 +++++-----
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 616ac406bf86..dbe96cb5558e 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -74,7 +74,8 @@ using :c:func:`xa_load`.  xa_store will overwrite any entry with the
 new entry and return the previous entry stored at that index.  You can
 use :c:func:`xa_erase` instead of calling :c:func:`xa_store` with a
 ``NULL`` entry.  There is no difference between an entry that has never
-been stored to and one that has most recently had ``NULL`` stored to it.
+been stored to, one that has been erased and one that has most recently
+had ``NULL`` stored to it.
 
 You can conditionally replace an entry at an index by using
 :c:func:`xa_cmpxchg`.  Like :c:func:`cmpxchg`, it will only succeed if
@@ -114,6 +115,9 @@ unused entry.  If another user has stored to the entry in the meantime,
 :c:func:`xa_release` will do nothing; if instead you want the entry to
 become ``NULL``, you should use :c:func:`xa_erase`.
 
+If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
+will return ``true``.
+
 Finally, you can remove all entries from an XArray by calling
 :c:func:`xa_destroy`.  If the XArray entries are pointers, you may wish
 to free the entries first.  You can do this by iterating over all present
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 52d9732e4ec4..564892e19f8c 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -487,7 +487,7 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
  * the third argument.  The XArray does not need to allocate memory, so
  * the user does not need to provide GFP flags.
  *
- * Context: Process context.  Takes and releases the xa_lock while
+ * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.
  * Return: The entry which used to be at this index.
  */
@@ -622,7 +622,7 @@ static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry,
  * Updates the @id pointer with the index, then stores the entry at that
  * index.  A concurrent lookup will not see an uninitialised @id.
  *
- * Context: Process context.  Takes and releases the xa_lock while
+ * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
  * there is no more space in the XArray.
diff --git a/lib/xarray.c b/lib/xarray.c
index c3e2084aa313..7946380cd6c9 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -610,8 +610,8 @@ static int xas_expand(struct xa_state *xas, void *head)
  * (see the xa_cmpxchg() implementation for an example).
  *
  * Return: If the slot already existed, returns the contents of this slot.
- * If the slot was newly created, returns NULL.  If it failed to create the
- * slot, returns NULL and indicates the error in @xas.
+ * If the slot was newly created, returns %NULL.  If it failed to create the
+ * slot, returns %NULL and indicates the error in @xas.
  */
 static void *xas_create(struct xa_state *xas)
 {
@@ -1640,7 +1640,7 @@ EXPORT_SYMBOL(__xa_alloc);
  * @index: Index of entry.
  * @mark: Mark number.
  *
- * Attempting to set a mark on a NULL entry does not succeed.
+ * Attempting to set a mark on a %NULL entry does not succeed.
  *
  * Context: Any context.  Expects xa_lock to be held on entry.
  */
@@ -1710,7 +1710,7 @@ EXPORT_SYMBOL(xa_get_mark);
  * @index: Index of entry.
  * @mark: Mark number.
  *
- * Attempting to set a mark on a NULL entry does not succeed.
+ * Attempting to set a mark on a %NULL entry does not succeed.
  *
  * Context: Process context.  Takes and releases the xa_lock.
  */
@@ -1879,7 +1879,7 @@ static unsigned int xas_extract_marked(struct xa_state *xas, void **dst,
  *
  * The @filter may be an XArray mark value, in which case entries which are
  * marked with that mark will be copied.  It may also be %XA_PRESENT, in
- * which case all entries which are not NULL will be copied.
+ * which case all entries which are not %NULL will be copied.
  *
  * The entries returned may not represent a snapshot of the XArray at a
  * moment in time.  For example, if another thread stores to index 5, then
-- 
cgit v1.2.3


From df18bfd35bbf7cb1a420b5beede1de29343793b3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 07:09:07 -0400
Subject: media: v4l: fix uapi mpeg slice params definition

We get a headers_check warning about the newly defined ioctl command
structures:

./usr/include/linux/v4l2-controls.h:1105: found __[us]{8,16,32,64} type without #include <linux/types.h>

This is resolved by including linux/types.h, as suggested by the
warning, but there is another problem: Three of the four structures
have an odd number of __u8 headers, but are aligned to 32 bit in the
v4l2_ctrl_mpeg2_slice_params, so we get an implicit padding byte
for each one. To solve that, let's add explicit padding that can
be set to zero and verified in the kernel.

Fixes: c27bb30e7b6d ("media: v4l: Add definitions for MPEG-2 slice format and metadata")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 5 +++++
 include/uapi/linux/v4l2-controls.h   | 7 +++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 6e37950292cd..5f2b033a7a42 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1664,6 +1664,11 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
 		    p_mpeg2_slice_params->forward_ref_index >= VIDEO_MAX_FRAME)
 			return -EINVAL;
 
+		if (p_mpeg2_slice_params->pad ||
+		    p_mpeg2_slice_params->picture.pad ||
+		    p_mpeg2_slice_params->sequence.pad)
+			return -EINVAL;
+
 		return 0;
 
 	case V4L2_CTRL_TYPE_MPEG2_QUANTIZATION:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 86a54916206f..998983a6e6b7 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -47,11 +47,11 @@
  *  videodev2.h.
  */
 
-#include <linux/types.h>
-
 #ifndef __LINUX_V4L2_CONTROLS_H
 #define __LINUX_V4L2_CONTROLS_H
 
+#include <linux/types.h>
+
 /* Control classes */
 #define V4L2_CTRL_CLASS_USER		0x00980000	/* Old-style 'user' controls */
 #define V4L2_CTRL_CLASS_MPEG		0x00990000	/* MPEG-compression controls */
@@ -1112,6 +1112,7 @@ struct v4l2_mpeg2_sequence {
 	__u8	profile_and_level_indication;
 	__u8	progressive_sequence;
 	__u8	chroma_format;
+	__u8	pad;
 };
 
 struct v4l2_mpeg2_picture {
@@ -1130,6 +1131,7 @@ struct v4l2_mpeg2_picture {
 	__u8	alternate_scan;
 	__u8	repeat_first_field;
 	__u8	progressive_frame;
+	__u8	pad;
 };
 
 struct v4l2_ctrl_mpeg2_slice_params {
@@ -1144,6 +1146,7 @@ struct v4l2_ctrl_mpeg2_slice_params {
 
 	__u8	backward_ref_index;
 	__u8	forward_ref_index;
+	__u8	pad;
 };
 
 struct v4l2_ctrl_mpeg2_quantization {
-- 
cgit v1.2.3


From ef86eaf97acd6d82cd3fd40f997b1c8c4895a443 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 18 Oct 2018 14:54:29 -0400
Subject: media: Rename vb2_m2m_request_queue -> v4l2_m2m_request_queue

To be consistent with the rest of the mem2mem helpers,
rename vb2_m2m_request_queue to v4l2_m2m_request_queue.

This is just a cosmetic change.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vim2m.c              | 2 +-
 drivers/media/v4l2-core/v4l2-mem2mem.c      | 4 ++--
 drivers/staging/media/sunxi/cedrus/cedrus.c | 2 +-
 include/media/v4l2-mem2mem.h                | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c
index af150a0395df..d82db738f174 100644
--- a/drivers/media/platform/vim2m.c
+++ b/drivers/media/platform/vim2m.c
@@ -1009,7 +1009,7 @@ static const struct v4l2_m2m_ops m2m_ops = {
 
 static const struct media_device_ops m2m_media_ops = {
 	.req_validate = vb2_request_validate,
-	.req_queue = vb2_m2m_request_queue,
+	.req_queue = v4l2_m2m_request_queue,
 };
 
 static int vim2m_probe(struct platform_device *pdev)
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index d7806db222d8..1ed2465972ac 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -953,7 +953,7 @@ void v4l2_m2m_buf_queue(struct v4l2_m2m_ctx *m2m_ctx,
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_buf_queue);
 
-void vb2_m2m_request_queue(struct media_request *req)
+void v4l2_m2m_request_queue(struct media_request *req)
 {
 	struct media_request_object *obj, *obj_safe;
 	struct v4l2_m2m_ctx *m2m_ctx = NULL;
@@ -997,7 +997,7 @@ void vb2_m2m_request_queue(struct media_request *req)
 	if (m2m_ctx)
 		v4l2_m2m_try_schedule(m2m_ctx);
 }
-EXPORT_SYMBOL_GPL(vb2_m2m_request_queue);
+EXPORT_SYMBOL_GPL(v4l2_m2m_request_queue);
 
 /* Videobuf2 ioctl helpers */
 
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 82558455384a..dd121f66fa2d 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -253,7 +253,7 @@ static const struct v4l2_m2m_ops cedrus_m2m_ops = {
 
 static const struct media_device_ops cedrus_m2m_media_ops = {
 	.req_validate	= cedrus_request_validate,
-	.req_queue	= vb2_m2m_request_queue,
+	.req_queue	= v4l2_m2m_request_queue,
 };
 
 static int cedrus_probe(struct platform_device *pdev)
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 58c1ecf3d648..5467264771ec 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -624,7 +624,7 @@ v4l2_m2m_dst_buf_remove_by_idx(struct v4l2_m2m_ctx *m2m_ctx, unsigned int idx)
 
 /* v4l2 request helper */
 
-void vb2_m2m_request_queue(struct media_request *req);
+void v4l2_m2m_request_queue(struct media_request *req);
 
 /* v4l2 ioctl helpers */
 
-- 
cgit v1.2.3


From 99b77fef3c6c69bb7664f1ac97a69d5b17968dae Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 31 Oct 2018 12:20:28 +0200
Subject: net/mlx5: Fix XRC SRQ umem valid bits

Adapt XRC SRQ to the latest HW specification with fixed definition
around umem valid bits. The previous definition relied on a bit which
was taken for other purposes in legacy FW.

Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dbff9ff28f2c..34e17e6f8942 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2473,14 +2473,15 @@ struct mlx5_ifc_xrc_srqc_bits {
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         dbr_umem_valid[0x1];
+	u8         reserved_at_22[0x1];
 	u8         rlky[0x1];
 	u8         basic_cyclic_rcv_wqe[0x1];
 	u8         log_rq_stride[0x3];
 	u8         xrcd[0x18];
 
 	u8         page_offset[0x6];
-	u8         reserved_at_46[0x2];
+	u8         reserved_at_46[0x1];
+	u8         dbr_umem_valid[0x1];
 	u8         cqn[0x18];
 
 	u8         reserved_at_60[0x20];
@@ -6689,9 +6690,12 @@ struct mlx5_ifc_create_xrc_srq_in_bits {
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_at_280[0x40];
+	u8         reserved_at_280[0x60];
+
 	u8         xrc_srq_umem_valid[0x1];
-	u8         reserved_at_2c1[0x5bf];
+	u8         reserved_at_2e1[0x1f];
+
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
-- 
cgit v1.2.3


From 781f0766cc41a9dd2e5d118ef4b1d5d89430257b Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 19 Oct 2018 16:14:50 +0800
Subject: USB: Wait for extra delay time after USB_PORT_FEAT_RESET for quirky
 hub

Devices connected under Terminus Technology Inc. Hub (1a40:0101) may
fail to work after the system resumes from suspend:
[  206.063325] usb 3-2.4: reset full-speed USB device number 4 using xhci_hcd
[  206.143691] usb 3-2.4: device descriptor read/64, error -32
[  206.351671] usb 3-2.4: device descriptor read/64, error -32

Info for this hub:
T:  Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#=  2 Spd=480 MxCh= 4
D:  Ver= 2.00 Cls=09(hub  ) Sub=00 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=1a40 ProdID=0101 Rev=01.11
S:  Product=USB 2.0 Hub
C:  #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=100mA
I:  If#= 0 Alt= 0 #EPs= 1 Cls=09(hub  ) Sub=00 Prot=00 Driver=hub

Some expirements indicate that the USB devices connected to the hub are
innocent, it's the hub itself is to blame. The hub needs extra delay
time after it resets its port.

Hence wait for extra delay, if the device is connected to this quirky
hub.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: stable <stable@vger.kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  2 ++
 drivers/usb/core/hub.c                          | 14 +++++++++++---
 drivers/usb/core/quirks.c                       |  6 ++++++
 include/linux/usb/quirks.h                      |  3 +++
 4 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 81d1d5a74728..19f4423e70d9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4713,6 +4713,8 @@
 					prevent spurious wakeup);
 				n = USB_QUIRK_DELAY_CTRL_MSG (Device needs a
 					pause after every control message);
+				o = USB_QUIRK_HUB_SLOW_RESET (Hub needs extra
+					delay after resetting its port);
 			Example: quirks=0781:5580:bk,0a5c:5834:gij
 
 	usbhid.mousepoll=
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index c6077d582d29..d9bd7576786a 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2794,6 +2794,7 @@ static int hub_port_reset(struct usb_hub *hub, int port1,
 	int i, status;
 	u16 portchange, portstatus;
 	struct usb_port *port_dev = hub->ports[port1 - 1];
+	int reset_recovery_time;
 
 	if (!hub_is_superspeed(hub->hdev)) {
 		if (warm) {
@@ -2885,11 +2886,18 @@ static int hub_port_reset(struct usb_hub *hub, int port1,
 
 done:
 	if (status == 0) {
-		/* TRSTRCY = 10 ms; plus some extra */
 		if (port_dev->quirks & USB_PORT_QUIRK_FAST_ENUM)
 			usleep_range(10000, 12000);
-		else
-			msleep(10 + 40);
+		else {
+			/* TRSTRCY = 10 ms; plus some extra */
+			reset_recovery_time = 10 + 40;
+
+			/* Hub needs extra delay after resetting its port. */
+			if (hub->hdev->quirks & USB_QUIRK_HUB_SLOW_RESET)
+				reset_recovery_time += 100;
+
+			msleep(reset_recovery_time);
+		}
 
 		if (udev) {
 			struct usb_hcd *hcd = bus_to_hcd(udev->bus);
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 178d6c6063c0..4d7d948eae63 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -128,6 +128,9 @@ static int quirks_param_set(const char *val, const struct kernel_param *kp)
 			case 'n':
 				flags |= USB_QUIRK_DELAY_CTRL_MSG;
 				break;
+			case 'o':
+				flags |= USB_QUIRK_HUB_SLOW_RESET;
+				break;
 			/* Ignore unrecognized flag characters */
 			}
 		}
@@ -380,6 +383,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
 			USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+	/* Terminus Technology Inc. Hub */
+	{ USB_DEVICE(0x1a40, 0x0101), .driver_info = USB_QUIRK_HUB_SLOW_RESET },
+
 	/* Corsair K70 RGB */
 	{ USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
 
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index b7a99ce56bc9..a1be64c9940f 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -66,4 +66,7 @@
 /* Device needs a pause after every control message. */
 #define USB_QUIRK_DELAY_CTRL_MSG		BIT(13)
 
+/* Hub needs extra delay after resetting its port. */
+#define USB_QUIRK_HUB_SLOW_RESET		BIT(14)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From a4310fa2f24687888ce80fdb0e88583561a23700 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 31 Oct 2018 10:37:46 +0100
Subject: can: dev: can_get_echo_skb(): factor out non sending code to
 __can_get_echo_skb()

This patch factors out all non sending parts of can_get_echo_skb() into
a seperate function __can_get_echo_skb(), so that it can be re-used in
an upcoming patch.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   | 36 +++++++++++++++++++++++++-----------
 include/linux/can/dev.h |  1 +
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 49163570a63a..80530ab37b1e 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -477,14 +477,7 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(can_put_echo_skb);
 
-/*
- * Get the skb from the stack and loop it back locally
- *
- * The function is typically called when the TX done interrupt
- * is handled in the device driver. The driver must protect
- * access to priv->echo_skb, if necessary.
- */
-unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
@@ -495,13 +488,34 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
 		struct can_frame *cf = (struct can_frame *)skb->data;
 		u8 dlc = cf->can_dlc;
 
-		netif_rx(priv->echo_skb[idx]);
+		*len_ptr = dlc;
 		priv->echo_skb[idx] = NULL;
 
-		return dlc;
+		return skb;
 	}
 
-	return 0;
+	return NULL;
+}
+
+/*
+ * Get the skb from the stack and loop it back locally
+ *
+ * The function is typically called when the TX done interrupt
+ * is handled in the device driver. The driver must protect
+ * access to priv->echo_skb, if necessary.
+ */
+unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
+{
+	struct sk_buff *skb;
+	u8 len;
+
+	skb = __can_get_echo_skb(dev, idx, &len);
+	if (!skb)
+		return 0;
+
+	netif_rx(skb);
+
+	return len;
 }
 EXPORT_SYMBOL_GPL(can_get_echo_skb);
 
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index a83e1f632eb7..f01623aef2f7 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -169,6 +169,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
 
 void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 		      unsigned int idx);
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr);
 unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
 void can_free_echo_skb(struct net_device *dev, unsigned int idx);
 
-- 
cgit v1.2.3


From 55059f2b7f868cd43b3ad30e28e18347e1b46ace Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:38 +0200
Subject: can: rx-offload: introduce can_rx_offload_get_echo_skb() and
 can_rx_offload_queue_sorted() functions

Current CAN framework can't guarantee proper/chronological order
of RX and TX-ECHO messages. To make this possible, drivers should use
this functions instead of can_get_echo_skb().

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/rx-offload.c   | 46 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/can/rx-offload.h |  4 ++++
 2 files changed, 50 insertions(+)

(limited to 'include')

diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c
index c7d05027a7a0..c368686e2164 100644
--- a/drivers/net/can/rx-offload.c
+++ b/drivers/net/can/rx-offload.c
@@ -211,6 +211,52 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload)
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo);
 
+int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
+				struct sk_buff *skb, u32 timestamp)
+{
+	struct can_rx_offload_cb *cb;
+	unsigned long flags;
+
+	if (skb_queue_len(&offload->skb_queue) >
+	    offload->skb_queue_len_max)
+		return -ENOMEM;
+
+	cb = can_rx_offload_get_cb(skb);
+	cb->timestamp = timestamp;
+
+	spin_lock_irqsave(&offload->skb_queue.lock, flags);
+	__skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare);
+	spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
+
+	can_rx_offload_schedule(offload);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_queue_sorted);
+
+unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
+					 unsigned int idx, u32 timestamp)
+{
+	struct net_device *dev = offload->dev;
+	struct net_device_stats *stats = &dev->stats;
+	struct sk_buff *skb;
+	u8 len;
+	int err;
+
+	skb = __can_get_echo_skb(dev, idx, &len);
+	if (!skb)
+		return 0;
+
+	err = can_rx_offload_queue_sorted(offload, skb, timestamp);
+	if (err) {
+		stats->rx_errors++;
+		stats->tx_fifo_errors++;
+	}
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb);
+
 int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb)
 {
 	if (skb_queue_len(&offload->skb_queue) >
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index cb31683bbe15..01a7c9e5d8d8 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -41,6 +41,10 @@ int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *
 int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight);
 int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg);
 int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
+int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
+				struct sk_buff *skb, u32 timestamp);
+unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
+					 unsigned int idx, u32 timestamp);
 int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
 void can_rx_offload_reset(struct can_rx_offload *offload);
 void can_rx_offload_del(struct can_rx_offload *offload);
-- 
cgit v1.2.3


From 4530ec36bb1e0d24f41c33229694adacda3d5d89 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:40 +0200
Subject: can: rx-offload: rename can_rx_offload_irq_queue_err_skb() to
 can_rx_offload_queue_tail()

This function has nothing todo with error.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c      | 4 ++--
 drivers/net/can/rx-offload.c   | 5 +++--
 include/linux/can/rx-offload.h | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 41a175f80c4b..5923bd0ec118 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -610,7 +610,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr)
 	if (tx_errors)
 		dev->stats.tx_errors++;
 
-	can_rx_offload_irq_queue_err_skb(&priv->offload, skb);
+	can_rx_offload_queue_tail(&priv->offload, skb);
 }
 
 static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
@@ -650,7 +650,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
 	if (unlikely(new_state == CAN_STATE_BUS_OFF))
 		can_bus_off(dev);
 
-	can_rx_offload_irq_queue_err_skb(&priv->offload, skb);
+	can_rx_offload_queue_tail(&priv->offload, skb);
 }
 
 static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload)
diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c
index c368686e2164..2ce4fa8698c7 100644
--- a/drivers/net/can/rx-offload.c
+++ b/drivers/net/can/rx-offload.c
@@ -257,7 +257,8 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb);
 
-int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb)
+int can_rx_offload_queue_tail(struct can_rx_offload *offload,
+			      struct sk_buff *skb)
 {
 	if (skb_queue_len(&offload->skb_queue) >
 	    offload->skb_queue_len_max)
@@ -268,7 +269,7 @@ int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_b
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(can_rx_offload_irq_queue_err_skb);
+EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail);
 
 static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight)
 {
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index 01a7c9e5d8d8..8268811a697e 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -45,7 +45,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
 				struct sk_buff *skb, u32 timestamp);
 unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
 					 unsigned int idx, u32 timestamp);
-int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
+int can_rx_offload_queue_tail(struct can_rx_offload *offload,
+			      struct sk_buff *skb);
 void can_rx_offload_reset(struct can_rx_offload *offload);
 void can_rx_offload_del(struct can_rx_offload *offload);
 void can_rx_offload_enable(struct can_rx_offload *offload);
-- 
cgit v1.2.3


From d19f9130b814d33c03118493c17454f7d90075d1 Mon Sep 17 00:00:00 2001
From: Elvira Khabirova <lineprinter@altlinux.org>
Date: Sat, 10 Nov 2018 04:22:09 +0100
Subject: x86/ptrace: Fix documentation for tracehook_report_syscall_entry()

tracehook_report_syscall_entry() is called not only
if %TIF_SYSCALL_TRACE is set, but also if %TIF_SYSCALL_EMU is set,
as appears from x86's entry code.

Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: ldv@altlinux.org
Cc: oleg@redhat.com
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/20181110042209.26333972@akathisia
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tracehook.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 40b0b4c1bf7b..df20f8bdbfa3 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,8 +83,8 @@ static inline int ptrace_report_syscall(struct pt_regs *regs)
  * tracehook_report_syscall_entry - task is about to attempt a system call
  * @regs:		user register state of current task
  *
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just entered the kernel for a system call.
+ * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
+ * when the current task has just entered the kernel for a system call.
  * Full user register state is available here.  Changing the values
  * in @regs can affect the system call number and arguments to be tried.
  * It is safe to block here, preventing the system call from beginning.
-- 
cgit v1.2.3


From 7150ceaacb27f7b3bf494e72cd4be4e11612dfff Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 12 Nov 2018 22:33:22 +0000
Subject: rxrpc: Fix life check

The life-checking function, which is used by kAFS to make sure that a call
is still live in the event of a pending signal, only samples the received
packet serial number counter; it doesn't actually provoke a change in the
counter, rather relying on the server to happen to give us a packet in the
time window.

Fix this by adding a function to force a ping to be transmitted.

kAFS then keeps track of whether there's been a stall, and if so, uses the
new function to ping the server, resetting the timeout to allow the reply
to come back.

If there's a stall, a ping and the call is *still* stalled in the same
place after another period, then the call will be aborted.

Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
Fixes: f4d15fb6f99a ("rxrpc: Provide functions for allowing cleaner handling of signals")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rxrpc.txt | 17 +++++++++++------
 fs/afs/rxrpc.c                     | 11 ++++++++++-
 include/net/af_rxrpc.h             |  3 ++-
 include/trace/events/rxrpc.h       |  2 ++
 net/rxrpc/af_rxrpc.c               | 27 +++++++++++++++++++++++----
 5 files changed, 48 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 605e00cdd6be..89f1302d593a 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -1056,18 +1056,23 @@ The kernel interface functions are as follows:
 
 	u32 rxrpc_kernel_check_life(struct socket *sock,
 				    struct rxrpc_call *call);
+	void rxrpc_kernel_probe_life(struct socket *sock,
+				     struct rxrpc_call *call);
 
-     This returns a number that is updated when ACKs are received from the peer
-     (notably including PING RESPONSE ACKs which we can elicit by sending PING
-     ACKs to see if the call still exists on the server).  The caller should
-     compare the numbers of two calls to see if the call is still alive after
-     waiting for a suitable interval.
+     The first function returns a number that is updated when ACKs are received
+     from the peer (notably including PING RESPONSE ACKs which we can elicit by
+     sending PING ACKs to see if the call still exists on the server).  The
+     caller should compare the numbers of two calls to see if the call is still
+     alive after waiting for a suitable interval.
 
      This allows the caller to work out if the server is still contactable and
      if the call is still alive on the server whilst waiting for the server to
      process a client operation.
 
-     This function may transmit a PING ACK.
+     The second function causes a ping ACK to be transmitted to try to provoke
+     the peer into responding, which would then cause the value returned by the
+     first function to change.  Note that this must be called in TASK_RUNNING
+     state.
 
  (*) Get reply timestamp.
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 59970886690f..a7b44863d502 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -576,6 +576,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 {
 	signed long rtt2, timeout;
 	long ret;
+	bool stalled = false;
 	u64 rtt;
 	u32 life, last_life;
 
@@ -609,12 +610,20 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 
 		life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
 		if (timeout == 0 &&
-		    life == last_life && signal_pending(current))
+		    life == last_life && signal_pending(current)) {
+			if (stalled)
 				break;
+			__set_current_state(TASK_RUNNING);
+			rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
+			timeout = rtt2;
+			stalled = true;
+			continue;
+		}
 
 		if (life != last_life) {
 			timeout = rtt2;
 			last_life = life;
+			stalled = false;
 		}
 
 		timeout = schedule_timeout(timeout);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index de587948042a..1adefe42c0a6 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -77,7 +77,8 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
 			    struct sockaddr_rxrpc *, struct key *);
 int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
 			    enum rxrpc_call_completion *, u32 *);
-u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
+u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
+void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
 u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
 bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
 				 ktime_t *);
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 573d5b901fb1..5b50fe4906d2 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -181,6 +181,7 @@ enum rxrpc_timer_trace {
 enum rxrpc_propose_ack_trace {
 	rxrpc_propose_ack_client_tx_end,
 	rxrpc_propose_ack_input_data,
+	rxrpc_propose_ack_ping_for_check_life,
 	rxrpc_propose_ack_ping_for_keepalive,
 	rxrpc_propose_ack_ping_for_lost_ack,
 	rxrpc_propose_ack_ping_for_lost_reply,
@@ -380,6 +381,7 @@ enum rxrpc_tx_point {
 #define rxrpc_propose_ack_traces \
 	EM(rxrpc_propose_ack_client_tx_end,	"ClTxEnd") \
 	EM(rxrpc_propose_ack_input_data,	"DataIn ") \
+	EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \
 	EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \
 	EM(rxrpc_propose_ack_ping_for_lost_ack,	"LostAck") \
 	EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 64362d078da8..a2522f9d71e2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -375,16 +375,35 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
  * getting ACKs from the server.  Returns a number representing the life state
  * which can be compared to that returned by a previous call.
  *
- * If this is a client call, ping ACKs will be sent to the server to find out
- * whether it's still responsive and whether the call is still alive on the
- * server.
+ * If the life state stalls, rxrpc_kernel_probe_life() should be called and
+ * then 2RTT waited.
  */
-u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_check_life(const struct socket *sock,
+			    const struct rxrpc_call *call)
 {
 	return call->acks_latest;
 }
 EXPORT_SYMBOL(rxrpc_kernel_check_life);
 
+/**
+ * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
+ * find out whether a call is still alive by pinging it.  This should cause the
+ * life state to be bumped in about 2*RTT.
+ *
+ * The must be called in TASK_RUNNING state on pain of might_sleep() objecting.
+ */
+void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
+{
+	rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+			  rxrpc_propose_ack_ping_for_check_life);
+	rxrpc_send_ack_packet(call, true, NULL);
+}
+EXPORT_SYMBOL(rxrpc_kernel_probe_life);
+
 /**
  * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
  * @sock: The socket the call is on
-- 
cgit v1.2.3


From 0145b50566e7de5637e80ecba96c7f0e6fff1aad Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 31 Oct 2018 15:20:05 +0100
Subject: iio/hid-sensors: Fix IIO_CHAN_INFO_RAW returning wrong values for
 signed numbers

Before this commit sensor_hub_input_attr_get_raw_value() failed to take
the signedness of 16 and 8 bit values into account, returning e.g.
65436 instead of -100 for the z-axis reading of an accelerometer.

This commit adds a new is_signed parameter to the function and makes all
callers pass the appropriate value for this.

While at it, this commit also fixes up some neighboring lines where
statements were needlessly split over 2 lines to improve readability.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/hid/hid-sensor-custom.c                  |  2 +-
 drivers/hid/hid-sensor-hub.c                     | 13 ++++++++++---
 drivers/iio/accel/hid-sensor-accel-3d.c          |  5 ++++-
 drivers/iio/gyro/hid-sensor-gyro-3d.c            |  5 ++++-
 drivers/iio/humidity/hid-sensor-humidity.c       |  3 ++-
 drivers/iio/light/hid-sensor-als.c               |  8 +++++---
 drivers/iio/light/hid-sensor-prox.c              |  8 +++++---
 drivers/iio/magnetometer/hid-sensor-magn-3d.c    |  8 +++++---
 drivers/iio/orientation/hid-sensor-incl-3d.c     |  8 +++++---
 drivers/iio/pressure/hid-sensor-press.c          |  8 +++++---
 drivers/iio/temperature/hid-sensor-temperature.c |  3 ++-
 drivers/rtc/rtc-hid-sensor-time.c                |  2 +-
 include/linux/hid-sensor-hub.h                   |  4 +++-
 13 files changed, 52 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index e8a114157f87..bb012bc032e0 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -358,7 +358,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
 						sensor_inst->hsdev,
 						sensor_inst->hsdev->usage,
 						usage, report_id,
-						SENSOR_HUB_SYNC);
+						SENSOR_HUB_SYNC, false);
 	} else if (!strncmp(name, "units", strlen("units")))
 		value = sensor_inst->fields[field_index].attribute.units;
 	else if (!strncmp(name, "unit-expo", strlen("unit-expo")))
diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index 2b63487057c2..4256fdc5cd6d 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(sensor_hub_get_feature);
 int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
 					u32 usage_id,
 					u32 attr_usage_id, u32 report_id,
-					enum sensor_hub_read_flags flag)
+					enum sensor_hub_read_flags flag,
+					bool is_signed)
 {
 	struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev);
 	unsigned long flags;
@@ -331,10 +332,16 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
 						&hsdev->pending.ready, HZ*5);
 		switch (hsdev->pending.raw_size) {
 		case 1:
-			ret_val = *(u8 *)hsdev->pending.raw_data;
+			if (is_signed)
+				ret_val = *(s8 *)hsdev->pending.raw_data;
+			else
+				ret_val = *(u8 *)hsdev->pending.raw_data;
 			break;
 		case 2:
-			ret_val = *(u16 *)hsdev->pending.raw_data;
+			if (is_signed)
+				ret_val = *(s16 *)hsdev->pending.raw_data;
+			else
+				ret_val = *(u16 *)hsdev->pending.raw_data;
 			break;
 		case 4:
 			ret_val = *(u32 *)hsdev->pending.raw_data;
diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index 41d97faf5013..38ff374a3ca4 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -149,6 +149,7 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 	struct hid_sensor_hub_device *hsdev =
 					accel_state->common_attributes.hsdev;
 
@@ -158,12 +159,14 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev,
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&accel_state->common_attributes, true);
 		report_id = accel_state->accel[chan->scan_index].report_id;
+		min = accel_state->accel[chan->scan_index].logical_minimum;
 		address = accel_3d_addresses[chan->scan_index];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 					accel_state->common_attributes.hsdev,
 					hsdev->usage, address, report_id,
-					SENSOR_HUB_SYNC);
+					SENSOR_HUB_SYNC,
+					min < 0);
 		else {
 			*val = 0;
 			hid_sensor_power_state(&accel_state->common_attributes,
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 36941e69f959..88e857c4baf4 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -111,6 +111,7 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -118,13 +119,15 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev,
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&gyro_state->common_attributes, true);
 		report_id = gyro_state->gyro[chan->scan_index].report_id;
+		min = gyro_state->gyro[chan->scan_index].logical_minimum;
 		address = gyro_3d_addresses[chan->scan_index];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 					gyro_state->common_attributes.hsdev,
 					HID_USAGE_SENSOR_GYRO_3D, address,
 					report_id,
-					SENSOR_HUB_SYNC);
+					SENSOR_HUB_SYNC,
+					min < 0);
 		else {
 			*val = 0;
 			hid_sensor_power_state(&gyro_state->common_attributes,
diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c
index beab6d6fd6e1..4bc95f31c730 100644
--- a/drivers/iio/humidity/hid-sensor-humidity.c
+++ b/drivers/iio/humidity/hid-sensor-humidity.c
@@ -75,7 +75,8 @@ static int humidity_read_raw(struct iio_dev *indio_dev,
 				HID_USAGE_SENSOR_HUMIDITY,
 				HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY,
 				humid_st->humidity_attr.report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				humid_st->humidity_attr.logical_minimum < 0);
 		hid_sensor_power_state(&humid_st->common_attributes, false);
 
 		return IIO_VAL_INT;
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 406caaee9a3c..94f33250ba5a 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -93,6 +93,7 @@ static int als_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -102,8 +103,8 @@ static int als_read_raw(struct iio_dev *indio_dev,
 		case  CHANNEL_SCAN_INDEX_INTENSITY:
 		case  CHANNEL_SCAN_INDEX_ILLUM:
 			report_id = als_state->als_illum.report_id;
-			address =
-			HID_USAGE_SENSOR_LIGHT_ILLUM;
+			min = als_state->als_illum.logical_minimum;
+			address = HID_USAGE_SENSOR_LIGHT_ILLUM;
 			break;
 		default:
 			report_id = -1;
@@ -116,7 +117,8 @@ static int als_read_raw(struct iio_dev *indio_dev,
 					als_state->common_attributes.hsdev,
 					HID_USAGE_SENSOR_ALS, address,
 					report_id,
-					SENSOR_HUB_SYNC);
+					SENSOR_HUB_SYNC,
+					min < 0);
 			hid_sensor_power_state(&als_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c
index 45107f7537b5..cf5a0c242609 100644
--- a/drivers/iio/light/hid-sensor-prox.c
+++ b/drivers/iio/light/hid-sensor-prox.c
@@ -73,6 +73,7 @@ static int prox_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -81,8 +82,8 @@ static int prox_read_raw(struct iio_dev *indio_dev,
 		switch (chan->scan_index) {
 		case  CHANNEL_SCAN_INDEX_PRESENCE:
 			report_id = prox_state->prox_attr.report_id;
-			address =
-			HID_USAGE_SENSOR_HUMAN_PRESENCE;
+			min = prox_state->prox_attr.logical_minimum;
+			address = HID_USAGE_SENSOR_HUMAN_PRESENCE;
 			break;
 		default:
 			report_id = -1;
@@ -95,7 +96,8 @@ static int prox_read_raw(struct iio_dev *indio_dev,
 				prox_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_PROX, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 			hid_sensor_power_state(&prox_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index d55c4885211a..f3c0d41e5a8c 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -163,21 +163,23 @@ static int magn_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&magn_state->magn_flux_attributes, true);
-		report_id =
-			magn_state->magn[chan->address].report_id;
+		report_id = magn_state->magn[chan->address].report_id;
+		min = magn_state->magn[chan->address].logical_minimum;
 		address = magn_3d_addresses[chan->address];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 				magn_state->magn_flux_attributes.hsdev,
 				HID_USAGE_SENSOR_COMPASS_3D, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 		else {
 			*val = 0;
 			hid_sensor_power_state(
diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c
index 1e5451d1ff88..bdc5e4554ee4 100644
--- a/drivers/iio/orientation/hid-sensor-incl-3d.c
+++ b/drivers/iio/orientation/hid-sensor-incl-3d.c
@@ -111,21 +111,23 @@ static int incl_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&incl_state->common_attributes, true);
-		report_id =
-			incl_state->incl[chan->scan_index].report_id;
+		report_id = incl_state->incl[chan->scan_index].report_id;
+		min = incl_state->incl[chan->scan_index].logical_minimum;
 		address = incl_3d_addresses[chan->scan_index];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 				incl_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_INCLINOMETER_3D, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 		else {
 			hid_sensor_power_state(&incl_state->common_attributes,
 						false);
diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c
index 4c437918f1d2..d7b1c00ceb4d 100644
--- a/drivers/iio/pressure/hid-sensor-press.c
+++ b/drivers/iio/pressure/hid-sensor-press.c
@@ -77,6 +77,7 @@ static int press_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -85,8 +86,8 @@ static int press_read_raw(struct iio_dev *indio_dev,
 		switch (chan->scan_index) {
 		case  CHANNEL_SCAN_INDEX_PRESSURE:
 			report_id = press_state->press_attr.report_id;
-			address =
-			HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE;
+			min = press_state->press_attr.logical_minimum;
+			address = HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE;
 			break;
 		default:
 			report_id = -1;
@@ -99,7 +100,8 @@ static int press_read_raw(struct iio_dev *indio_dev,
 				press_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_PRESSURE, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 			hid_sensor_power_state(&press_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c
index beaf6fd3e337..b592fc4f007e 100644
--- a/drivers/iio/temperature/hid-sensor-temperature.c
+++ b/drivers/iio/temperature/hid-sensor-temperature.c
@@ -76,7 +76,8 @@ static int temperature_read_raw(struct iio_dev *indio_dev,
 			HID_USAGE_SENSOR_TEMPERATURE,
 			HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE,
 			temp_st->temperature_attr.report_id,
-			SENSOR_HUB_SYNC);
+			SENSOR_HUB_SYNC,
+			temp_st->temperature_attr.logical_minimum < 0);
 		hid_sensor_power_state(
 				&temp_st->common_attributes,
 				false);
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index 2751dba850c6..3e1abb455472 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -213,7 +213,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* get a report with all values through requesting one value */
 	sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev,
 			HID_USAGE_SENSOR_TIME, hid_time_addresses[0],
-			time_state->info[0].report_id, SENSOR_HUB_SYNC);
+			time_state->info[0].report_id, SENSOR_HUB_SYNC, false);
 	/* wait for all values (event) */
 	ret = wait_for_completion_killable_timeout(
 			&time_state->comp_last_time, HZ*6);
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 331dc377c275..dc12f5c4b076 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
 * @attr_usage_id:	Attribute usage id as per spec
 * @report_id:	Report id to look for
 * @flag:      Synchronous or asynchronous read
+* @is_signed:   If true then fields < 32 bits will be sign-extended
 *
 * Issues a synchronous or asynchronous read request for an input attribute.
 * Returns data upto 32 bits.
@@ -190,7 +191,8 @@ enum sensor_hub_read_flags {
 int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
  					u32 usage_id,
  					u32 attr_usage_id, u32 report_id,
- 					enum sensor_hub_read_flags flag
+					enum sensor_hub_read_flags flag,
+					bool is_signed
 );
 
 /**
-- 
cgit v1.2.3


From 69fec325a64383667b8a35df5d48d6ce52fb2782 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 18 Nov 2018 16:14:47 +0800
Subject: Revert "sctp: remove sctp_transport_pmtu_check"

This reverts commit 22d7be267eaa8114dcc28d66c1c347f667d7878a.

The dst's mtu in transport can be updated by a non sctp place like
in xfrm where the MTU information didn't get synced between asoc,
transport and dst, so it is still needed to do the pmtu check
in sctp_packet_config.

Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 12 ++++++++++++
 net/sctp/output.c       |  3 +++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 8c2caa370e0f..ab9242e51d9e 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -608,4 +608,16 @@ static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
 				 SCTP_DEFAULT_MINSEGMENT));
 }
 
+static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
+{
+	__u32 pmtu = sctp_dst_mtu(t->dst);
+
+	if (t->pathmtu == pmtu)
+		return true;
+
+	t->pathmtu = pmtu;
+
+	return false;
+}
+
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 88dfa6ae1fb4..b0e74a3e77ec 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -118,6 +118,9 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 		sctp_transport_route(tp, NULL, sp);
 		if (asoc->param_flags & SPP_PMTUD_ENABLE)
 			sctp_assoc_sync_pmtu(asoc);
+	} else if (!sctp_transport_pmtu_check(tp)) {
+		if (asoc->param_flags & SPP_PMTUD_ENABLE)
+			sctp_assoc_sync_pmtu(asoc);
 	}
 
 	if (asoc->pmtu_pending) {
-- 
cgit v1.2.3


From b34087157dd76e8d96e5e52808134a791ac61e57 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 21 Nov 2018 16:00:50 +0000
Subject: dma-direct: Make DIRECT_MAPPING_ERROR viable for SWIOTLB

With the overflow buffer removed, we no longer have a unique address
which is guaranteed not to be a valid DMA target to use as an error
token. The DIRECT_MAPPING_ERROR value of 0 tries to at least represent
an unlikely DMA target, but unfortunately there are already SWIOTLB
users with DMA-able memory at physical address 0 which now gets falsely
treated as a mapping failure and leads to all manner of misbehaviour.

The best we can do to mitigate that is flip DIRECT_MAPPING_ERROR to the
other commonly-used error value of all-bits-set, since the last single
byte of memory is by far the least-likely-valid DMA target.

Fixes: dff8d6c1ed58 ("swiotlb: remove the overflow buffer")
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-direct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bd73e7a91410..9e66bfe369aa 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,7 +5,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mem_encrypt.h>
 
-#define DIRECT_MAPPING_ERROR		0
+#define DIRECT_MAPPING_ERROR		(~(dma_addr_t)0)
 
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
-- 
cgit v1.2.3


From 86de5921a3d5dd246df661e09bdd0a6131b39ae3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 20 Nov 2018 05:53:59 -0800
Subject: tcp: defer SACK compression after DupThresh

Jean-Louis reported a TCP regression and bisected to recent SACK
compression.

After a loss episode (receiver not able to keep up and dropping
packets because its backlog is full), linux TCP stack is sending
a single SACK (DUPACK).

Sender waits a full RTO timer before recovering losses.

While RFC 6675 says in section 5, "Algorithm Details",

   (2) If DupAcks < DupThresh but IsLost (HighACK + 1) returns true --
       indicating at least three segments have arrived above the current
       cumulative acknowledgment point, which is taken to indicate loss
       -- go to step (4).
...
   (4) Invoke fast retransmit and enter loss recovery as follows:

there are old TCP stacks not implementing this strategy, and
still counting the dupacks before starting fast retransmit.

While these stacks probably perform poorly when receivers implement
LRO/GRO, we should be a little more gentle to them.

This patch makes sure we do not enable SACK compression unless
3 dupacks have been sent since last rcv_nxt update.

Ideally we should even rearm the timer to send one or two
more DUPACK if no more packets are coming, but that will
be work aiming for linux-4.21.

Many thanks to Jean-Louis for bisecting the issue, providing
packet captures and testing this patch.

Fixes: 5d9f4262b7ea ("tcp: add SACK compression")
Reported-by: Jean-Louis Dupond <jean-louis@dupond.be>
Tested-by: Jean-Louis Dupond <jean-louis@dupond.be>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp_input.c  | 14 ++++++++++++--
 net/ipv4/tcp_output.c |  6 +++---
 net/ipv4/tcp_timer.c  |  2 +-
 4 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8ed77bb4ed86..a9b0280687d5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -196,6 +196,7 @@ struct tcp_sock {
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
 	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */
+	u32	compressed_ack_rcv_nxt;
 
 	u32	tsoffset;	/* timestamp offset */
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e695584bb33f..1e37c1388189 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4268,7 +4268,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	 * If the sack array is full, forget about the last one.
 	 */
 	if (this_sack >= TCP_NUM_SACKS) {
-		if (tp->compressed_ack)
+		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
 			tcp_send_ack(sk);
 		this_sack--;
 		tp->rx_opt.num_sacks--;
@@ -5189,7 +5189,17 @@ send_now:
 	if (!tcp_is_sack(tp) ||
 	    tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
 		goto send_now;
-	tp->compressed_ack++;
+
+	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+				      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+		tp->compressed_ack = 0;
+	}
+
+	if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+		goto send_now;
 
 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
 		return;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c34b97d365d..3f510cad0b3e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (unlikely(tp->compressed_ack)) {
+	if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
 		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-			      tp->compressed_ack);
-		tp->compressed_ack = 0;
+			      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+		tp->compressed_ack = TCP_FASTRETRANS_THRESH;
 		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
 			__sock_put(sk);
 	}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 676020663ce8..5f8b6d3cd855 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
-		if (tp->compressed_ack)
+		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
 			tcp_send_ack(sk);
 	} else {
 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
-- 
cgit v1.2.3


From f1539a0c2545d7bd82e451bd1464f2a820f55de4 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 21 Nov 2018 16:27:11 +0100
Subject: Revert "HID: input: Create a utility class for counting scroll
 events"

This reverts commit 1ff2e1a44e02d4bdbb9be67c7d9acc240a67141f.

It turns out the current API is not that compatible with
some Microsoft mice, so better start again from scratch.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Harry Cutts <hcutts@chromium.org>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 45 ---------------------------------------------
 include/linux/hid.h     | 28 ----------------------------
 2 files changed, 73 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 28ee2ed88a1a..d6fab5798487 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1841,48 +1841,3 @@ void hidinput_disconnect(struct hid_device *hid)
 }
 EXPORT_SYMBOL_GPL(hidinput_disconnect);
 
-/**
- * hid_scroll_counter_handle_scroll() - Send high- and low-resolution scroll
- *                                      events given a high-resolution wheel
- *                                      movement.
- * @counter: a hid_scroll_counter struct describing the wheel.
- * @hi_res_value: the movement of the wheel, in the mouse's high-resolution
- *                units.
- *
- * Given a high-resolution movement, this function converts the movement into
- * microns and emits high-resolution scroll events for the input device. It also
- * uses the multiplier from &struct hid_scroll_counter to emit low-resolution
- * scroll events when appropriate for backwards-compatibility with userspace
- * input libraries.
- */
-void hid_scroll_counter_handle_scroll(struct hid_scroll_counter *counter,
-				      int hi_res_value)
-{
-	int low_res_scroll_amount;
-	/* Some wheels will rest 7/8ths of a notch from the previous notch
-	 * after slow movement, so we want the threshold for low-res events to
-	 * be in the middle of the notches (e.g. after 4/8ths) as opposed to on
-	 * the notches themselves (8/8ths).
-	 */
-	int threshold = counter->resolution_multiplier / 2;
-
-	input_report_rel(counter->dev, REL_WHEEL_HI_RES,
-			 hi_res_value * counter->microns_per_hi_res_unit);
-
-	counter->remainder += hi_res_value;
-	if (abs(counter->remainder) >= threshold) {
-		/* Add (or subtract) 1 because we want to trigger when the wheel
-		 * is half-way to the next notch (i.e. scroll 1 notch after a
-		 * 1/2 notch movement, 2 notches after a 1 1/2 notch movement,
-		 * etc.).
-		 */
-		low_res_scroll_amount =
-			counter->remainder / counter->resolution_multiplier
-			+ (hi_res_value > 0 ? 1 : -1);
-		input_report_rel(counter->dev, REL_WHEEL,
-				 low_res_scroll_amount);
-		counter->remainder -=
-			low_res_scroll_amount * counter->resolution_multiplier;
-	}
-}
-EXPORT_SYMBOL_GPL(hid_scroll_counter_handle_scroll);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 387c70df6f29..a355d61940f2 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1139,34 +1139,6 @@ static inline u32 hid_report_len(struct hid_report *report)
 int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
 		int interrupt);
 
-
-/**
- * struct hid_scroll_counter - Utility class for processing high-resolution
- *                             scroll events.
- * @dev: the input device for which events should be reported.
- * @microns_per_hi_res_unit: the amount moved by the user's finger for each
- *                           high-resolution unit reported by the mouse, in
- *                           microns.
- * @resolution_multiplier: the wheel's resolution in high-resolution mode as a
- *                         multiple of its lower resolution. For example, if
- *                         moving the wheel by one "notch" would result in a
- *                         value of 1 in low-resolution mode but 8 in
- *                         high-resolution, the multiplier is 8.
- * @remainder: counts the number of high-resolution units moved since the last
- *             low-resolution event (REL_WHEEL or REL_HWHEEL) was sent. Should
- *             only be used by class methods.
- */
-struct hid_scroll_counter {
-	struct input_dev *dev;
-	int microns_per_hi_res_unit;
-	int resolution_multiplier;
-
-	int remainder;
-};
-
-void hid_scroll_counter_handle_scroll(struct hid_scroll_counter *counter,
-				      int hi_res_value);
-
 /* HID quirks API */
 unsigned long hid_lookup_quirk(const struct hid_device *hdev);
 int hid_quirks_init(char **quirks_param, __u16 bus, int count);
-- 
cgit v1.2.3


From ffe0e7cf290f5c9d1392134b4ef8da2a3761a4cd Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 21 Nov 2018 16:27:12 +0100
Subject: Revert "Input: Add the `REL_WHEEL_HI_RES` event code"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit aaf9978c3c0291ef3beaa97610bc9c3084656a85.

Quoting Peter:

There is a HID feature report called "Resolution Multiplier"
Described in the "Enhanced Wheel Support in Windows" doc and
the "USB HID Usage Tables" page 30.

http://download.microsoft.com/download/b/d/1/bd1f7ef4-7d72-419e-bc5c-9f79ad7bb66e/wheel.docx
https://www.usb.org/sites/default/files/documents/hut1_12v2.pdf

This was new for Windows Vista, so we're only a decade behind here. I only
accidentally found this a few days ago while debugging a stuck button on a
Microsoft mouse.

The docs above describe it like this: a wheel control by default sends
value 1 per notch. If the resolution multiplier is active, the wheel is
expected to send a value of $multiplier per notch (e.g. MS Sculpt mouse) or
just send events more often, i.e. for less physical motion (e.g. MS Comfort
mouse).

For the latter, you need the right HW of course. The Sculpt mouse has
tactile wheel clicks, so nothing really changes. The Comfort mouse has
continuous motion with no tactile clicks. Similar to the free-wheeling
Logitech mice but without any inertia.

Note that the doc also says that Vista and onwards *always* enable this
feature where available.

An example HID definition looks like this:

       Usage Page Generic Desktop (0x01)
       Usage Resolution Multiplier (0x48)
       Logical Minimum 0
       Logical Maximum 1
       Physical Minimum 1
       Physical Maximum 16
       Report Size 2 # in bits
       Report Count 1
       Feature (Data, Var, Abs)

So the actual bits have values 0 or 1 and that reflects real values 1 or 16.
We've only seen single-bits so far, so there's low-res and hi-res, but
nothing in between.

The multiplier is available for HID usages "Wheel" and "AC Pan" (horiz wheel).
Microsoft suggests that

> Vendors should ship their devices with smooth scrolling disabled and allow
> Windows to enable it. This ensures that the device works like a regular HID
> device on legacy operating systems that do not support smooth scrolling.
(see the wheel doc linked above)

The mice that we tested so far do reset on unplug.

Device Support looks to be all (?) Microsoft mice but nothing else

Not supported:
- Logitech G500s, G303
- Roccat Kone XTD
- all the cheap Lenovo, HP, Dell, Logitech USB mice that come with a
  workstation that I could find don't have it.
- Etekcity something something
- Razer Imperator

Supported:
- Microsoft Comfort Optical Mouse 3000 - yes, physical: 1:4
- Microsoft Sculpt Ergonomic Mouse - yes, physical: 1:12
- Microsoft Surface mouse - yes, physical: 1:4

So again, I think this is really just available on Microsoft mice, but
probably all decent MS mice released over the last decade.

Looking at the hardware itself:

- no noticeable notches in the weel
- low-res: 18 events per 360deg rotation (click angle 20 deg)
- high-res: 72 events per 360deg → matches multiplier of 4

- I can feel the notches during wheel turns
- low-res: 24 events per 360 deg rotation (click angle 15 deg)
  - horiz wheel is tilt-based, continuous output value 1
- high-res: 24 events per 360deg with value 12 → matches multiplier of 12
  - horiz wheel output rate doubles/triples?, values is 3

- It's a touch strip, not a wheel so no notches
- high-res: events have value 4 instead of 1
  a bit strange given that it doesn't actually have notches.

Ok, why is this an issue for the current API? First, because the logitech
multiplier used in Harry's patches looks suspiciously like the Resolution
Multiplier so I think we should assume it's the same thing. Nestor, can you
shed some light on that?

- `REL_WHEEL` is defined as the number of notches, emulated where needed.
- `REL_WHEEL_HI_RES` is the movement of the user's finger in microns.
- `WM_MOUSEWHEEL` (Windows) is is a multiple of 120, defined as "the threshold
  for action to be taken and one such action"
  https://docs.microsoft.com/en-us/windows/desktop/inputdev/wm-mousewheel

If the multiplier is set to M, this means we need an accumulated value of M
until we can claim there was a wheel click. So after enabling the multiplier
and setting it to the maximum (like Windows):
- M units are 15deg rotation → 1 unit is 2620/M micron (see below). This is
  the `REL_WHEEL_HI_RES` value.
  - wheel diameter 20mm: 15 deg rotation is 2.62mm, 2620 micron (pi * 20mm /
    (360deg/15deg))
- For every M units accumulated, send one `REL_WHEEL` event

The problem here is that we've now hardcoded 20mm/15 deg into the kernel and
we have no way of getting the size of the wheel or the click angle into the
kernel.

In userspace we now have to undo the kernel's calculation. If our click angle
is e.g. 20 degree we have to undo the (lossy) calculation from the kernel and
calculate the correct angle instead. This also means the 15 is a hardcoded
option forever and cannot be changed.

In hid-logitech-hidpp.c, the microns per unit is hardcoded per device.
Harry, did you measure those by hand? We'd need to update the kernel for
every device and there are 10 years worth of devices from MS alone.

The multiplier default is 8 which is in the right ballpark, so I'm pretty
sure this is the same as the Resolution Multiplier, just in HID++ lingo. And
given that the 120 magic factor is what Windows uses in the end, I can't
imagine Logitech rolling their own thing here. Nestor?

And we're already fairly inaccurate with the microns anyway. The MX Anywhere
2S has a click angle of 20 degrees (18 stops) and a 17mm wheel, so a wheel
notch is approximately 2.67mm, one event at multiplier 8 (1/8 of a notch)
would be 334 micron. That's only 80% of the fallback value of 406 in the
kernel. Multiplier 6 gives us 445micron (10% off). I'm assuming multiplier 7
doesn't exist because it's not a factor of 120.

Summary:

Best option may be to simply do what Windows is doing, all the HW manufacturers
have to use that approach after all. Switch `REL_WHEEL_HI_RES` to report in
fractions of 120, with 120 being one notch and divide that by the multiplier
for the actual events. So e.g. the Logitech multiplier 8 would send value 15
for each event in hi-res mode. This can be converted in userspace to
whatever userspace needs (combined with a hwdb there that tells you wheel
size/click angle/...).

Conflicts:
	include/uapi/linux/input-event-codes.h -> I kept the new
         reserved event in the code, so I had to adapt the revert
         slightly

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Harry Cutts <hcutts@chromium.org>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/input/event-codes.rst    | 11 +----------
 include/uapi/linux/input-event-codes.h | 10 ----------
 2 files changed, 1 insertion(+), 20 deletions(-)

(limited to 'include')

diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst
index cef220c176a4..a8c0873beb95 100644
--- a/Documentation/input/event-codes.rst
+++ b/Documentation/input/event-codes.rst
@@ -190,16 +190,7 @@ A few EV_REL codes have special meanings:
 * REL_WHEEL, REL_HWHEEL:
 
   - These codes are used for vertical and horizontal scroll wheels,
-    respectively. The value is the number of "notches" moved on the wheel, the
-    physical size of which varies by device. For high-resolution wheels (which
-    report multiple events for each notch of movement, or do not have notches)
-    this may be an approximation based on the high-resolution scroll events.
-
-* REL_WHEEL_HI_RES:
-
-  - If a vertical scroll wheel supports high-resolution scrolling, this code
-    will be emitted in addition to REL_WHEEL. The value is the (approximate)
-    distance travelled by the user's finger, in microns.
+    respectively.
 
 EV_ABS
 ------
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 6d180cc60a5d..3eb5a4c3d60a 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -716,7 +716,6 @@
  * the situation described above.
  */
 #define REL_RESERVED		0x0a
-#define REL_WHEEL_HI_RES	0x0b
 #define REL_MAX			0x0f
 #define REL_CNT			(REL_MAX+1)
 
@@ -753,15 +752,6 @@
 
 #define ABS_MISC		0x28
 
-/*
- * 0x2e is reserved and should not be used in input drivers.
- * It was used by HID as ABS_MISC+6 and userspace needs to detect if
- * the next ABS_* event is correct or is just ABS_MISC + n.
- * We define here ABS_RESERVED so userspace can rely on it and detect
- * the situation described above.
- */
-#define ABS_RESERVED		0x2e
-
 #define ABS_MT_SLOT		0x2f	/* MT slot being modified */
 #define ABS_MT_TOUCH_MAJOR	0x30	/* Major axis of touching ellipse */
 #define ABS_MT_TOUCH_MINOR	0x31	/* Minor axis (omit if circular) */
-- 
cgit v1.2.3


From 0211dda68a4f6531923a2f72d8e8959207f59fba Mon Sep 17 00:00:00 2001
From: Tal Gilboa <talgi@mellanox.com>
Date: Wed, 21 Nov 2018 16:28:23 +0200
Subject: net/dim: Update DIM start sample after each DIM iteration

On every iteration of net_dim, the algorithm may choose to
check for the system state by comparing current data sample
with previous data sample. After each of these comparison,
regardless of the action taken, the sample used as baseline
is needed to be updated.

This patch fixes a bug that causes DIM to take wrong decisions,
due to never updating the baseline sample for comparison between
iterations. This way, DIM always compares current sample with
zeros.

Although this is a functional fix, it also improves and stabilizes
performance as the algorithm works properly now.

Performance:
Tested single UDP TX stream with pktgen:
samples/pktgen/pktgen_sample03_burst_single_flow.sh -i p4p2 -d 1.1.1.1
-m 24:8a:07:88:26:8b -f 3 -b 128

ConnectX-5 100GbE packet rate improved from 15-19Mpps to 19-20Mpps.
Also, toggling between profiles is less frequent with the fix.

Fixes: 8115b750dbcb ("net/dim: use struct net_dim_sample as arg to net_dim")
Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net_dim.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index c79e859408e6..fd458389f7d1 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -406,6 +406,8 @@ static inline void net_dim(struct net_dim *dim,
 		}
 		/* fall through */
 	case NET_DIM_START_MEASURE:
+		net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr,
+			       &dim->start_sample);
 		dim->state = NET_DIM_MEASURE_IN_PROGRESS;
 		break;
 	case NET_DIM_APPLY_NEW_PROFILE:
-- 
cgit v1.2.3


From 5cd8d46ea1562be80063f53c7c6a5f40224de623 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 20 Nov 2018 13:00:18 -0500
Subject: packet: copy user buffers before orphan or clone

tpacket_snd sends packets with user pages linked into skb frags. It
notifies that pages can be reused when the skb is released by setting
skb->destructor to tpacket_destruct_skb.

This can cause data corruption if the skb is orphaned (e.g., on
transmit through veth) or cloned (e.g., on mirror to another psock).

Create a kernel-private copy of data in these cases, same as tun/tap
zerocopy transmission. Reuse that infrastructure: mark the skb as
SKBTX_ZEROCOPY_FRAG, which will trigger copy in skb_orphan_frags(_rx).

Unlike other zerocopy packets, do not set shinfo destructor_arg to
struct ubuf_info. tpacket_destruct_skb already uses that ptr to notify
when the original skb is released and a timestamp is recorded. Do not
change this timestamp behavior. The ubuf_info->callback is not needed
anyway, as no zerocopy notification is expected.

Mark destructor_arg as not-a-uarg by setting the lower bit to 1. The
resulting value is not a valid ubuf_info pointer, nor a valid
tpacket_snd frame address. Add skb_zcopy_.._nouarg helpers for this.

The fix relies on features introduced in commit 52267790ef52 ("sock:
add MSG_ZEROCOPY"), so can be backported as is only to 4.14.

Tested with from `./in_netns.sh ./txring_overwrite` from
http://github.com/wdebruij/kerneltools/tests

Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap")
Reported-by: Anand H. Krishnan <anandhkrishnan@gmail.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 18 +++++++++++++++++-
 net/packet/af_packet.c |  4 ++--
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0ba687454267..0d1b2c3f127b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1326,6 +1326,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
 	}
 }
 
+static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
+{
+	skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
+	skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+}
+
+static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
+{
+	return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
+}
+
+static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
+{
+	return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
+}
+
 /* Release a reference on a zerocopy structure */
 static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 {
@@ -1335,7 +1351,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 		if (uarg->callback == sock_zerocopy_callback) {
 			uarg->zerocopy = uarg->zerocopy && zerocopy;
 			sock_zerocopy_put(uarg);
-		} else {
+		} else if (!skb_zcopy_is_nouarg(skb)) {
 			uarg->callback(uarg, zerocopy);
 		}
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..a74650e98f42 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 		void *ph;
 		__u32 ts;
 
-		ph = skb_shinfo(skb)->destructor_arg;
+		ph = skb_zcopy_get_nouarg(skb);
 		packet_dec_pending(&po->tx_ring);
 
 		ts = __packet_set_timestamp(po, ph, skb);
@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->mark = po->sk.sk_mark;
 	skb->tstamp = sockc->transmit_time;
 	sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
-	skb_shinfo(skb)->destructor_arg = ph.raw;
+	skb_zcopy_set_nouarg(skb, ph.raw);
 
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
-- 
cgit v1.2.3


From 89259088c1b7fecb43e8e245dc931909132a4e03 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 17 Nov 2018 11:32:29 +0100
Subject: netfilter: nfnetlink_cttimeout: fetch timeouts for udplite and gre,
 too

syzbot was able to trigger the WARN in cttimeout_default_get() by
passing UDPLITE as l4protocol.  Alias UDPLITE to UDP, both use
same timeout values.

Furthermore, also fetch GRE timeouts.  GRE is a bit more complicated,
as it still can be a module and its netns_proto_gre struct layout isn't
visible outside of the gre module. Can't move timeouts around, it
appears conntrack sysctl unregister assumes net_generic() returns
nf_proto_net, so we get crash. Expose layout of netns_proto_gre instead.

A followup nf-next patch could make gre tracker be built-in as well
if needed, its not that large.

Last, make the WARN() mention the missing protocol value in case
anything else is missing.

Reported-by: syzbot+2fae8fa157dd92618cae@syzkaller.appspotmail.com
Fixes: 8866df9264a3 ("netfilter: nfnetlink_cttimeout: pass default timeout policy to obj_to_nlattr")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h | 13 +++++++++++++
 net/netfilter/nf_conntrack_proto_gre.c           | 14 ++------------
 net/netfilter/nfnetlink_cttimeout.c              | 15 +++++++++++++--
 3 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index b8d95564bd53..14edb795ab43 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -21,6 +21,19 @@ struct nf_ct_gre_keymap {
 	struct nf_conntrack_tuple tuple;
 };
 
+enum grep_conntrack {
+	GRE_CT_UNREPLIED,
+	GRE_CT_REPLIED,
+	GRE_CT_MAX
+};
+
+struct netns_proto_gre {
+	struct nf_proto_net	nf;
+	rwlock_t		keymap_lock;
+	struct list_head	keymap_list;
+	unsigned int		gre_timeouts[GRE_CT_MAX];
+};
+
 /* add new tuple->key_reply pair to keymap */
 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			 struct nf_conntrack_tuple *t);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9b48dc8b4b88..2a5e56c6d8d9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -43,24 +43,12 @@
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
-enum grep_conntrack {
-	GRE_CT_UNREPLIED,
-	GRE_CT_REPLIED,
-	GRE_CT_MAX
-};
-
 static const unsigned int gre_timeouts[GRE_CT_MAX] = {
 	[GRE_CT_UNREPLIED]	= 30*HZ,
 	[GRE_CT_REPLIED]	= 180*HZ,
 };
 
 static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
-	struct nf_proto_net	nf;
-	rwlock_t		keymap_lock;
-	struct list_head	keymap_list;
-	unsigned int		gre_timeouts[GRE_CT_MAX];
-};
 
 static inline struct netns_proto_gre *gre_pernet(struct net *net)
 {
@@ -402,6 +390,8 @@ static int __init nf_ct_proto_gre_init(void)
 {
 	int ret;
 
+	BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
+
 	ret = register_pernet_subsys(&proto_gre_net_ops);
 	if (ret < 0)
 		goto out_pernet;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a518eb162344..109b0d27345a 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -455,7 +455,8 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 	case IPPROTO_TCP:
 		timeouts = nf_tcp_pernet(net)->timeouts;
 		break;
-	case IPPROTO_UDP:
+	case IPPROTO_UDP: /* fallthrough */
+	case IPPROTO_UDPLITE:
 		timeouts = nf_udp_pernet(net)->timeouts;
 		break;
 	case IPPROTO_DCCP:
@@ -469,13 +470,23 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 	case IPPROTO_SCTP:
 #ifdef CONFIG_NF_CT_PROTO_SCTP
 		timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+		break;
+	case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+		if (l4proto->net_id) {
+			struct netns_proto_gre *net_gre;
+
+			net_gre = net_generic(net, *l4proto->net_id);
+			timeouts = net_gre->gre_timeouts;
+		}
 #endif
 		break;
 	case 255:
 		timeouts = &nf_generic_pernet(net)->timeout;
 		break;
 	default:
-		WARN_ON_ONCE(1);
+		WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
 		break;
 	}
 
-- 
cgit v1.2.3


From 786a9ab1330169f2602238822b4df5d5c4c98f6c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:17 +0100
Subject: gpio: davinci: restore a way to manually specify the GPIO base

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the network support in legacy boot
mode for da850-evm since we can no longer request the MDIO clock GPIO.

Other boards may be broken too, which I haven't tested.

The problem is in the fact that most board files still use the legacy
GPIO API where lines are requested by numbers rather than descriptors.

While this should be fixed eventually, in order to unbreak the board
for now - provide a way to manually specify the GPIO base in platform
data.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 drivers/gpio/gpio-davinci.c                | 2 +-
 include/linux/platform_data/gpio-davinci.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 5c1564fcc24e..bdb29e51b417 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -258,7 +258,7 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 	chips->chip.set = davinci_gpio_set;
 
 	chips->chip.ngpio = ngpio;
-	chips->chip.base = -1;
+	chips->chip.base = pdata->no_auto_base ? pdata->base : -1;
 
 #ifdef CONFIG_OF_GPIO
 	chips->chip.of_gpio_n_cells = 2;
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index f92a47e18034..a93841bfb9f7 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -17,6 +17,8 @@
 #define __DAVINCI_GPIO_PLATFORM_H
 
 struct davinci_gpio_platform_data {
+	bool	no_auto_base;
+	u32	base;
 	u32	ngpio;
 	u32	gpio_unbanked;
 };
-- 
cgit v1.2.3


From 8114865ff82e200b383e46821c25cb0625b842b5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:10:15 -0500
Subject: function_graph: Create function_graph_enter() to consolidate
 architecture code

Currently all the architectures do basically the same thing in preparing the
function graph tracer on entry to a function. This code can be pulled into a
generic location and then this will allow the function graph tracer to be
fixed, as well as extended.

Create a new function graph helper function_graph_enter() that will call the
hook function (ftrace_graph_entry) and the shadow stack operation
(ftrace_push_return_trace), and remove the need of the architecture code to
manage the shadow stack.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h               |  3 +++
 kernel/trace/trace_functions_graph.c | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a397907e8d72..5717e8f81c59 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -779,6 +779,9 @@ extern void return_to_handler(void);
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 			 unsigned long frame_pointer, unsigned long *retp);
+extern int
+function_graph_enter(unsigned long ret, unsigned long func,
+		     unsigned long frame_pointer, unsigned long *retp);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 169b3c44ee97..28f2602435d0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -182,6 +182,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 	return 0;
 }
 
+int function_graph_enter(unsigned long ret, unsigned long func,
+			 unsigned long frame_pointer, unsigned long *retp)
+{
+	struct ftrace_graph_ent trace;
+
+	trace.func = func;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace))
+		return -EBUSY;
+
+	return ftrace_push_return_trace(ret, func, &trace.depth,
+					frame_pointer, retp);
+}
+
 /* Retrieve a function return address to the trace stack on thread info.*/
 static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
-- 
cgit v1.2.3


From 584eab291c67894cb17cc87544b9d086228ea70f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 22 Nov 2018 19:59:46 +0900
Subject: netfilter: add missing error handling code for register functions

register_{netdevice/inetaddr/inet6addr}_notifier may return an error
value, this patch adds the code to handle these error paths.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_nat_masquerade.h |  2 +-
 include/net/netfilter/ipv6/nf_nat_masquerade.h |  2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c            |  7 ++++--
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c    | 21 +++++++++++++----
 net/ipv4/netfilter/nft_masq_ipv4.c             |  4 +++-
 net/ipv6/netfilter/ip6t_MASQUERADE.c           |  8 +++++--
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c    | 32 ++++++++++++++++++--------
 net/ipv6/netfilter/nft_masq_ipv6.c             |  4 +++-
 net/netfilter/nft_flow_offload.c               |  5 +++-
 9 files changed, 63 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h
index cd24be4c4a99..13d55206bb9f 100644
--- a/include/net/netfilter/ipv4/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h
@@ -9,7 +9,7 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 		       const struct nf_nat_range2 *range,
 		       const struct net_device *out);
 
-void nf_nat_masquerade_ipv4_register_notifier(void);
+int nf_nat_masquerade_ipv4_register_notifier(void);
 void nf_nat_masquerade_ipv4_unregister_notifier(void);
 
 #endif /*_NF_NAT_MASQUERADE_IPV4_H_ */
diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h
index 0c3b5ebf0bb8..2917bf95c437 100644
--- a/include/net/netfilter/ipv6/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h
@@ -5,7 +5,7 @@
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		       const struct net_device *out);
-void nf_nat_masquerade_ipv6_register_notifier(void);
+int nf_nat_masquerade_ipv6_register_notifier(void);
 void nf_nat_masquerade_ipv6_unregister_notifier(void);
 
 #endif /* _NF_NAT_MASQUERADE_IPV6_H_ */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ce1512b02cb2..fd3f9e8a74da 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
 	int ret;
 
 	ret = xt_register_target(&masquerade_tg_reg);
+	if (ret)
+		return ret;
 
-	if (ret == 0)
-		nf_nat_masquerade_ipv4_register_notifier();
+	ret = nf_nat_masquerade_ipv4_register_notifier();
+	if (ret)
+		xt_unregister_target(&masquerade_tg_reg);
 
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index a9d5e013e555..c7d7fa4fc369 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -149,16 +149,29 @@ static struct notifier_block masq_inet_notifier = {
 
 static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
 
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
 {
+	int ret;
+
 	/* check if the notifier was already set */
 	if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-		return;
+		return 0;
 
 	/* Register for device down reports */
-	register_netdevice_notifier(&masq_dev_notifier);
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
 	/* Register IP address change reports */
-	register_inetaddr_notifier(&masq_inet_notifier);
+	ret = register_inetaddr_notifier(&masq_inet_notifier);
+	if (ret)
+		goto err_unregister;
+
+	return ret;
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	atomic_dec(&masquerade_notifier_refcount);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
 
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f1193e1e928a..6847de1d1db8 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
 	if (ret < 0)
 		return ret;
 
-	nf_nat_masquerade_ipv4_register_notifier();
+	ret = nf_nat_masquerade_ipv4_register_notifier();
+	if (ret)
+		nft_unregister_expr(&nft_masq_ipv4_type);
 
 	return ret;
 }
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 491f808e356a..29c7f1915a96 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
 	int err;
 
 	err = xt_register_target(&masquerade_tg6_reg);
-	if (err == 0)
-		nf_nat_masquerade_ipv6_register_notifier();
+	if (err)
+		return err;
+
+	err = nf_nat_masquerade_ipv6_register_notifier();
+	if (err)
+		xt_unregister_target(&masquerade_tg6_reg);
 
 	return err;
 }
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 3e4bf2286abe..7afd1e63d2db 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -132,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
  * of ipv6 addresses being deleted), we also need to add an upper
  * limit to the number of queued work items.
  */
-static int masq_inet_event(struct notifier_block *this,
-			   unsigned long event, void *ptr)
+static int masq_inet6_event(struct notifier_block *this,
+			    unsigned long event, void *ptr)
 {
 	struct inet6_ifaddr *ifa = ptr;
 	const struct net_device *dev;
@@ -171,20 +171,34 @@ static int masq_inet_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
-static struct notifier_block masq_inet_notifier = {
-	.notifier_call	= masq_inet_event,
+static struct notifier_block masq_inet6_notifier = {
+	.notifier_call	= masq_inet6_event,
 };
 
 static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
 
-void nf_nat_masquerade_ipv6_register_notifier(void)
+int nf_nat_masquerade_ipv6_register_notifier(void)
 {
+	int ret;
+
 	/* check if the notifier is already set */
 	if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-		return;
+		return 0;
 
-	register_netdevice_notifier(&masq_dev_notifier);
-	register_inet6addr_notifier(&masq_inet_notifier);
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
+
+	ret = register_inet6addr_notifier(&masq_inet6_notifier);
+	if (ret)
+		goto err_unregister;
+
+	return ret;
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	atomic_dec(&masquerade_notifier_refcount);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
 
@@ -194,7 +208,7 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void)
 	if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
 		return;
 
-	unregister_inet6addr_notifier(&masq_inet_notifier);
+	unregister_inet6addr_notifier(&masq_inet6_notifier);
 	unregister_netdevice_notifier(&masq_dev_notifier);
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index dd0122f3cffe..e06c82e9dfcd 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
 	if (ret < 0)
 		return ret;
 
-	nf_nat_masquerade_ipv6_register_notifier();
+	ret = nf_nat_masquerade_ipv6_register_notifier();
+	if (ret)
+		nft_unregister_expr(&nft_masq_ipv6_type);
 
 	return ret;
 }
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e82d9a966c45..974525eb92df 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void)
 {
 	int err;
 
-	register_netdevice_notifier(&flow_offload_netdev_notifier);
+	err = register_netdevice_notifier(&flow_offload_netdev_notifier);
+	if (err)
+		goto err;
 
 	err = nft_register_expr(&nft_flow_offload_type);
 	if (err < 0)
@@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void)
 
 register_expr:
 	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+err:
 	return err;
 }
 
-- 
cgit v1.2.3


From e2c95a61656d29ceaac97b6a975c8a1f26e26f15 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Nov 2018 14:05:38 +0100
Subject: bpf, ppc64: generalize fetching subprog into bpf_jit_get_func_addr

Make fetching of the BPF call address from ppc64 JIT generic. ppc64
was using a slightly different variant rather than through the insns'
imm field encoding as the target address would not fit into that space.
Therefore, the target subprog number was encoded into the insns' offset
and fetched through fp->aux->func[off]->bpf_func instead. Given there
are other JITs with this issue and the mechanism of fetching the address
is JIT-generic, move it into the core as a helper instead. On the JIT
side, we get information on whether the retrieved address is a fixed
one, that is, not changing through JIT passes, or a dynamic one. For
the former, JITs can optimize their imm emission because this doesn't
change jump offsets throughout JIT process.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Sandipan Das <sandipan@linux.ibm.com>
Tested-by: Sandipan Das <sandipan@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/powerpc/net/bpf_jit_comp64.c | 57 ++++++++++++++++++++++++++-------------
 include/linux/filter.h            |  4 +++
 kernel/bpf/core.c                 | 34 +++++++++++++++++++++++
 3 files changed, 76 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 50b129785aee..17482f5de3e2 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -166,7 +166,33 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 	PPC_BLR();
 }
 
-static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
+				       u64 func)
+{
+#ifdef PPC64_ELF_ABI_v1
+	/* func points to the function descriptor */
+	PPC_LI64(b2p[TMP_REG_2], func);
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
+	/* ... and move it to LR */
+	PPC_MTLR(b2p[TMP_REG_1]);
+	/*
+	 * Load TOC from function descriptor at offset 8.
+	 * We can clobber r2 since we get called through a
+	 * function pointer (so caller will save/restore r2)
+	 * and since we don't use a TOC ourself.
+	 */
+	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
+#else
+	/* We can clobber r12 */
+	PPC_FUNC_ADDR(12, func);
+	PPC_MTLR(12);
+#endif
+	PPC_BLRL();
+}
+
+static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
+				       u64 func)
 {
 	unsigned int i, ctx_idx = ctx->idx;
 
@@ -273,7 +299,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
-	int i;
+	int i, ret;
 
 	/* Start of epilogue code - will only be valid 2nd pass onwards */
 	u32 exit_addr = addrs[flen];
@@ -284,8 +310,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 		u32 src_reg = b2p[insn[i].src_reg];
 		s16 off = insn[i].off;
 		s32 imm = insn[i].imm;
+		bool func_addr_fixed;
+		u64 func_addr;
 		u64 imm64;
-		u8 *func;
 		u32 true_cond;
 		u32 tmp_idx;
 
@@ -711,23 +738,15 @@ emit_clear:
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
 
-			/* bpf function call */
-			if (insn[i].src_reg == BPF_PSEUDO_CALL)
-				if (!extra_pass)
-					func = NULL;
-				else if (fp->aux->func && off < fp->aux->func_cnt)
-					/* use the subprog id from the off
-					 * field to lookup the callee address
-					 */
-					func = (u8 *) fp->aux->func[off]->bpf_func;
-				else
-					return -EINVAL;
-			/* kernel helper call */
-			else
-				func = (u8 *) __bpf_call_base + imm;
-
-			bpf_jit_emit_func_call(image, ctx, (u64)func);
+			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+						    &func_addr, &func_addr_fixed);
+			if (ret < 0)
+				return ret;
 
+			if (func_addr_fixed)
+				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
+			else
+				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
 			/* move return value from r3 to BPF_REG_0 */
 			PPC_MR(b2p[BPF_REG_0], 3);
 			break;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index de629b706d1d..448dcc448f1f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -866,6 +866,10 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr);
 
 void bpf_jit_free(struct bpf_prog *fp);
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+			  const struct bpf_insn *insn, bool extra_pass,
+			  u64 *func_addr, bool *func_addr_fixed);
+
 struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
 void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a796e0799ec..b1a3545d0ec8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -672,6 +672,40 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
 	bpf_prog_unlock_free(fp);
 }
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+			  const struct bpf_insn *insn, bool extra_pass,
+			  u64 *func_addr, bool *func_addr_fixed)
+{
+	s16 off = insn->off;
+	s32 imm = insn->imm;
+	u8 *addr;
+
+	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
+	if (!*func_addr_fixed) {
+		/* Place-holder address till the last pass has collected
+		 * all addresses for JITed subprograms in which case we
+		 * can pick them up from prog->aux.
+		 */
+		if (!extra_pass)
+			addr = NULL;
+		else if (prog->aux->func &&
+			 off >= 0 && off < prog->aux->func_cnt)
+			addr = (u8 *)prog->aux->func[off]->bpf_func;
+		else
+			return -EINVAL;
+	} else {
+		/* Address of a BPF helper call. Since part of the core
+		 * kernel, it's always at a fixed location. __bpf_call_base
+		 * and the helper with imm relative to it are both in core
+		 * kernel.
+		 */
+		addr = (u8 *)__bpf_call_base + imm;
+	}
+
+	*func_addr = (unsigned long)addr;
+	return 0;
+}
+
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
 			      const struct bpf_insn *aux,
 			      struct bpf_insn *to_buff)
-- 
cgit v1.2.3


From d125f3f866df88da5a85df00291f88f0baa89f7c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 19 Nov 2018 07:40:39 -0500
Subject: function_graph: Make ftrace_push_return_trace() static

As all architectures now call function_graph_enter() to do the entry work,
no architecture should ever call ftrace_push_return_trace(). Make it static.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h               | 3 ---
 kernel/trace/trace_functions_graph.c | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5717e8f81c59..dd16e8218db3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -776,9 +776,6 @@ struct ftrace_ret_stack {
  */
 extern void return_to_handler(void);
 
-extern int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
-			 unsigned long frame_pointer, unsigned long *retp);
 extern int
 function_graph_enter(unsigned long ret, unsigned long func,
 		     unsigned long frame_pointer, unsigned long *retp);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 28f2602435d0..88ca787a1cdc 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -118,7 +118,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
 		     struct trace_seq *s, u32 flags);
 
 /* Add a function return address to the trace stack on thread info.*/
-int
+static int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 			 unsigned long frame_pointer, unsigned long *retp)
 {
-- 
cgit v1.2.3


From 39eb456dacb543de90d3bc6a8e0ac5cf51ac475e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 19 Nov 2018 08:07:12 -0500
Subject: function_graph: Use new curr_ret_depth to manage depth instead of
 curr_ret_stack

Currently, the depth of the ret_stack is determined by curr_ret_stack index.
The issue is that there's a race between setting of the curr_ret_stack and
calling of the callback attached to the return of the function.

Commit 03274a3ffb44 ("tracing/fgraph: Adjust fgraph depth before calling
trace return callback") moved the calling of the callback to after the
setting of the curr_ret_stack, even stating that it was safe to do so, when
in fact, it was the reason there was a barrier() there (yes, I should have
commented that barrier()).

Not only does the curr_ret_stack keep track of the current call graph depth,
it also keeps the ret_stack content from being overwritten by new data.

The function profiler, uses the "subtime" variable of ret_stack structure
and by moving the curr_ret_stack, it allows for interrupts to use the same
structure it was using, corrupting the data, and breaking the profiler.

To fix this, there needs to be two variables to handle the call stack depth
and the pointer to where the ret_stack is being used, as they need to change
at two different locations.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/sched.h                |  1 +
 kernel/trace/ftrace.c                |  3 +++
 kernel/trace/trace_functions_graph.c | 21 +++++++++++++--------
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a51c13c2b1a0..d6183a55e8eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1116,6 +1116,7 @@ struct task_struct {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	/* Index of current stored address in ret_stack: */
 	int				curr_ret_stack;
+	int				curr_ret_depth;
 
 	/* Stack of return addresses for return function tracing: */
 	struct ftrace_ret_stack		*ret_stack;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f536f601bd46..48513954713c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6814,6 +6814,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 			atomic_set(&t->tracing_graph_pause, 0);
 			atomic_set(&t->trace_overrun, 0);
 			t->curr_ret_stack = -1;
+			t->curr_ret_depth = -1;
 			/* Make sure the tasks see the -1 first: */
 			smp_wmb();
 			t->ret_stack = ret_stack_list[start++];
@@ -7038,6 +7039,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
 void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
 {
 	t->curr_ret_stack = -1;
+	t->curr_ret_depth = -1;
 	/*
 	 * The idle task has no parent, it either has its own
 	 * stack or no stack at all.
@@ -7068,6 +7070,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 	/* Make sure we do not use the parent ret_stack */
 	t->ret_stack = NULL;
 	t->curr_ret_stack = -1;
+	t->curr_ret_depth = -1;
 
 	if (ftrace_graph_active) {
 		struct ftrace_ret_stack *ret_stack;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 88ca787a1cdc..02d4081a7f5a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -119,7 +119,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
 
 /* Add a function return address to the trace stack on thread info.*/
 static int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+ftrace_push_return_trace(unsigned long ret, unsigned long func,
 			 unsigned long frame_pointer, unsigned long *retp)
 {
 	unsigned long long calltime;
@@ -177,8 +177,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 	current->ret_stack[index].retp = retp;
 #endif
-	*depth = current->curr_ret_stack;
-
 	return 0;
 }
 
@@ -188,14 +186,20 @@ int function_graph_enter(unsigned long ret, unsigned long func,
 	struct ftrace_graph_ent trace;
 
 	trace.func = func;
-	trace.depth = current->curr_ret_stack + 1;
+	trace.depth = ++current->curr_ret_depth;
 
 	/* Only trace if the calling function expects to */
 	if (!ftrace_graph_entry(&trace))
-		return -EBUSY;
+		goto out;
 
-	return ftrace_push_return_trace(ret, func, &trace.depth,
-					frame_pointer, retp);
+	if (ftrace_push_return_trace(ret, func,
+				     frame_pointer, retp))
+		goto out;
+
+	return 0;
+ out:
+	current->curr_ret_depth--;
+	return -EBUSY;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
@@ -257,7 +261,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
 	trace->func = current->ret_stack[index].func;
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
-	trace->depth = index;
+	trace->depth = current->curr_ret_depth;
 }
 
 /*
@@ -273,6 +277,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 	trace.rettime = trace_clock_local();
 	barrier();
 	current->curr_ret_stack--;
+	current->curr_ret_depth--;
 	/*
 	 * The curr_ret_stack can be less than -1 only if it was
 	 * filtered out and it's about to return from the function.
-- 
cgit v1.2.3


From 3054426dc68e5d63aa6a6e9b91ac4ec78e3f3805 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <pkondeti@codeaurora.org>
Date: Tue, 30 Oct 2018 12:24:33 +0530
Subject: sched, trace: Fix prev_state output in sched_switch tracepoint

commit 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
tried to fix the problem introduced by a previous commit efb40f588b43
("sched/tracing: Fix trace_sched_switch task-state printing"). However
the prev_state output in sched_switch is still broken.

task_state_index() uses fls() which considers the LSB as 1. Left
shifting 1 by this value gives an incorrect mapping to the task state.
Fix this by decrementing the value returned by __get_task_state()
before shifting.

Link: http://lkml.kernel.org/r/1540882473-1103-1-git-send-email-pkondeti@codeaurora.org

Cc: stable@vger.kernel.org
Fixes: 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace/events/sched.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index f07b270d4fc4..9a4bdfadab07 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -107,6 +107,8 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
 #ifdef CREATE_TRACE_POINTS
 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
 {
+	unsigned int state;
+
 #ifdef CONFIG_SCHED_DEBUG
 	BUG_ON(p != current);
 #endif /* CONFIG_SCHED_DEBUG */
@@ -118,7 +120,15 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 	if (preempt)
 		return TASK_REPORT_MAX;
 
-	return 1 << task_state_index(p);
+	/*
+	 * task_state_index() uses fls() and returns a value from 0-8 range.
+	 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
+	 * it for left shift operation to get the correct task->state
+	 * mapping.
+	 */
+	state = task_state_index(p);
+
+	return state ? (1 << (state - 1)) : state;
 }
 #endif /* CREATE_TRACE_POINTS */
 
-- 
cgit v1.2.3


From 321a874a7ef85655e93b3206d0f36b4a6097f948 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:38 +0100
Subject: sched/smt: Expose sched_smt_present static key

Make the scheduler's 'sched_smt_present' static key globaly available, so
it can be used in the x86 speculation control code.

Provide a query function and a stub for the CONFIG_SMP=n case.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.430168326@linutronix.de
---
 include/linux/sched/smt.h | 18 ++++++++++++++++++
 kernel/sched/sched.h      |  4 +---
 2 files changed, 19 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/sched/smt.h

(limited to 'include')

diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
new file mode 100644
index 000000000000..c9e0be514110
--- /dev/null
+++ b/include/linux/sched/smt.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_SMT_H
+#define _LINUX_SCHED_SMT_H
+
+#include <linux/static_key.h>
+
+#ifdef CONFIG_SCHED_SMT
+extern struct static_key_false sched_smt_present;
+
+static __always_inline bool sched_smt_active(void)
+{
+	return static_branch_likely(&sched_smt_present);
+}
+#else
+static inline bool sched_smt_active(void) { return false; }
+#endif
+
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 618577fc9aa8..4e524ab589c9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -23,6 +23,7 @@
 #include <linux/sched/prio.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/smt.h>
 #include <linux/sched/stat.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sched/task.h>
@@ -936,9 +937,6 @@ static inline int cpu_of(struct rq *rq)
 
 
 #ifdef CONFIG_SCHED_SMT
-
-extern struct static_key_false sched_smt_present;
-
 extern void __update_idle_core(struct rq *rq);
 
 static inline void update_idle_core(struct rq *rq)
-- 
cgit v1.2.3


From a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:39 +0100
Subject: x86/speculation: Rework SMT state change

arch_smt_update() is only called when the sysfs SMT control knob is
changed. This means that when SMT is enabled in the sysfs control knob the
system is considered to have SMT active even if all siblings are offline.

To allow finegrained control of the speculation mitigations, the actual SMT
state is more interesting than the fact that siblings could be enabled.

Rework the code, so arch_smt_update() is invoked from each individual CPU
hotplug function, and simplify the update function while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 11 +++++------
 include/linux/sched/smt.h  |  2 ++
 kernel/cpu.c               | 15 +++++++++------
 3 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a723af0c4400..5625b323ff32 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/nospec.h>
 #include <linux/prctl.h>
+#include <linux/sched/smt.h>
 
 #include <asm/spec-ctrl.h>
 #include <asm/cmdline.h>
@@ -344,16 +345,14 @@ void arch_smt_update(void)
 		return;
 
 	mutex_lock(&spec_ctrl_mutex);
-	mask = x86_spec_ctrl_base;
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+
+	mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
+	if (sched_smt_active())
 		mask |= SPEC_CTRL_STIBP;
-	else
-		mask &= ~SPEC_CTRL_STIBP;
 
 	if (mask != x86_spec_ctrl_base) {
 		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
-				cpu_smt_control == CPU_SMT_ENABLED ?
-				"Enabling" : "Disabling");
+			mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling");
 		x86_spec_ctrl_base = mask;
 		on_each_cpu(update_stibp_msr, NULL, 1);
 	}
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index c9e0be514110..59d3736c454c 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void)
 static inline bool sched_smt_active(void) { return false; }
 #endif
 
+void arch_smt_update(void);
+
 #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3c7f3b4c453c..91d5c38eb7e5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,6 +10,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task.h>
+#include <linux/sched/smt.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
 #include <linux/oom.h>
@@ -367,6 +368,12 @@ static void lockdep_release_cpus_lock(void)
 
 #endif	/* CONFIG_HOTPLUG_CPU */
 
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
@@ -1011,6 +1018,7 @@ out:
 	 * concurrent CPU hotplug via cpu_add_remove_lock.
 	 */
 	lockup_detector_cleanup();
+	arch_smt_update();
 	return ret;
 }
 
@@ -1139,6 +1147,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 	ret = cpuhp_up_callbacks(cpu, st, target);
 out:
 	cpus_write_unlock();
+	arch_smt_update();
 	return ret;
 }
 
@@ -2055,12 +2064,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu)
 	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
 }
 
-/*
- * Architectures that need SMT-specific errata handling during SMT hotplug
- * should override this.
- */
-void __weak arch_smt_update(void) { };
-
 static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 {
 	int cpu, ret = 0;
-- 
cgit v1.2.3


From 46f7ecb1e7359f183f5bbd1e08b90e10e52164f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:50 +0100
Subject: ptrace: Remove unused ptrace_may_access_sched() and MODE_IBRS

The IBPB control code in x86 removed the usage. Remove the functionality
which was introduced for this.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.559149393@linutronix.de
---
 include/linux/ptrace.h | 17 -----------------
 kernel/ptrace.c        | 10 ----------
 2 files changed, 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 6c2ffed907f5..de20ede2c5c8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -64,15 +64,12 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_NOAUDIT	0x04
 #define PTRACE_MODE_FSCREDS	0x08
 #define PTRACE_MODE_REALCREDS	0x10
-#define PTRACE_MODE_SCHED	0x20
-#define PTRACE_MODE_IBPB	0x40
 
 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
 #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
 #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
-#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB)
 
 /**
  * ptrace_may_access - check whether the caller is permitted to access
@@ -90,20 +87,6 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
  */
 extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
 
-/**
- * ptrace_may_access - check whether the caller is permitted to access
- * a target task.
- * @task: target task
- * @mode: selects type of access and caller credentials
- *
- * Returns true on success, false on denial.
- *
- * Similar to ptrace_may_access(). Only to be called from context switch
- * code. Does not call into audit and the regular LSM hooks due to locking
- * constraints.
- */
-extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode);
-
 static inline int ptrace_reparented(struct task_struct *child)
 {
 	return !same_thread_group(child->real_parent, child->parent);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 80b34dffdfb9..c2cee9db5204 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -261,9 +261,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 
 static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
-	if (mode & PTRACE_MODE_SCHED)
-		return false;
-
 	if (mode & PTRACE_MODE_NOAUDIT)
 		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
 	else
@@ -331,16 +328,9 @@ ok:
 	     !ptrace_has_cap(mm->user_ns, mode)))
 	    return -EPERM;
 
-	if (mode & PTRACE_MODE_SCHED)
-		return 0;
 	return security_ptrace_access_check(task, mode);
 }
 
-bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode)
-{
-	return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED);
-}
-
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	int err;
-- 
cgit v1.2.3


From 9137bb27e60e554dab694eafa4cca241fa3a694f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:53 +0100
Subject: x86/speculation: Add prctl() control for indirect branch speculation

Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and
PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of
indirect branch speculation via STIBP and IBPB.

Invocations:
 Check indirect branch speculation status with
 - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

 Enable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);

 Disable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);

 Force disable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);

See Documentation/userspace-api/spec_ctrl.rst.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de
---
 Documentation/userspace-api/spec_ctrl.rst |  9 +++++
 arch/x86/include/asm/nospec-branch.h      |  1 +
 arch/x86/kernel/cpu/bugs.c                | 67 +++++++++++++++++++++++++++++++
 arch/x86/kernel/process.c                 |  5 +++
 include/linux/sched.h                     |  9 +++++
 include/uapi/linux/prctl.h                |  1 +
 tools/include/uapi/linux/prctl.h          |  1 +
 7 files changed, 93 insertions(+)

(limited to 'include')

diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 32f3d55c54b7..c4dbe6f7cdae 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -92,3 +92,12 @@ Speculation misfeature controls
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
+                        (Mitigate Spectre V2 style attacks against user processes)
+
+  Invocations:
+   * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d4d35baf0430..2adbe7b047fa 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -232,6 +232,7 @@ enum spectre_v2_mitigation {
 enum spectre_v2_user_mitigation {
 	SPECTRE_V2_USER_NONE,
 	SPECTRE_V2_USER_STRICT,
+	SPECTRE_V2_USER_PRCTL,
 };
 
 /* The Speculative Store Bypass disable variants */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9cab538e10f1..74359fff87fd 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -566,6 +566,8 @@ void arch_smt_update(void)
 	case SPECTRE_V2_USER_STRICT:
 		update_stibp_strict();
 		break;
+	case SPECTRE_V2_USER_PRCTL:
+		break;
 	}
 
 	mutex_unlock(&spec_ctrl_mutex);
@@ -752,12 +754,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 	return 0;
 }
 
+static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+	switch (ctrl) {
+	case PR_SPEC_ENABLE:
+		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+			return 0;
+		/*
+		 * Indirect branch speculation is always disabled in strict
+		 * mode.
+		 */
+		if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+			return -EPERM;
+		task_clear_spec_ib_disable(task);
+		task_update_spec_tif(task);
+		break;
+	case PR_SPEC_DISABLE:
+	case PR_SPEC_FORCE_DISABLE:
+		/*
+		 * Indirect branch speculation is always allowed when
+		 * mitigation is force disabled.
+		 */
+		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+			return -EPERM;
+		if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+			return 0;
+		task_set_spec_ib_disable(task);
+		if (ctrl == PR_SPEC_FORCE_DISABLE)
+			task_set_spec_ib_force_disable(task);
+		task_update_spec_tif(task);
+		break;
+	default:
+		return -ERANGE;
+	}
+	return 0;
+}
+
 int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
 			     unsigned long ctrl)
 {
 	switch (which) {
 	case PR_SPEC_STORE_BYPASS:
 		return ssb_prctl_set(task, ctrl);
+	case PR_SPEC_INDIRECT_BRANCH:
+		return ib_prctl_set(task, ctrl);
 	default:
 		return -ENODEV;
 	}
@@ -790,11 +830,34 @@ static int ssb_prctl_get(struct task_struct *task)
 	}
 }
 
+static int ib_prctl_get(struct task_struct *task)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		return PR_SPEC_NOT_AFFECTED;
+
+	switch (spectre_v2_user) {
+	case SPECTRE_V2_USER_NONE:
+		return PR_SPEC_ENABLE;
+	case SPECTRE_V2_USER_PRCTL:
+		if (task_spec_ib_force_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+		if (task_spec_ib_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+	case SPECTRE_V2_USER_STRICT:
+		return PR_SPEC_DISABLE;
+	default:
+		return PR_SPEC_NOT_AFFECTED;
+	}
+}
+
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 {
 	switch (which) {
 	case PR_SPEC_STORE_BYPASS:
 		return ssb_prctl_get(task);
+	case PR_SPEC_INDIRECT_BRANCH:
+		return ib_prctl_get(task);
 	default:
 		return -ENODEV;
 	}
@@ -974,6 +1037,8 @@ static char *stibp_state(void)
 		return ", STIBP: disabled";
 	case SPECTRE_V2_USER_STRICT:
 		return ", STIBP: forced";
+	case SPECTRE_V2_USER_PRCTL:
+		return "";
 	}
 	return "";
 }
@@ -986,6 +1051,8 @@ static char *ibpb_state(void)
 			return ", IBPB: disabled";
 		case SPECTRE_V2_USER_STRICT:
 			return ", IBPB: always-on";
+		case SPECTRE_V2_USER_PRCTL:
+			return "";
 		}
 	}
 	return "";
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index afbe2eb4a1c6..7d31192296a8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -450,6 +450,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
 			set_tsk_thread_flag(tsk, TIF_SSBD);
 		else
 			clear_tsk_thread_flag(tsk, TIF_SSBD);
+
+		if (task_spec_ib_disable(tsk))
+			set_tsk_thread_flag(tsk, TIF_SPEC_IB);
+		else
+			clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
 	}
 	/* Return the updated threadinfo flags*/
 	return task_thread_info(tsk)->flags;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a51c13c2b1a0..d607db5fcc6a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1453,6 +1453,8 @@ static inline bool is_percpu_thread(void)
 #define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
 #define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
 #define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
+#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
+#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1484,6 +1486,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 
+TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
+
+TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+
 static inline void
 current_restore_flags(unsigned long orig_flags, unsigned long flags)
 {
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index c0d7ea0bf5b6..b17201edfa09 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -212,6 +212,7 @@ struct prctl_mm_map {
 #define PR_SET_SPECULATION_CTRL		53
 /* Speculation control variants */
 # define PR_SPEC_STORE_BYPASS		0
+# define PR_SPEC_INDIRECT_BRANCH	1
 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
 # define PR_SPEC_NOT_AFFECTED		0
 # define PR_SPEC_PRCTL			(1UL << 0)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index c0d7ea0bf5b6..b17201edfa09 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -212,6 +212,7 @@ struct prctl_mm_map {
 #define PR_SET_SPECULATION_CTRL		53
 /* Speculation control variants */
 # define PR_SPEC_STORE_BYPASS		0
+# define PR_SPEC_INDIRECT_BRANCH	1
 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
 # define PR_SPEC_NOT_AFFECTED		0
 # define PR_SPEC_PRCTL			(1UL << 0)
-- 
cgit v1.2.3


From 3f2b7b9035107d6096ea438ea3d97dcf0481b6d2 Mon Sep 17 00:00:00 2001
From: "kiran.modukuri" <kiran.modukuri@gmail.com>
Date: Mon, 26 Nov 2018 15:41:48 +0000
Subject: fscache: Fix race in fscache_op_complete() due to split atomic_sub &
 read

The code in fscache_retrieval_complete is using atomic_sub followed by an
atomic_read:

        atomic_sub(n_pages, &op->n_pages);
        if (atomic_read(&op->n_pages) <= 0)
                fscache_op_complete(&op->op, true);

This causes two threads doing a decrement of n_pages to race with each
other seeing the op->refcount 0 at same time - and they end up calling
fscache_op_complete() in both the threads leading to an assertion failure.

Fix this by using atomic_sub_return_relaxed() instead of two calls.  Note
that I'm using 'relaxed' rather than, say, 'release' as there aren't
multiple variables that appear to need ordering across the release.

The oops looks something like:

FS-Cache: Assertion failed
FS-Cache: 0 > 0 is false
...
kernel BUG at /usr/src/linux-4.4.0/fs/fscache/operation.c:449!
...
Workqueue: fscache_operation fscache_op_work_func [fscache]
...
RIP: 0010:[<ffffffffc037eacd>] fscache_op_complete+0x10d/0x180 [fscache]
...
Call Trace:
 [<ffffffffc1464cf9>] cachefiles_read_copier+0x3a9/0x410 [cachefiles]
 [<ffffffffc037e272>] fscache_op_work_func+0x22/0x50 [fscache]
 [<ffffffff81096da0>] process_one_work+0x150/0x3f0
 [<ffffffff8109751a>] worker_thread+0x11a/0x470
 [<ffffffff81808e59>] ? __schedule+0x359/0x980
 [<ffffffff81097400>] ? rescuer_thread+0x310/0x310
 [<ffffffff8109cdd6>] kthread+0xd6/0xf0
 [<ffffffff8109cd00>] ? kthread_park+0x60/0x60
 [<ffffffff8180d0cf>] ret_from_fork+0x3f/0x70
 [<ffffffff8109cd00>] ? kthread_park+0x60/0x60

This seen this in 4.4.x kernels and the same bug affects fscache in latest
upstreams kernels.

Fixes: 1bb4b7f98f36 ("FS-Cache: The retrieval remaining-pages counter needs to be atomic_t")
Signed-off-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 34cf0fdd7dc7..610815e3f1aa 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -196,8 +196,7 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
 static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
 					      int n_pages)
 {
-	atomic_sub(n_pages, &op->n_pages);
-	if (atomic_read(&op->n_pages) <= 0)
+	if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0)
 		fscache_op_complete(&op->op, false);
 }
 
-- 
cgit v1.2.3


From 123664101aa2156d05251704fc63f9bcbf77741a Mon Sep 17 00:00:00 2001
From: Igor Druzhinin <igor.druzhinin@citrix.com>
Date: Tue, 27 Nov 2018 20:58:21 +0000
Subject: Revert "xen/balloon: Mark unallocated host memory as UNUSABLE"

This reverts commit b3cf8528bb21febb650a7ecbf080d0647be40b9f.

That commit unintentionally broke Xen balloon memory hotplug with
"hotplug_unpopulated" set to 1. As long as "System RAM" resource
got assigned under a new "Unusable memory" resource in IO/Mem tree
any attempt to online this memory would fail due to general kernel
restrictions on having "System RAM" resources as 1st level only.

The original issue that commit has tried to workaround fa564ad96366
("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f,
60-7f)") also got amended by the following 03a551734 ("x86/PCI: Move
and shrink AMD 64-bit window to avoid conflict") which made the
original fix to Xen ballooning unnecessary.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten.c | 78 ------------------------------------------------
 arch/x86/xen/setup.c     |  6 ++--
 drivers/xen/balloon.c    | 65 ++++++----------------------------------
 include/xen/balloon.h    |  5 ----
 4 files changed, 13 insertions(+), 141 deletions(-)

(limited to 'include')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 67b2f31a1265..aa1cc483bd2a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -10,7 +10,6 @@
 #include <xen/xen.h>
 #include <xen/features.h>
 #include <xen/page.h>
-#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -346,80 +345,3 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
-
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-void __init arch_xen_balloon_init(struct resource *hostmem_resource)
-{
-	struct xen_memory_map memmap;
-	int rc;
-	unsigned int i, last_guest_ram;
-	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	struct e820_table *xen_e820_table;
-	const struct e820_entry *entry;
-	struct resource *res;
-
-	if (!xen_initial_domain())
-		return;
-
-	xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
-	if (!xen_e820_table)
-		return;
-
-	memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
-	set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
-	rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
-	if (rc) {
-		pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
-		goto out;
-	}
-
-	last_guest_ram = 0;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		if (xen_e820_table->entries[i].addr >= max_addr)
-			break;
-		if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
-			last_guest_ram = i;
-	}
-
-	entry = &xen_e820_table->entries[last_guest_ram];
-	if (max_addr >= entry->addr + entry->size)
-		goto out; /* No unallocated host RAM. */
-
-	hostmem_resource->start = max_addr;
-	hostmem_resource->end = entry->addr + entry->size;
-
-	/*
-	 * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
-	 * as unavailable. The rest of that region can be used for hotplug-based
-	 * ballooning.
-	 */
-	for (; i < memmap.nr_entries; i++) {
-		entry = &xen_e820_table->entries[i];
-
-		if (entry->type == E820_TYPE_RAM)
-			continue;
-
-		if (entry->addr >= hostmem_resource->end)
-			break;
-
-		res = kzalloc(sizeof(*res), GFP_KERNEL);
-		if (!res)
-			goto out;
-
-		res->name = "Unavailable host RAM";
-		res->start = entry->addr;
-		res->end = (entry->addr + entry->size < hostmem_resource->end) ?
-			    entry->addr + entry->size : hostmem_resource->end;
-		rc = insert_resource(hostmem_resource, res);
-		if (rc) {
-			pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
-				__func__, res->start, res->end, rc);
-			kfree(res);
-			goto  out;
-		}
-	}
-
- out:
-	kfree(xen_e820_table);
-}
-#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1163e33121fb..075ed47993bb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,6 +808,7 @@ char * __init xen_memory_setup(void)
 	addr = xen_e820_table.entries[0].addr;
 	size = xen_e820_table.entries[0].size;
 	while (i < xen_e820_table.nr_entries) {
+		bool discard = false;
 
 		chunk_size = size;
 		type = xen_e820_table.entries[i].type;
@@ -823,10 +824,11 @@ char * __init xen_memory_setup(void)
 				xen_add_extra_mem(pfn_s, n_pfns);
 				xen_max_p2m_pfn = pfn_s + n_pfns;
 			} else
-				type = E820_TYPE_UNUSABLE;
+				discard = true;
 		}
 
-		xen_align_and_add_e820_region(addr, chunk_size, type);
+		if (!discard)
+			xen_align_and_add_e820_region(addr, chunk_size, type);
 
 		addr += chunk_size;
 		size -= chunk_size;
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index e12bb256036f..7ab6caef599c 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -251,25 +251,10 @@ static void release_memory_resource(struct resource *resource)
 	kfree(resource);
 }
 
-/*
- * Host memory not allocated to dom0. We can use this range for hotplug-based
- * ballooning.
- *
- * It's a type-less resource. Setting IORESOURCE_MEM will make resource
- * management algorithms (arch_remove_reservations()) look into guest e820,
- * which we don't want.
- */
-static struct resource hostmem_resource = {
-	.name   = "Host RAM",
-};
-
-void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
-{}
-
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-	struct resource *res, *res_hostmem;
-	int ret = -ENOMEM;
+	struct resource *res;
+	int ret;
 
 	res = kzalloc(sizeof(*res), GFP_KERNEL);
 	if (!res)
@@ -278,42 +263,13 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	res->name = "System RAM";
 	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-	res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
-	if (res_hostmem) {
-		/* Try to grab a range from hostmem */
-		res_hostmem->name = "Host memory";
-		ret = allocate_resource(&hostmem_resource, res_hostmem,
-					size, 0, -1,
-					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-	}
-
-	if (!ret) {
-		/*
-		 * Insert this resource into iomem. Because hostmem_resource
-		 * tracks portion of guest e820 marked as UNUSABLE noone else
-		 * should try to use it.
-		 */
-		res->start = res_hostmem->start;
-		res->end = res_hostmem->end;
-		ret = insert_resource(&iomem_resource, res);
-		if (ret < 0) {
-			pr_err("Can't insert iomem_resource [%llx - %llx]\n",
-				res->start, res->end);
-			release_memory_resource(res_hostmem);
-			res_hostmem = NULL;
-			res->start = res->end = 0;
-		}
-	}
-
-	if (ret) {
-		ret = allocate_resource(&iomem_resource, res,
-					size, 0, -1,
-					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-		if (ret < 0) {
-			pr_err("Cannot allocate new System RAM resource\n");
-			kfree(res);
-			return NULL;
-		}
+	ret = allocate_resource(&iomem_resource, res,
+				size, 0, -1,
+				PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+	if (ret < 0) {
+		pr_err("Cannot allocate new System RAM resource\n");
+		kfree(res);
+		return NULL;
 	}
 
 #ifdef CONFIG_SPARSEMEM
@@ -325,7 +281,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 			pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
 			       pfn, limit);
 			release_memory_resource(res);
-			release_memory_resource(res_hostmem);
 			return NULL;
 		}
 	}
@@ -747,8 +702,6 @@ static int __init balloon_init(void)
 	set_online_page_callback(&xen_online_page);
 	register_memory_notifier(&xen_memory_nb);
 	register_sysctl_table(xen_root);
-
-	arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 61f410fd74e4..4914b93a23f2 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -44,8 +44,3 @@ static inline void xen_balloon_init(void)
 {
 }
 #endif
-
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-struct resource;
-void arch_xen_balloon_init(struct resource *hostmem_resource);
-#endif
-- 
cgit v1.2.3


From 89d328f637b9904b6d4c9af73c8a608b8dd4d6f8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 1 Nov 2018 16:17:22 -0700
Subject: pstore/ram: Correctly calculate usable PRZ bytes

The actual number of bytes stored in a PRZ is smaller than the
bytes requested by platform data, since there is a header on each
PRZ. Additionally, if ECC is enabled, there are trailing bytes used
as well. Normally this mismatch doesn't matter since PRZs are circular
buffers and the leading "overflow" bytes are just thrown away. However, in
the case of a compressed record, this rather badly corrupts the results.

This corruption was visible with "ramoops.mem_size=204800 ramoops.ecc=1".
Any stored crashes would not be uncompressable (producing a pstorefs
"dmesg-*.enc.z" file), and triggering errors at boot:

  [    2.790759] pstore: crypto_comp_decompress failed, ret = -22!

Backporting this depends on commit 70ad35db3321 ("pstore: Convert console
write to use ->write_buf")

Reported-by: Joel Fernandes <joel@joelfernandes.org>
Fixes: b0aad7a99c1d ("pstore: Add compression support to pstore")
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 fs/pstore/ram.c        | 15 ++++++---------
 include/linux/pstore.h |  5 ++++-
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 712960e117fe..8646fe6e916f 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -816,17 +816,14 @@ static int ramoops_probe(struct platform_device *pdev)
 
 	cxt->pstore.data = cxt;
 	/*
-	 * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we
-	 * have to handle dumps, we must have at least record_size buffer. And
-	 * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be
-	 * ZERO_SIZE_PTR).
+	 * Since bufsize is only used for dmesg crash dumps, it
+	 * must match the size of the dprz record (after PRZ header
+	 * and ECC bytes have been accounted for).
 	 */
-	if (cxt->console_size)
-		cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */
-	cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize);
-	cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL);
+	cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size;
+	cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
 	if (!cxt->pstore.buf) {
-		pr_err("cannot allocate pstore buffer\n");
+		pr_err("cannot allocate pstore crash dump buffer\n");
 		err = -ENOMEM;
 		goto fail_clear;
 	}
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index a15bc4d48752..30fcec375a3a 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -90,7 +90,10 @@ struct pstore_record {
  *
  * @buf_lock:	spinlock to serialize access to @buf
  * @buf:	preallocated crash dump buffer
- * @bufsize:	size of @buf available for crash dump writes
+ * @bufsize:	size of @buf available for crash dump bytes (must match
+ *		smallest number of bytes available for writing to a
+ *		backend entry, since compressed bytes don't take kindly
+ *		to being truncated)
  *
  * @read_mutex:	serializes @open, @read, @close, and @erase callbacks
  * @flags:	bitfield of frontends the backend can accept writes for
-- 
cgit v1.2.3


From 0c7a52e4d4b5c4d35b31f3c3ad32af814f1bf491 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Wed, 28 Nov 2018 03:35:23 +0000
Subject: tracepoint: Use __idx instead of idx in DO_TRACE macro to make it
 unique

After enabling KVM event tracing, almost all of trace_kvm_exit()'s
printk shows

	"kvm_exit: IRQ: ..."

even if the actual exception_type is NOT IRQ.  More specifically,
trace_kvm_exit() is defined in virt/kvm/arm/trace.h by TRACE_EVENT.

This slight problem may have existed after commit e6753f23d961
("tracepoint: Make rcuidle tracepoint callers use SRCU"). There are
two variables in trace_kvm_exit() and __DO_TRACE() which have the
same name, *idx*. Thus the actual value of *idx* will be overwritten
when tracing. Fix it by adding a simple prefix.

Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Wang Haibin <wanghaibin.wang@huawei.com>
Cc: linux-trace-devel@vger.kernel.org
Cc: stable@vger.kernel.org
Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 538ba1a58f5b..e9de8ad0bad7 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -166,7 +166,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
-		int __maybe_unused idx = 0;				\
+		int __maybe_unused __idx = 0;				\
 									\
 		if (!(cond))						\
 			return;						\
@@ -182,7 +182,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		 * doesn't work from the idle path.			\
 		 */							\
 		if (rcuidle) {						\
-			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
 			rcu_irq_enter_irqson();				\
 		}							\
 									\
@@ -198,7 +198,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 									\
 		if (rcuidle) {						\
 			rcu_irq_exit_irqson();				\
-			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
 		}							\
 									\
 		preempt_enable_notrace();				\
-- 
cgit v1.2.3


From e0c274472d5d27f277af722e017525e0b33784cd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 30 Nov 2018 14:09:58 -0800
Subject: psi: make disabling/enabling easier for vendor kernels

Mel Gorman reports a hackbench regression with psi that would prohibit
shipping the suse kernel with it default-enabled, but he'd still like
users to be able to opt in at little to no cost to others.

With the current combination of CONFIG_PSI and the psi_disabled bool set
from the commandline, this is a challenge.  Do the following things to
make it easier:

1. Add a config option CONFIG_PSI_DEFAULT_DISABLED that allows distros
   to enable CONFIG_PSI in their kernel but leave the feature disabled
   unless a user requests it at boot-time.

   To avoid double negatives, rename psi_disabled= to psi=.

2. Make psi_disabled a static branch to eliminate any branch costs
   when the feature is disabled.

In terms of numbers before and after this patch, Mel says:

: The following is a comparision using CONFIG_PSI=n as a baseline against
: your patch and a vanilla kernel
:
:                          4.20.0-rc4             4.20.0-rc4             4.20.0-rc4
:                 kconfigdisable-v1r1                vanilla        psidisable-v1r1
: Amean     1       1.3100 (   0.00%)      1.3923 (  -6.28%)      1.3427 (  -2.49%)
: Amean     3       3.8860 (   0.00%)      4.1230 *  -6.10%*      3.8860 (  -0.00%)
: Amean     5       6.8847 (   0.00%)      8.0390 * -16.77%*      6.7727 (   1.63%)
: Amean     7       9.9310 (   0.00%)     10.8367 *  -9.12%*      9.9910 (  -0.60%)
: Amean     12     16.6577 (   0.00%)     18.2363 *  -9.48%*     17.1083 (  -2.71%)
: Amean     18     26.5133 (   0.00%)     27.8833 *  -5.17%*     25.7663 (   2.82%)
: Amean     24     34.3003 (   0.00%)     34.6830 (  -1.12%)     32.0450 (   6.58%)
: Amean     30     40.0063 (   0.00%)     40.5800 (  -1.43%)     41.5087 (  -3.76%)
: Amean     32     40.1407 (   0.00%)     41.2273 (  -2.71%)     39.9417 (   0.50%)
:
: It's showing that the vanilla kernel takes a hit (as the bisection
: indicated it would) and that disabling PSI by default is reasonably
: close in terms of performance for this particular workload on this
: particular machine so;

Link: http://lkml.kernel.org/r/20181127165329.GA29728@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  4 ++++
 include/linux/psi.h                             |  3 ++-
 init/Kconfig                                    |  9 ++++++++
 kernel/sched/psi.c                              | 30 +++++++++++++++++--------
 kernel/sched/stats.h                            |  8 +++----
 5 files changed, 40 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 675170c36078..5d6ba930d4f4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3505,6 +3505,10 @@
 			before loading.
 			See Documentation/blockdev/ramdisk.txt.
 
+	psi=		[KNL] Enable or disable pressure stall information
+			tracking.
+			Format: <bool>
+
 	psmouse.proto=	[HW,MOUSE] Highest PS2 mouse protocol extension to
 			probe for; one of (bare|imps|exps|lifebook|any).
 	psmouse.rate=	[HW,MOUSE] Set desired mouse report rate, in reports
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 8e0725aac0aa..7006008d5b72 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_PSI_H
 #define _LINUX_PSI_H
 
+#include <linux/jump_label.h>
 #include <linux/psi_types.h>
 #include <linux/sched.h>
 
@@ -9,7 +10,7 @@ struct css_set;
 
 #ifdef CONFIG_PSI
 
-extern bool psi_disabled;
+extern struct static_key_false psi_disabled;
 
 void psi_init(void);
 
diff --git a/init/Kconfig b/init/Kconfig
index a4112e95724a..cf5b5a0dcbc2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -509,6 +509,15 @@ config PSI
 
 	  Say N if unsure.
 
+config PSI_DEFAULT_DISABLED
+	bool "Require boot parameter to enable pressure stall information tracking"
+	default n
+	depends on PSI
+	help
+	  If set, pressure stall information tracking will be disabled
+	  per default but can be enabled through passing psi_enable=1
+	  on the kernel commandline during boot.
+
 endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 3d7355d7c3e3..fe24de3fbc93 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -136,8 +136,18 @@
 
 static int psi_bug __read_mostly;
 
-bool psi_disabled __read_mostly;
-core_param(psi_disabled, psi_disabled, bool, 0644);
+DEFINE_STATIC_KEY_FALSE(psi_disabled);
+
+#ifdef CONFIG_PSI_DEFAULT_DISABLED
+bool psi_enable;
+#else
+bool psi_enable = true;
+#endif
+static int __init setup_psi(char *str)
+{
+	return kstrtobool(str, &psi_enable) == 0;
+}
+__setup("psi=", setup_psi);
 
 /* Running averages - we need to be higher-res than loadavg */
 #define PSI_FREQ	(2*HZ+1)	/* 2 sec intervals */
@@ -169,8 +179,10 @@ static void group_init(struct psi_group *group)
 
 void __init psi_init(void)
 {
-	if (psi_disabled)
+	if (!psi_enable) {
+		static_branch_enable(&psi_disabled);
 		return;
+	}
 
 	psi_period = jiffies_to_nsecs(PSI_FREQ);
 	group_init(&psi_system);
@@ -549,7 +561,7 @@ void psi_memstall_enter(unsigned long *flags)
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	*flags = current->flags & PF_MEMSTALL;
@@ -579,7 +591,7 @@ void psi_memstall_leave(unsigned long *flags)
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (*flags)
@@ -600,7 +612,7 @@ void psi_memstall_leave(unsigned long *flags)
 #ifdef CONFIG_CGROUPS
 int psi_cgroup_alloc(struct cgroup *cgroup)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return 0;
 
 	cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
@@ -612,7 +624,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
 
 void psi_cgroup_free(struct cgroup *cgroup)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	cancel_delayed_work_sync(&cgroup->psi.clock_work);
@@ -637,7 +649,7 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (psi_disabled) {
+	if (static_branch_likely(&psi_disabled)) {
 		/*
 		 * Lame to do this here, but the scheduler cannot be locked
 		 * from the outside, so we move cgroups from inside sched/.
@@ -673,7 +685,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 {
 	int full;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return -EOPNOTSUPP;
 
 	update_stats(group);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 4904c4677000..aa0de240fb41 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -66,7 +66,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
 {
 	int clear = 0, set = TSK_RUNNING;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (!wakeup || p->sched_psi_wake_requeue) {
@@ -86,7 +86,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 {
 	int clear = TSK_RUNNING, set = 0;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (!sleep) {
@@ -102,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 
 static inline void psi_ttwu_dequeue(struct task_struct *p)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 	/*
 	 * Is the task being migrated during a wakeup? Make sure to
@@ -128,7 +128,7 @@ static inline void psi_ttwu_dequeue(struct task_struct *p)
 
 static inline void psi_task_tick(struct rq *rq)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (unlikely(rq->curr->flags & PF_MEMSTALL))
-- 
cgit v1.2.3