From 8de111e27688798623b9e9062235bb0cac29f599 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Fri, 27 May 2011 16:56:50 -0300
Subject: [media] lirc_dev: store cdev in irctl, up maxdevs

Store the cdev pointer in struct irctl, allocated dynamically as needed,
rather than having a static array. At the same time, recycle some of the
saved memory to nudge the maximum number of lirc devices supported up a
ways -- it's not that uncommon these days, now that we have the rc-core
lirc bridge driver, to see a system with at least 4 raw IR receivers.
(consider a mythtv backend with several video capture devices and the
possible need for IR transmit hardware).

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/lirc_dev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/lirc_dev.h b/include/media/lirc_dev.h
index 630e702c9511..168dd0b1bae2 100644
--- a/include/media/lirc_dev.h
+++ b/include/media/lirc_dev.h
@@ -9,7 +9,7 @@
 #ifndef _LINUX_LIRC_DEV_H
 #define _LINUX_LIRC_DEV_H
 
-#define MAX_IRCTL_DEVICES 4
+#define MAX_IRCTL_DEVICES 8
 #define BUFLEN            16
 
 #define mod(n, div) ((n) % (div))
-- 
cgit v1.2.3


From c30701130cf7bff4f97a148b1bc96f878c046a40 Mon Sep 17 00:00:00 2001
From: "HeungJun, Kim" <riverful.kim@samsung.com>
Date: Tue, 7 Jun 2011 02:00:58 -0300
Subject: [media] m5mols: Use proper email address format

Signed-off-by: HeungJun, Kim <riverful.kim@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/m5mols/m5mols.h          | 4 ++--
 drivers/media/video/m5mols/m5mols_capture.c  | 4 ++--
 drivers/media/video/m5mols/m5mols_controls.c | 4 ++--
 drivers/media/video/m5mols/m5mols_core.c     | 4 ++--
 drivers/media/video/m5mols/m5mols_reg.h      | 4 ++--
 include/media/m5mols.h                       | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/m5mols/m5mols.h b/drivers/media/video/m5mols/m5mols.h
index 9ae170935871..89d09a8914f8 100644
--- a/drivers/media/video/m5mols/m5mols.h
+++ b/drivers/media/video/m5mols/m5mols.h
@@ -2,10 +2,10 @@
  * Header for M-5MOLS 8M Pixel camera sensor with ISP
  *
  * Copyright (C) 2011 Samsung Electronics Co., Ltd.
- * Author: HeungJun Kim, riverful.kim@samsung.com
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
  *
  * Copyright (C) 2009 Samsung Electronics Co., Ltd.
- * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com
+ * Author: Dongsoo Nathaniel Kim <dongsoo45.kim@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/media/video/m5mols/m5mols_capture.c b/drivers/media/video/m5mols/m5mols_capture.c
index 751f4593da9b..d9471928369d 100644
--- a/drivers/media/video/m5mols/m5mols_capture.c
+++ b/drivers/media/video/m5mols/m5mols_capture.c
@@ -2,10 +2,10 @@
  * The Capture code for Fujitsu M-5MOLS ISP
  *
  * Copyright (C) 2011 Samsung Electronics Co., Ltd.
- * Author: HeungJun Kim, riverful.kim@samsung.com
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
  *
  * Copyright (C) 2009 Samsung Electronics Co., Ltd.
- * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com
+ * Author: Dongsoo Nathaniel Kim <dongsoo45.kim@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/media/video/m5mols/m5mols_controls.c b/drivers/media/video/m5mols/m5mols_controls.c
index d392c83fbf05..d135d20d09cf 100644
--- a/drivers/media/video/m5mols/m5mols_controls.c
+++ b/drivers/media/video/m5mols/m5mols_controls.c
@@ -2,10 +2,10 @@
  * Controls for M-5MOLS 8M Pixel camera sensor with ISP
  *
  * Copyright (C) 2011 Samsung Electronics Co., Ltd.
- * Author: HeungJun Kim, riverful.kim@samsung.com
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
  *
  * Copyright (C) 2009 Samsung Electronics Co., Ltd.
- * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com
+ * Author: Dongsoo Nathaniel Kim <dongsoo45.kim@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/media/video/m5mols/m5mols_core.c b/drivers/media/video/m5mols/m5mols_core.c
index 9815f2c75e83..43c68f51c5ce 100644
--- a/drivers/media/video/m5mols/m5mols_core.c
+++ b/drivers/media/video/m5mols/m5mols_core.c
@@ -2,10 +2,10 @@
  * Driver for M-5MOLS 8M Pixel camera sensor with ISP
  *
  * Copyright (C) 2011 Samsung Electronics Co., Ltd.
- * Author: HeungJun Kim, riverful.kim@samsung.com
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
  *
  * Copyright (C) 2009 Samsung Electronics Co., Ltd.
- * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com
+ * Author: Dongsoo Nathaniel Kim <dongsoo45.kim@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/media/video/m5mols/m5mols_reg.h b/drivers/media/video/m5mols/m5mols_reg.h
index 5f5bdcf608b6..c755bd6edfe9 100644
--- a/drivers/media/video/m5mols/m5mols_reg.h
+++ b/drivers/media/video/m5mols/m5mols_reg.h
@@ -2,10 +2,10 @@
  * Register map for M-5MOLS 8M Pixel camera sensor with ISP
  *
  * Copyright (C) 2011 Samsung Electronics Co., Ltd.
- * Author: HeungJun Kim, riverful.kim@samsung.com
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
  *
  * Copyright (C) 2009 Samsung Electronics Co., Ltd.
- * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com
+ * Author: Dongsoo Nathaniel Kim <dongsoo45.kim@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/include/media/m5mols.h b/include/media/m5mols.h
index 2d7e7ca2313d..aac2c0e06d5e 100644
--- a/include/media/m5mols.h
+++ b/include/media/m5mols.h
@@ -2,10 +2,10 @@
  * Driver header for M-5MOLS 8M Pixel camera sensor with ISP
  *
  * Copyright (C) 2011 Samsung Electronics Co., Ltd.
- * Author: HeungJun Kim, riverful.kim@samsung.com
+ * Author: HeungJun Kim <riverful.kim@samsung.com>
  *
  * Copyright (C) 2009 Samsung Electronics Co., Ltd.
- * Author: Dongsoo Nathaniel Kim, dongsoo45.kim@samsung.com
+ * Author: Dongsoo Nathaniel Kim <dongsoo45.kim@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
-- 
cgit v1.2.3


From 39785eb1d3e6c58cc8bf8f6990956a58037ecc75 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Thu, 23 Jun 2011 20:20:26 +0000
Subject: fsl-diu-fb: remove check for pixel clock ranges

The Freescale DIU framebuffer driver defines two constants, MIN_PIX_CLK and
MAX_PIX_CLK, that are supposed to represent the lower and upper limits of
the pixel clock.  These values, however, are true only for one platform
clock rate (533MHz) and only for the MPC8610.  So the actual range for
the pixel clock is chip-specific, which means the current values are almost
always wrong.  The chances of an out-of-range pixel clock being used are also
remote.

Rather than try to detect an out-of-range clock in the DIU driver, we depend
on the board-specific pixel clock function (e.g. p1022ds_set_pixel_clock)
to clamp the pixel clock to a supported value.

Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/video/fsl-diu-fb.c | 16 ----------------
 include/linux/fsl-diu-fb.h |  6 ------
 2 files changed, 22 deletions(-)

(limited to 'include')

diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
index bedf5be27f05..0acc7d65aeaa 100644
--- a/drivers/video/fsl-diu-fb.c
+++ b/drivers/video/fsl-diu-fb.c
@@ -555,8 +555,6 @@ static void adjust_aoi_size_position(struct fb_var_screeninfo *var,
 static int fsl_diu_check_var(struct fb_var_screeninfo *var,
 				struct fb_info *info)
 {
-	unsigned long htotal, vtotal;
-
 	pr_debug("check_var xres: %d\n", var->xres);
 	pr_debug("check_var yres: %d\n", var->yres);
 
@@ -635,20 +633,6 @@ static int fsl_diu_check_var(struct fb_var_screeninfo *var,
 
 		break;
 	}
-	/* If the pixclock is below the minimum spec'd value then set to
-	 * refresh rate for 60Hz since this is supported by most monitors.
-	 * Refer to Documentation/fb/ for calculations.
-	 */
-	if ((var->pixclock < MIN_PIX_CLK) || (var->pixclock > MAX_PIX_CLK)) {
-		htotal = var->xres + var->right_margin + var->hsync_len +
-		    var->left_margin;
-		vtotal = var->yres + var->lower_margin + var->vsync_len +
-		    var->upper_margin;
-		var->pixclock = (vtotal * htotal * 6UL) / 100UL;
-		var->pixclock = KHZ2PICOS(var->pixclock);
-		pr_debug("pixclock set for 60Hz refresh = %u ps\n",
-			var->pixclock);
-	}
 
 	var->height = -1;
 	var->width = -1;
diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h
index 781d4671415f..daa9952d2174 100644
--- a/include/linux/fsl-diu-fb.h
+++ b/include/linux/fsl-diu-fb.h
@@ -24,12 +24,6 @@
  * See mpc8610fb_set_par(), map_video_memory(), and unmap_video_memory()
  */
 #define MEM_ALLOC_THRESHOLD (1024*768*4+32)
-/* Minimum value that the pixel clock can be set to in pico seconds
- * This is determined by platform clock/3 where the minimum platform
- * clock is 533MHz. This gives 5629 pico seconds.
- */
-#define MIN_PIX_CLK 5629
-#define MAX_PIX_CLK 96096
 
 #include <linux/types.h>
 
-- 
cgit v1.2.3


From a66b98db570a638afd909459e1e6bfa272344bd3 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Thu, 23 Jun 2011 00:00:24 +0300
Subject: mac80211: fix rx->key NULL dereference during mic failure

Sometimes when reporting a MIC failure rx->key may be unset. This
code path is hit when receiving a packet meant for a multicast
address, and decryption is performed in HW.

Fortunately, the failing key_idx is not used for anything up to
(and including) usermode, so we allow ourselves to drop it on the
way up when a key cannot be retrieved.

Signed-off-by: Arik Nemtsov <arik@wizery.com>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 2 +-
 net/mac80211/wpa.c     | 8 +++++++-
 net/wireless/nl80211.c | 3 ++-
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0589f554788a..396e8fc8910e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2688,7 +2688,7 @@ void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
  * @dev: network device
  * @addr: The source MAC address of the frame
  * @key_type: The key type that the received frame used
- * @key_id: Key identifier (0..3)
+ * @key_id: Key identifier (0..3). Can be -1 if missing.
  * @tsc: The TSC value of the frame that generated the MIC failure (6 octets)
  * @gfp: allocation flags
  *
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 9dc3b5f26e80..d91c1a26630d 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -154,7 +154,13 @@ update_iv:
 	return RX_CONTINUE;
 
 mic_fail:
-	mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
+	/*
+	 * In some cases the key can be unset - e.g. a multicast packet, in
+	 * a driver that supports HW encryption. Send up the key idx only if
+	 * the key is set.
+	 */
+	mac80211_ev_michael_mic_failure(rx->sdata,
+					rx->key ? rx->key->conf.keyidx : -1,
 					(void *) skb->data, NULL, GFP_ATOMIC);
 	return RX_DROP_UNUSABLE;
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 98fa8eb6cc4b..f07602d7bf68 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6463,7 +6463,8 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 	if (addr)
 		NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr);
 	NLA_PUT_U32(msg, NL80211_ATTR_KEY_TYPE, key_type);
-	NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id);
+	if (key_id != -1)
+		NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id);
 	if (tsc)
 		NLA_PUT(msg, NL80211_ATTR_KEY_SEQ, 6, tsc);
 
-- 
cgit v1.2.3


From 15b493d11fcce3c5547e3d7fb6d90e11ffe12777 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 28 Jun 2011 09:48:48 +0200
Subject: drbd: fix limit define, we support 1 PiByte now

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 246f576c981d..447c36752385 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -117,10 +117,10 @@
 /* drbdsetup XY resize -d Z
  * you are free to reduce the device size to nothing, if you want to.
  * the upper limit with 64bit kernel, enough ram and flexible meta data
- * is 16 TB, currently. */
+ * is 1 PiB, currently. */
 /* DRBD_MAX_SECTORS */
 #define DRBD_DISK_SIZE_SECT_MIN  0
-#define DRBD_DISK_SIZE_SECT_MAX  (16 * (2LLU << 30))
+#define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40))
 #define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
 
 #define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
-- 
cgit v1.2.3


From 4f3c7a18d9e8a287d31f828a259d713fe4859471 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 30 Jun 2011 15:08:04 +0200
Subject: ALSA: sb16 - Fix build errors on MIPS and others with 13bit ioctl
 size

One of the ioctl definitions in sound/sb16_csp.h has a data size
over 8kB, and this causes build errors on architectures like MIPS,
which define _IOC_SIZEBITS=13.

To avoid these build errors while keeping compatibility, manually
expand with _IOC() instead of using _IOW() for the problematic ioctl.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/sb16_csp.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/sb16_csp.h b/include/sound/sb16_csp.h
index 736eac71d053..af1b49e982df 100644
--- a/include/sound/sb16_csp.h
+++ b/include/sound/sb16_csp.h
@@ -99,7 +99,14 @@ struct snd_sb_csp_info {
 /* get CSP information */
 #define SNDRV_SB_CSP_IOCTL_INFO		_IOR('H', 0x10, struct snd_sb_csp_info)
 /* load microcode to CSP */
-#define SNDRV_SB_CSP_IOCTL_LOAD_CODE	_IOW('H', 0x11, struct snd_sb_csp_microcode)
+/* NOTE: struct snd_sb_csp_microcode overflows the max size (13 bits)
+ * defined for some architectures like MIPS, and it leads to build errors.
+ * (x86 and co have 14-bit size, thus it's valid, though.)
+ * As a workaround for skipping the size-limit check, here we don't use the
+ * normal _IOW() macro but _IOC() with the manual argument.
+ */
+#define SNDRV_SB_CSP_IOCTL_LOAD_CODE	\
+	_IOC(_IOC_WRITE, 'H', 0x11, sizeof(struct snd_sb_csp_microcode))
 /* unload microcode from CSP */
 #define SNDRV_SB_CSP_IOCTL_UNLOAD_CODE	_IO('H', 0x12)
 /* start CSP */
-- 
cgit v1.2.3


From 957c665f37007de93ccbe45902a23143724170d0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 24 Jun 2011 15:25:00 -0700
Subject: ipv6: Don't put artificial limit on routing table size.

IPV6, unlike IPV4, doesn't have a routing cache.

Routing table entries, as well as clones made in response
to route lookup requests, all live in the same table.  And
all of these things are together collected in the destination
cache table for ipv6.

This means that routing table entries count against the garbage
collection limits, even though such entries cannot ever be reclaimed
and are added explicitly by the administrator (rather than being
created in response to lookups).

Therefore it makes no sense to count ipv6 routing table entries
against the GC limits.

Add a DST_NOCOUNT destination cache entry flag, and skip the counting
if it is set.  Use this flag bit in ipv6 when adding routing table
entries.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h |  1 +
 net/core/dst.c    |  6 ++++--
 net/ipv6/route.c  | 13 +++++++------
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 7d15d238b6ec..e12ddfb9eb16 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -77,6 +77,7 @@ struct dst_entry {
 #define DST_NOPOLICY		0x0004
 #define DST_NOHASH		0x0008
 #define DST_NOCACHE		0x0010
+#define DST_NOCOUNT		0x0020
 	union {
 		struct dst_entry	*next;
 		struct rtable __rcu	*rt_next;
diff --git a/net/core/dst.c b/net/core/dst.c
index 9ccca038444f..6135f3671692 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -190,7 +190,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst->lastuse = jiffies;
 	dst->flags = flags;
 	dst->next = NULL;
-	dst_entries_add(ops, 1);
+	if (!(flags & DST_NOCOUNT))
+		dst_entries_add(ops, 1);
 	return dst;
 }
 EXPORT_SYMBOL(dst_alloc);
@@ -243,7 +244,8 @@ again:
 		neigh_release(neigh);
 	}
 
-	dst_entries_add(dst->ops, -1);
+	if (!(dst->flags & DST_NOCOUNT))
+		dst_entries_add(dst->ops, -1);
 
 	if (dst->ops->destroy)
 		dst->ops->destroy(dst);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c2af4da074b0..0ef1f086feb8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -228,9 +228,10 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
-					     struct net_device *dev)
+					     struct net_device *dev,
+					     int flags)
 {
-	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0);
+	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 
 	memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 
@@ -1042,7 +1043,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	if (unlikely(idev == NULL))
 		return NULL;
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
+	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
 	if (unlikely(rt == NULL)) {
 		in6_dev_put(idev);
 		goto out;
@@ -1206,7 +1207,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
+	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
 
 	if (rt == NULL) {
 		err = -ENOMEM;
@@ -1726,7 +1727,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 {
 	struct net *net = dev_net(ort->rt6i_dev);
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    ort->dst.dev);
+					    ort->dst.dev, 0);
 
 	if (rt) {
 		rt->dst.input = ort->dst.input;
@@ -2005,7 +2006,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
 	struct net *net = dev_net(idev->dev);
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    net->loopback_dev);
+					    net->loopback_dev, 0);
 	struct neighbour *neigh;
 
 	if (rt == NULL) {
-- 
cgit v1.2.3


From e4c2fb0d5776b58049d2556b456144a4db3fe5a9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 Jul 2011 10:56:32 +0200
Subject: sched: Disable (revert) SCHED_LOAD_SCALE increase

Alex reported that commit c8b281161df ("sched: Increase
SCHED_LOAD_SCALE resolution") caused a power usage regression
under light load as it increases the number of load-balance
operations and keeps idle cpus from staying idle.

Time has run out to find the root cause for this release so
disable the feature for v3.0 until we can figure out what
causes the problem.

Reported-by: "Alex, Shi" <alex.shi@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nikhil Rao <ncrao@google.com>
Cc: Ming Lei <tom.leiming@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-m4onxn0sxnyn5iz9o88eskc3@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a837b20ba190..496770a96487 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -808,7 +808,7 @@ enum cpu_idle_type {
  * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
  * increased costs.
  */
-#if BITS_PER_LONG > 32
+#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load  */
 # define SCHED_LOAD_RESOLUTION	10
 # define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)
 # define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)
-- 
cgit v1.2.3


From 659fb32d1b67476f4ade25e9ea0e2642a5b9c4b5 Mon Sep 17 00:00:00 2001
From: Simon Guinot <sguinot@lacie.com>
Date: Wed, 6 Jul 2011 12:41:31 -0400
Subject: genirq: replace irq_gc_ack() with {set,clr}_bit variants (fwd)

This fixes a regression introduced by e59347a "arm: orion:
Use generic irq chip".

Depending on the device, interrupt acknowledgement is done by setting
or by clearing a dedicated register. Replacing irq_gc_ack() with
{set,clr}_bit variants allows both cases to be handled.

Note that this patch affects the following SoCs: Davinci, Samsung and
Orion. Except for the last, the change is minor: irq_gc_ack() is just
renamed to irq_gc_ack_set_bit().

For the Orion SoCs, edge GPIO interrupt support is currently
broken. irq_gc_ack() tries to acknowledge such an interrupt by setting
the corresponding cause register bit. The Orion GPIO device expects the
opposite. To fix this issue, the irq_gc_ack_clr_bit() variant is used.

Tested on Network Space v2.

Reported-by: Joey Oravec <joravec@drewtech.com>
Signed-off-by: Simon Guinot <sguinot@lacie.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-davinci/irq.c      |  2 +-
 arch/arm/plat-orion/gpio.c       |  2 +-
 arch/arm/plat-s5p/irq-gpioint.c  |  2 +-
 arch/arm/plat-samsung/irq-uart.c |  2 +-
 include/linux/irq.h              |  3 ++-
 kernel/irq/generic-chip.c        | 18 ++++++++++++++++--
 6 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-davinci/irq.c b/arch/arm/mach-davinci/irq.c
index bfe68ec4e1a6..d8c1af025931 100644
--- a/arch/arm/mach-davinci/irq.c
+++ b/arch/arm/mach-davinci/irq.c
@@ -53,7 +53,7 @@ davinci_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num)
 
 	gc = irq_alloc_generic_chip("AINTC", 1, irq_start, base, handle_edge_irq);
 	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_clr_bit;
 	ct->chip.irq_unmask = irq_gc_mask_set_bit;
 
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 5b4fffab1eb4..41ab97ebe4cf 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -432,7 +432,7 @@ void __init orion_gpio_init(int gpio_base, int ngpio,
 	ct->regs.mask = ochip->mask_offset + GPIO_EDGE_MASK_OFF;
 	ct->regs.ack = GPIO_EDGE_CAUSE_OFF;
 	ct->type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_clr_bit;
 	ct->chip.irq_mask = irq_gc_mask_clr_bit;
 	ct->chip.irq_unmask = irq_gc_mask_set_bit;
 	ct->chip.irq_set_type = gpio_irq_set_type;
diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c
index 135abda31c9a..327ab9f662e8 100644
--- a/arch/arm/plat-s5p/irq-gpioint.c
+++ b/arch/arm/plat-s5p/irq-gpioint.c
@@ -152,7 +152,7 @@ static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip)
 	if (!gc)
 		return -ENOMEM;
 	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_set_bit;
 	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
 	ct->chip.irq_set_type = s5p_gpioint_set_type,
diff --git a/arch/arm/plat-samsung/irq-uart.c b/arch/arm/plat-samsung/irq-uart.c
index 32582c0958e3..0e46588d847b 100644
--- a/arch/arm/plat-samsung/irq-uart.c
+++ b/arch/arm/plat-samsung/irq-uart.c
@@ -55,7 +55,7 @@ static void __init s3c_init_uart_irq(struct s3c_uart_irq *uirq)
 	gc = irq_alloc_generic_chip("s3c-uart", 1, uirq->base_irq, reg_base,
 				    handle_level_irq);
 	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack;
+	ct->chip.irq_ack = irq_gc_ack_set_bit;
 	ct->chip.irq_mask = irq_gc_mask_set_bit;
 	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
 	ct->regs.ack = S3C64XX_UINTP;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8b4538446636..baa397eb9c33 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -676,7 +676,8 @@ void irq_gc_mask_disable_reg(struct irq_data *d);
 void irq_gc_mask_set_bit(struct irq_data *d);
 void irq_gc_mask_clr_bit(struct irq_data *d);
 void irq_gc_unmask_enable_reg(struct irq_data *d);
-void irq_gc_ack(struct irq_data *d);
+void irq_gc_ack_set_bit(struct irq_data *d);
+void irq_gc_ack_clr_bit(struct irq_data *d);
 void irq_gc_mask_disable_reg_and_ack(struct irq_data *d);
 void irq_gc_eoi(struct irq_data *d);
 int irq_gc_set_wake(struct irq_data *d, unsigned int on);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 31a9db711906..3a2cab407b93 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -101,10 +101,10 @@ void irq_gc_unmask_enable_reg(struct irq_data *d)
 }
 
 /**
- * irq_gc_ack - Ack pending interrupt
+ * irq_gc_ack_set_bit - Ack pending interrupt via setting bit
  * @d: irq_data
  */
-void irq_gc_ack(struct irq_data *d)
+void irq_gc_ack_set_bit(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << (d->irq - gc->irq_base);
@@ -114,6 +114,20 @@ void irq_gc_ack(struct irq_data *d)
 	irq_gc_unlock(gc);
 }
 
+/**
+ * irq_gc_ack_clr_bit - Ack pending interrupt via clearing bit
+ * @d: irq_data
+ */
+void irq_gc_ack_clr_bit(struct irq_data *d)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	u32 mask = ~(1 << (d->irq - gc->irq_base));
+
+	irq_gc_lock(gc);
+	irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack);
+	irq_gc_unlock(gc);
+}
+
 /**
  * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt
  * @d: irq_data
-- 
cgit v1.2.3


From e206fc5e3de0e38a35b6f92941c913b6d8343fc6 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 13 Jun 2011 09:34:56 -0300
Subject: [media] v4l2-subdev.h: remove unused s_mode tuner op

s_mode is no longer used, so remove it.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-subdev.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 1562c4ff3a65..224502067c43 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -173,16 +173,13 @@ struct v4l2_subdev_core_ops {
 				 struct v4l2_event_subscription *sub);
 };
 
-/* s_mode: switch the tuner to a specific tuner mode. Replacement of s_radio.
-
-   s_radio: v4l device was opened in Radio mode, to be replaced by s_mode.
+/* s_radio: v4l device was opened in radio mode.
 
    s_type_addr: sets tuner type and its I2C addr.
 
    s_config: sets tda9887 specific stuff, like port1, port2 and qss
  */
 struct v4l2_subdev_tuner_ops {
-	int (*s_mode)(struct v4l2_subdev *sd, enum v4l2_tuner_type);
 	int (*s_radio)(struct v4l2_subdev *sd);
 	int (*s_frequency)(struct v4l2_subdev *sd, struct v4l2_frequency *freq);
 	int (*g_frequency)(struct v4l2_subdev *sd, struct v4l2_frequency *freq);
-- 
cgit v1.2.3


From 338e9e1ad541cbb2a3fa5839376ff6c138d40301 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 13 Jun 2011 09:35:56 -0300
Subject: [media] tuner-core/v4l2-subdev: document that the type field has to
 be filled in

The tuner ops g_frequency, g_tuner and s_tuner require that the tuner type
field is filled in. Document this.

The tuner-core doc is based on a patch from Mauro Carvalho Chehab <mchehab@redhat.com>.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/tuner-core.c | 29 +++++++++++++++++++++++++++++
 include/media/v4l2-subdev.h      |  7 +++++++
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 1843fc293322..6c007aa00236 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -1117,6 +1117,16 @@ static int tuner_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f)
 	return 0;
 }
 
+/**
+ * tuner_g_frequency - Get the tuned frequency for the tuner
+ * @sd: pointer to struct v4l2_subdev
+ * @f: pointer to struct v4l2_frequency
+ *
+ * At return, the structure f will be filled with tuner frequency
+ * if the tuner matches the f->type.
+ * Note: f->type should be initialized before calling it.
+ * This is done by either video_ioctl2 or by the bridge driver.
+ */
 static int tuner_g_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f)
 {
 	struct tuner *t = to_tuner(sd);
@@ -1139,6 +1149,16 @@ static int tuner_g_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *f)
 	return 0;
 }
 
+/**
+ * tuner_g_tuner - Fill in tuner information
+ * @sd: pointer to struct v4l2_subdev
+ * @vt: pointer to struct v4l2_tuner
+ *
+ * At return, the structure vt will be filled with tuner information
+ * if the tuner matches vt->type.
+ * Note: vt->type should be initialized before calling it.
+ * This is done by either video_ioctl2 or by the bridge driver.
+ */
 static int tuner_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt)
 {
 	struct tuner *t = to_tuner(sd);
@@ -1179,6 +1199,15 @@ static int tuner_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt)
 	return 0;
 }
 
+/**
+ * tuner_s_tuner - Set the tuner's audio mode
+ * @sd: pointer to struct v4l2_subdev
+ * @vt: pointer to struct v4l2_tuner
+ *
+ * Sets the audio mode if the tuner matches vt->type.
+ * Note: vt->type should be initialized before calling it.
+ * This is done by either video_ioctl2 or by the bridge driver.
+ */
 static int tuner_s_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt)
 {
 	struct tuner *t = to_tuner(sd);
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 224502067c43..2884e3e69cb1 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -175,6 +175,13 @@ struct v4l2_subdev_core_ops {
 
 /* s_radio: v4l device was opened in radio mode.
 
+   g_frequency: freq->type must be filled in. Normally done by video_ioctl2
+	or the bridge driver.
+
+   g_tuner:
+   s_tuner: vt->type must be filled in. Normally done by video_ioctl2 or the
+	bridge driver.
+
    s_type_addr: sets tuner type and its I2C addr.
 
    s_config: sets tda9887 specific stuff, like port1, port2 and qss
-- 
cgit v1.2.3


From c902ce1bfb40d8b049bd2319b388b4b68b04bc27 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 7 Jul 2011 12:19:48 +0100
Subject: FS-Cache: Add a helper to bulk uncache pages on an inode

Add an FS-Cache helper to bulk uncache pages on an inode.  This will
only work for the circumstance where the pages in the cache correspond
1:1 with the pages attached to an inode's page cache.

This is required for CIFS and NFS: When disabling the inode cookie, we were
returning the cookie and setting cifsi->fscache to NULL but failed to
invalidate any previously mapped pages.  This resulted in "Bad page
state" errors and manifested in other kinds of errors when running
fsstress.  Fix it by uncaching mapped pages when we disable the inode
cookie.

This patch should fix the following oops and "Bad page state" errors
seen during fsstress testing.

  ------------[ cut here ]------------
  kernel BUG at fs/cachefiles/namei.c:201!
  invalid opcode: 0000 [#1] SMP
  Pid: 5, comm: kworker/u:0 Not tainted 2.6.38.7-30.fc15.x86_64 #1 Bochs Bochs
  RIP: 0010: cachefiles_walk_to_object+0x436/0x745 [cachefiles]
  RSP: 0018:ffff88002ce6dd00  EFLAGS: 00010282
  RAX: ffff88002ef165f0 RBX: ffff88001811f500 RCX: 0000000000000000
  RDX: 0000000000000000 RSI: 0000000000000100 RDI: 0000000000000282
  RBP: ffff88002ce6dda0 R08: 0000000000000100 R09: ffffffff81b3a300
  R10: 0000ffff00066c0a R11: 0000000000000003 R12: ffff88002ae54840
  R13: ffff88002ae54840 R14: ffff880029c29c00 R15: ffff88001811f4b0
  FS:  00007f394dd32720(0000) GS:ffff88002ef00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
  CR2: 00007fffcb62ddf8 CR3: 000000001825f000 CR4: 00000000000006e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
  Process kworker/u:0 (pid: 5, threadinfo ffff88002ce6c000, task ffff88002ce55cc0)
  Stack:
   0000000000000246 ffff88002ce55cc0 ffff88002ce6dd58 ffff88001815dc00
   ffff8800185246c0 ffff88001811f618 ffff880029c29d18 ffff88001811f380
   ffff88002ce6dd50 ffffffff814757e4 ffff88002ce6dda0 ffffffff8106ac56
  Call Trace:
   cachefiles_lookup_object+0x78/0xd4 [cachefiles]
   fscache_lookup_object+0x131/0x16d [fscache]
   fscache_object_work_func+0x1bc/0x669 [fscache]
   process_one_work+0x186/0x298
   worker_thread+0xda/0x15d
   kthread+0x84/0x8c
   kernel_thread_helper+0x4/0x10
  RIP  cachefiles_walk_to_object+0x436/0x745 [cachefiles]
  ---[ end trace 1d481c9af1804caa ]---

I tested the uncaching by the following means:

 (1) Create a big file on my NFS server (104857600 bytes).

 (2) Read the file into the cache with md5sum on the NFS client.  Look in
     /proc/fs/fscache/stats:

	Pages  : mrk=25601 unc=0

 (3) Open the file for read/write ("bash 5<>/warthog/bigfile").  Look in proc
     again:

	Pages  : mrk=25601 unc=25601

Reported-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-and-Tested-by: Suresh Jayaraman <sjayaraman@suse.de>
cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/caching/netfs-api.txt | 16 +++++++++
 fs/cifs/fscache.c                               |  1 +
 fs/fscache/page.c                               | 44 +++++++++++++++++++++++++
 fs/nfs/fscache.c                                |  8 ++---
 include/linux/fscache.h                         | 21 ++++++++++++
 5 files changed, 85 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index a167ab876c35..7cc6bf2871eb 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -673,6 +673,22 @@ storage request to complete, or it may attempt to cancel the storage request -
 in which case the page will not be stored in the cache this time.
 
 
+BULK INODE PAGE UNCACHE
+-----------------------
+
+A convenience routine is provided to perform an uncache on all the pages
+attached to an inode.  This assumes that the pages on the inode correspond on a
+1:1 basis with the pages in the cache.
+
+	void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+					     struct inode *inode);
+
+This takes the netfs cookie that the pages were cached with and the inode that
+the pages are attached to.  This function will wait for pages to finish being
+written to the cache and for the cache to finish with the page generally.  No
+error is returned.
+
+
 ==========================
 INDEX AND DATA FILE UPDATE
 ==========================
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 816696621ec9..42e5363b4102 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -92,6 +92,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode)
 
 	if (cifsi->fscache) {
 		cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
+		fscache_uncache_all_inode_pages(cifsi->fscache, inode);
 		fscache_relinquish_cookie(cifsi->fscache, 1);
 		cifsi->fscache = NULL;
 	}
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index a2a5d19ece6a..2f343b4d7a7d 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -954,3 +954,47 @@ void fscache_mark_pages_cached(struct fscache_retrieval *op,
 	pagevec_reinit(pagevec);
 }
 EXPORT_SYMBOL(fscache_mark_pages_cached);
+
+/*
+ * Uncache all the pages in an inode that are marked PG_fscache, assuming them
+ * to be associated with the given cookie.
+ */
+void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+				       struct inode *inode)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct pagevec pvec;
+	pgoff_t next;
+	int i;
+
+	_enter("%p,%p", cookie, inode);
+
+	if (!mapping || mapping->nrpages == 0) {
+		_leave(" [no pages]");
+		return;
+	}
+
+	pagevec_init(&pvec, 0);
+	next = 0;
+	while (next <= (loff_t)-1 &&
+	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)
+	       ) {
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			pgoff_t page_index = page->index;
+
+			ASSERTCMP(page_index, >=, next);
+			next = page_index + 1;
+
+			if (PageFsCache(page)) {
+				__fscache_wait_on_page_write(cookie, page);
+				__fscache_uncache_page(cookie, page);
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_uncache_all_inode_pages);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index ce153a6b3aec..419119c371bf 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -259,12 +259,10 @@ static void nfs_fscache_disable_inode_cookie(struct inode *inode)
 		dfprintk(FSCACHE,
 			 "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
 
-		/* Need to invalidate any mapped pages that were read in before
-		 * turning off the cache.
+		/* Need to uncache any pages attached to this inode that
+		 * fscache knows about before turning off the cache.
 		 */
-		if (inode->i_mapping && inode->i_mapping->nrpages)
-			invalidate_inode_pages2(inode->i_mapping);
-
+		fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode);
 		nfs_fscache_zap_inode_cookie(inode);
 	}
 }
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 7c4d72f5581f..9ec20dec3353 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -204,6 +204,8 @@ extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
 extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
 extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *,
 					 gfp_t);
+extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *,
+					      struct inode *);
 
 /**
  * fscache_register_netfs - Register a filesystem as desiring caching services
@@ -643,4 +645,23 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie,
 	return false;
 }
 
+/**
+ * fscache_uncache_all_inode_pages - Uncache all an inode's pages
+ * @cookie: The cookie representing the inode's cache object.
+ * @inode: The inode to uncache pages from.
+ *
+ * Uncache all the pages in an inode that are marked PG_fscache, assuming them
+ * to be associated with the given cookie.
+ *
+ * This function may sleep.  It will wait for pages that are being written out
+ * and will wait whilst the PG_fscache mark is removed by the cache.
+ */
+static inline
+void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+				     struct inode *inode)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_uncache_all_inode_pages(cookie, inode);
+}
+
 #endif /* _LINUX_FSCACHE_H */
-- 
cgit v1.2.3


From f8d9605243280f1870dd2c6c37a735b925c15f3c Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Thu, 7 Jul 2011 00:28:35 +0000
Subject: sctp: Enforce retransmission limit during shutdown

When initiating a graceful shutdown while having data chunks
on the retransmission queue with a peer which is in zero
window mode, the shutdown is never completed because the
retransmission error count is reset periodically by the
following two rules:

 - Do not timeout association while doing zero window probe.
 - Reset overall error count when a heartbeat request has
   been acknowledged.

The graceful shutdown will wait for all outstanding TSN to
be acknowledged before sending the SHUTDOWN request. This
never happens due to the peer's zero window not acknowledging
the continuously retransmitted data chunks. Although the
error counter is incremented for each failed retransmission,
the receiving of the SACK announcing the zero window clears
the error count again immediately. Also heartbeat requests
continue to be sent periodically. The peer acknowledges these
requests causing the error counter to be reset as well.

This patch changes behaviour to only reset the overall error
counter for the above rules while not in shutdown. After
reaching the maximum number of retransmission attempts, the
T5 shutdown guard timer is scheduled to give the receiver
some additional time to recover. The timer is stopped as soon
as the receiver acknowledges any data.

The issue can be easily reproduced by establishing a sctp
association over the loopback device, constantly queueing
data at the sender while not reading any at the receiver.
Wait for the window to reach zero, then initiate a shutdown
by killing both processes simultaneously. The association
will never be freed and the chunks on the retransmission
queue will be retransmitted indefinitely.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h |  1 +
 net/sctp/outqueue.c        | 20 +++++++++++++++++++-
 net/sctp/sm_sideeffect.c   | 20 ++++++++++++++++++--
 net/sctp/sm_statefuns.c    | 32 +++++++++++++++++++++++---------
 net/sctp/sm_statetable.c   |  2 +-
 5 files changed, 62 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index dd6847e5d6e4..6506458ccd33 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -63,6 +63,7 @@ typedef enum {
 	SCTP_CMD_ECN_ECNE,	/* Do delayed ECNE processing. */
 	SCTP_CMD_ECN_CWR,	/* Do delayed CWR processing.  */
 	SCTP_CMD_TIMER_START,	/* Start a timer.  */
+	SCTP_CMD_TIMER_START_ONCE, /* Start a timer once */
 	SCTP_CMD_TIMER_RESTART,	/* Restart a timer. */
 	SCTP_CMD_TIMER_STOP,	/* Stop a timer. */
 	SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 1c88c8911dc5..d03682109b7a 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1582,6 +1582,8 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 #endif /* SCTP_DEBUG */
 	if (transport) {
 		if (bytes_acked) {
+			struct sctp_association *asoc = transport->asoc;
+
 			/* We may have counted DATA that was migrated
 			 * to this transport due to DEL-IP operation.
 			 * Subtract those bytes, since the were never
@@ -1600,6 +1602,17 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 			transport->error_count = 0;
 			transport->asoc->overall_error_count = 0;
 
+			/*
+			 * While in SHUTDOWN PENDING, we may have started
+			 * the T5 shutdown guard timer after reaching the
+			 * retransmission limit. Stop that timer as soon
+			 * as the receiver acknowledged any data.
+			 */
+			if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING &&
+			    del_timer(&asoc->timers
+				[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]))
+					sctp_association_put(asoc);
+
 			/* Mark the destination transport address as
 			 * active if it is not so marked.
 			 */
@@ -1629,10 +1642,15 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 			 * A sender is doing zero window probing when the
 			 * receiver's advertised window is zero, and there is
 			 * only one data chunk in flight to the receiver.
+			 *
+			 * Allow the association to timeout while in SHUTDOWN
+			 * PENDING or SHUTDOWN RECEIVED in case the receiver
+			 * stays in zero window mode forever.
 			 */
 			if (!q->asoc->peer.rwnd &&
 			    !list_empty(&tlist) &&
-			    (sack_ctsn+2 == q->asoc->next_tsn)) {
+			    (sack_ctsn+2 == q->asoc->next_tsn) &&
+			    q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) {
 				SCTP_DEBUG_PRINTK("%s: SACK received for zero "
 						  "window probe: %u\n",
 						  __func__, sack_ctsn);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 534c2e5feb05..6e0f88295aaf 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -670,10 +670,19 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
 	/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
 	 * HEARTBEAT should clear the error counter of the destination
 	 * transport address to which the HEARTBEAT was sent.
-	 * The association's overall error count is also cleared.
 	 */
 	t->error_count = 0;
-	t->asoc->overall_error_count = 0;
+
+	/*
+	 * Although RFC4960 specifies that the overall error count must
+	 * be cleared when a HEARTBEAT ACK is received, we make an
+	 * exception while in SHUTDOWN PENDING. If the peer keeps its
+	 * window shut forever, we may never be able to transmit our
+	 * outstanding data and rely on the retransmission limit be reached
+	 * to shutdown the association.
+	 */
+	if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING)
+		t->asoc->overall_error_count = 0;
 
 	/* Clear the hb_sent flag to signal that we had a good
 	 * acknowledgement.
@@ -1437,6 +1446,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr);
 			break;
 
+		case SCTP_CMD_TIMER_START_ONCE:
+			timer = &asoc->timers[cmd->obj.to];
+
+			if (timer_pending(timer))
+				break;
+			/* fall through */
+
 		case SCTP_CMD_TIMER_START:
 			timer = &asoc->timers[cmd->obj.to];
 			timeout = asoc->timeouts[cmd->obj.to];
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a297283154d5..246117142b5c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5154,7 +5154,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
 	 * The sender of the SHUTDOWN MAY also start an overall guard timer
 	 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
 	 */
-	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
 	if (asoc->autoclose)
@@ -5299,14 +5299,28 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
 	SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
 
 	if (asoc->overall_error_count >= asoc->max_retrans) {
-		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
-				SCTP_ERROR(ETIMEDOUT));
-		/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
-		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
-				SCTP_PERR(SCTP_ERROR_NO_ERROR));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
-		return SCTP_DISPOSITION_DELETE_TCB;
+		if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
+			/*
+			 * We are here likely because the receiver had its rwnd
+			 * closed for a while and we have not been able to
+			 * transmit the locally queued data within the maximum
+			 * retransmission attempts limit.  Start the T5
+			 * shutdown guard timer to give the receiver one last
+			 * chance and some additional time to recover before
+			 * aborting.
+			 */
+			sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START_ONCE,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
+		} else {
+			sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
+					SCTP_ERROR(ETIMEDOUT));
+			/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
+			sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+					SCTP_PERR(SCTP_ERROR_NO_ERROR));
+			SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+			SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+			return SCTP_DISPOSITION_DELETE_TCB;
+		}
 	}
 
 	/* E1) For the destination address for which the timer
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 0338dc6fdc9d..7c211a7f90f4 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -827,7 +827,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
 	/* SCTP_STATE_ESTABLISHED */ \
 	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
 	/* SCTP_STATE_SHUTDOWN_PENDING */ \
-	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
 	/* SCTP_STATE_SHUTDOWN_SENT */ \
 	TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
-- 
cgit v1.2.3


From cd4fcc704f30f2064ab30b5300d44d431e46db50 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Fri, 8 Jul 2011 04:37:46 +0000
Subject: sctp: ABORT if receive, reassembly, or reordering queue is not empty
 while closing socket

Trigger user ABORT if application closes a socket which has data
queued on the socket receive queue or chunks waiting on the
reassembly or ordering queue as this would imply data being lost
which defeats the point of a graceful shutdown.

This behavior is already practiced in TCP.

We do not check the input queue because that would mean to parse
all chunks on it to look for unacknowledged data which seems too
much of an effort. Control chunks or duplicated chunks may also
be in the input queue and should not be stopping a graceful
shutdown.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/ulpevent.h |  2 +-
 net/sctp/socket.c           | 13 ++++++++-----
 net/sctp/ulpevent.c         | 16 +++++++++++++---
 3 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 99b027b2adce..ca4693b4e09e 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -80,7 +80,7 @@ static inline struct sctp_ulpevent *sctp_skb2event(struct sk_buff *skb)
 
 void sctp_ulpevent_free(struct sctp_ulpevent *);
 int sctp_ulpevent_is_notification(const struct sctp_ulpevent *);
-void sctp_queue_purge_ulpevents(struct sk_buff_head *list);
+unsigned int sctp_queue_purge_ulpevents(struct sk_buff_head *list);
 
 struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
 	const struct sctp_association *asoc,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 08c6238802de..d3ccf7973c59 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1384,6 +1384,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
 	struct sctp_endpoint *ep;
 	struct sctp_association *asoc;
 	struct list_head *pos, *temp;
+	unsigned int data_was_unread;
 
 	SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout);
 
@@ -1393,6 +1394,10 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
 
 	ep = sctp_sk(sk)->ep;
 
+	/* Clean up any skbs sitting on the receive queue.  */
+	data_was_unread = sctp_queue_purge_ulpevents(&sk->sk_receive_queue);
+	data_was_unread += sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby);
+
 	/* Walk all associations on an endpoint.  */
 	list_for_each_safe(pos, temp, &ep->asocs) {
 		asoc = list_entry(pos, struct sctp_association, asocs);
@@ -1410,7 +1415,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
 			}
 		}
 
-		if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+		if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) ||
+		    !skb_queue_empty(&asoc->ulpq.reasm) ||
+		    (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
 			struct sctp_chunk *chunk;
 
 			chunk = sctp_make_abort_user(asoc, NULL, 0);
@@ -1420,10 +1427,6 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
 			sctp_primitive_SHUTDOWN(asoc, NULL);
 	}
 
-	/* Clean up any skbs sitting on the receive queue.  */
-	sctp_queue_purge_ulpevents(&sk->sk_receive_queue);
-	sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby);
-
 	/* On a TCP-style socket, block for at most linger_time if set. */
 	if (sctp_style(sk, TCP) && timeout)
 		sctp_wait_for_close(sk, timeout);
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index e70e5fc87890..8a84017834c2 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -1081,9 +1081,19 @@ void sctp_ulpevent_free(struct sctp_ulpevent *event)
 }
 
 /* Purge the skb lists holding ulpevents. */
-void sctp_queue_purge_ulpevents(struct sk_buff_head *list)
+unsigned int sctp_queue_purge_ulpevents(struct sk_buff_head *list)
 {
 	struct sk_buff *skb;
-	while ((skb = skb_dequeue(list)) != NULL)
-		sctp_ulpevent_free(sctp_skb2event(skb));
+	unsigned int data_unread = 0;
+
+	while ((skb = skb_dequeue(list)) != NULL) {
+		struct sctp_ulpevent *event = sctp_skb2event(skb);
+
+		if (!sctp_ulpevent_is_notification(event))
+			data_unread += skb->len;
+
+		sctp_ulpevent_free(event);
+	}
+
+	return data_unread;
 }
-- 
cgit v1.2.3


From f607e7fc5fb94d92030c4527287e9c149ddf9e65 Mon Sep 17 00:00:00 2001
From: Jean-François Dagenais <dagenaisj@sonatest.com>
Date: Fri, 8 Jul 2011 15:39:44 -0700
Subject: w1: ds1wm: add a reset recovery parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a regression in 3.0 reported by Paul Parsons regarding the
removal of the msleep(1) in the ds1wm_reset() function:

: The linux-3.0-rc4 DS1WM 1-wire driver is logging "bus error, retrying"
: error messages on an HP iPAQ hx4700 PDA (XScale-PXA270):
:
: <snip>
: Driver for 1-wire Dallas network protocol.
: DS1WM w1 busmaster driver - (c) 2004 Szabolcs Gyurko
: 1-Wire driver for the DS2760 battery monitor  chip  - (c) 2004-2005, Szabolcs Gyurko
: ds1wm ds1wm: pass: 1 bus error, retrying
: ds1wm ds1wm: pass: 2 bus error, retrying
: ds1wm ds1wm: pass: 3 bus error, retrying
: ds1wm ds1wm: pass: 4 bus error, retrying
: ds1wm ds1wm: pass: 5 bus error, retrying
: ...
:
: The visible result is that the battery charging LED is erratic; sometimes
: it works, mostly it doesn't.
:
: The linux-2.6.39 DS1WM 1-wire driver worked OK.  I haven't tried 3.0-rc1,
: 3.0-rc2, or 3.0-rc3.

This sleep should not be required on normal circuitry provided the
pull-ups on the bus are correctly adapted to the slaves.  Unfortunately,
this is not always the case.  The sleep is restored but as a parameter to
the probe function in the pdata.

[akpm@linux-foundation.org: coding-style fixes]
Reported-by: Paul Parsons <lost.distance@yahoo.com>
Tested-by: Paul Parsons <lost.distance@yahoo.com>
Signed-off-by: Jean-François Dagenais <dagenaisj@sonatest.com>
Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/asic3.c        | 1 +
 drivers/mfd/htc-pasic3.c   | 1 +
 drivers/w1/masters/ds1wm.c | 5 +++++
 include/linux/mfd/ds1wm.h  | 7 +++++++
 4 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index c27fd1fc3b86..c71ae09430c5 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -619,6 +619,7 @@ static void asic3_clk_disable(struct asic3 *asic, struct asic3_clk *clk)
 /* MFD cells (SPI, PWM, LED, DS1WM, MMC) */
 static struct ds1wm_driver_data ds1wm_pdata = {
 	.active_high = 1,
+	.reset_recover_delay = 1,
 };
 
 static struct resource ds1wm_resources[] = {
diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c
index 2808bd125d13..04c7093d6499 100644
--- a/drivers/mfd/htc-pasic3.c
+++ b/drivers/mfd/htc-pasic3.c
@@ -99,6 +99,7 @@ static int ds1wm_disable(struct platform_device *pdev)
 
 static struct ds1wm_driver_data ds1wm_pdata = {
 	.active_high = 0,
+	.reset_recover_delay = 1,
 };
 
 static struct resource ds1wm_resources[] __initdata = {
diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c
index ad57593d224a..a0c8965c1a79 100644
--- a/drivers/w1/masters/ds1wm.c
+++ b/drivers/w1/masters/ds1wm.c
@@ -109,6 +109,7 @@ struct ds1wm_data {
 	/* byte to write that makes all intr disabled, */
 	/* considering active_state (IAS) (optimization) */
 	u8       int_en_reg_none;
+	unsigned int reset_recover_delay; /* see ds1wm.h */
 };
 
 static inline void ds1wm_write_register(struct ds1wm_data *ds1wm_data, u32 reg,
@@ -187,6 +188,9 @@ static int ds1wm_reset(struct ds1wm_data *ds1wm_data)
 		return 1;
 	}
 
+	if (ds1wm_data->reset_recover_delay)
+		msleep(ds1wm_data->reset_recover_delay);
+
 	return 0;
 }
 
@@ -490,6 +494,7 @@ static int ds1wm_probe(struct platform_device *pdev)
 	}
 	ds1wm_data->irq = res->start;
 	ds1wm_data->int_en_reg_none = (plat->active_high ? DS1WM_INTEN_IAS : 0);
+	ds1wm_data->reset_recover_delay = plat->reset_recover_delay;
 
 	if (res->flags & IORESOURCE_IRQ_HIGHEDGE)
 		irq_set_irq_type(ds1wm_data->irq, IRQ_TYPE_EDGE_RISING);
diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h
index be469a357cbb..38a372a0e285 100644
--- a/include/linux/mfd/ds1wm.h
+++ b/include/linux/mfd/ds1wm.h
@@ -3,4 +3,11 @@
 struct ds1wm_driver_data {
 	int active_high;
 	int clock_rate;
+	/* in milliseconds, the amount of time to */
+	/* sleep following a reset pulse. Zero    */
+	/* should work if your bus devices recover*/
+	/* time respects the 1-wire spec since the*/
+	/* ds1wm implements the precise timings of*/
+	/* a reset pulse/presence detect sequence.*/
+	unsigned int reset_recover_delay;
 };
-- 
cgit v1.2.3


From a63fdc5156f2ef5690b6cf03d72b0c4917efbba7 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 14 Jun 2011 10:57:50 +1000
Subject: mm: Move definition of MIN_MEMORY_BLOCK_SIZE to a header

The macro MIN_MEMORY_BLOCK_SIZE is currently defined separately in two .c
files, and I need it in a third one to fix a powerpc bug, so let's
first move it into a header.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c  | 3 +--
 drivers/base/memory.c  | 1 -
 include/linux/memory.h | 2 ++
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d865c4aeec55..bbaaa005bf0e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -28,6 +28,7 @@
 #include <linux/poison.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/memory.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nmi.h>
 #include <linux/gfp.h>
@@ -895,8 +896,6 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_X86_UV
-#define MIN_MEMORY_BLOCK_SIZE   (1 << SECTION_SIZE_BITS)
-
 unsigned long memory_block_size_bytes(void)
 {
 	if (is_uv_system()) {
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9f9b2359f718..45d7c8fc73bd 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -30,7 +30,6 @@
 static DEFINE_MUTEX(mem_sysfs_mutex);
 
 #define MEMORY_CLASS_NAME	"memory"
-#define MIN_MEMORY_BLOCK_SIZE	(1 << SECTION_SIZE_BITS)
 
 static int sections_per_block;
 
diff --git a/include/linux/memory.h b/include/linux/memory.h
index e1e3b2b84f85..935699b30b7c 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -20,6 +20,8 @@
 #include <linux/compiler.h>
 #include <linux/mutex.h>
 
+#define MIN_MEMORY_BLOCK_SIZE     (1 << SECTION_SIZE_BITS)
+
 struct memory_block {
 	unsigned long start_section_nr;
 	unsigned long end_section_nr;
-- 
cgit v1.2.3


From 07e49a7a31153a95caa270d8ad7350a0bcd4d511 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Jul 2011 20:44:25 +0200
Subject: ACPI: Fix lockdep false positives in acpi_power_off()

All ACPICA locks are allocated by the same function,
acpi_os_create_lock(), with the help of a local variable called
"lock".  Thus, when lockdep is enabled, it uses "lock" as the
name of all those locks and regards them as instances of the same
lock, which causes it to report possible locking problems with them
when there aren't any.

To work around this problem, define acpi_os_create_lock() as a macro
and make it pass its argument to spin_lock_init(), so that lockdep
uses it as the name of the new lock.  Define this macro in a
Linux-specific file, to minimize the resulting modifications of
the OS-independent ACPICA parts.

This change is based on an earlier patch from Andrea Righi and it
addresses a regression from 2.6.39 tracked as
https://bugzilla.kernel.org/show_bug.cgi?id=38152

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Andrea Righi <andrea@betterlinux.com>
Reviewed-by: Florian Mickler <florian@mickler.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c              | 17 -----------------
 include/acpi/acpiosxf.h         |  3 +++
 include/acpi/platform/aclinux.h | 18 ++++++++++++++++++
 3 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 52ca9649d769..372f9b70f7f4 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1332,23 +1332,6 @@ int acpi_resources_are_enforced(void)
 }
 EXPORT_SYMBOL(acpi_resources_are_enforced);
 
-/*
- * Create and initialize a spinlock.
- */
-acpi_status
-acpi_os_create_lock(acpi_spinlock *out_handle)
-{
-	spinlock_t *lock;
-
-	lock = ACPI_ALLOCATE(sizeof(spinlock_t));
-	if (!lock)
-		return AE_NO_MEMORY;
-	spin_lock_init(lock);
-	*out_handle = lock;
-
-	return AE_OK;
-}
-
 /*
  * Deallocate the memory for a spinlock.
  */
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index a756bc8d866d..4543b6f75867 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -98,8 +98,11 @@ acpi_os_table_override(struct acpi_table_header *existing_table,
 /*
  * Spinlock primitives
  */
+
+#ifndef acpi_os_create_lock
 acpi_status
 acpi_os_create_lock(acpi_spinlock *out_handle);
+#endif
 
 void acpi_os_delete_lock(acpi_spinlock handle);
 
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 5d2a5e9544d9..2ce1be9f6291 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -159,6 +159,24 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
 	} while (0)
 #endif
 
+/*
+ * When lockdep is enabled, the spin_lock_init() macro stringifies its
+ * argument and uses that as a name for the lock in debugging.
+ * By executing spin_lock_init() in a macro the key changes from "lock" for
+ * all locks to the name of the argument of acpi_os_create_lock(), which
+ * prevents lockdep from reporting false positives for ACPICA locks.
+ */
+#define acpi_os_create_lock(__handle)				\
+({								\
+	spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock));	\
+								\
+	if (lock) {						\
+		*(__handle) = lock;				\
+		spin_lock_init(*(__handle));			\
+	}							\
+	lock ? AE_OK : AE_NO_MEMORY;				\
+})
+
 #endif /* __KERNEL__ */
 
 #endif /* __ACLINUX_H__ */
-- 
cgit v1.2.3


From f39b2dd9d065151a04f5996656d1f27a7eb32d45 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Thu, 7 Jul 2011 09:04:55 -0700
Subject: mmc: core: Bus width testing needs to handle suspend/resume

On reading the ext_csd for the first time (in 1 bit mode), save the
ext_csd information needed for bus width compare.

On every pass we make re-reading the ext_csd, compare the data
against the saved ext_csd data.

This fixes a regression introduced in 3.0-rc1 by 08ee80cc397ac1a3
("mmc: core: eMMC bus width may not work on all platforms"), which
incorrectly assumed we would be re-reading the ext_csd at resume-
time.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c   | 77 +++++++++++++++++++++++++++++++-----------------
 include/linux/mmc/card.h | 13 ++++++++
 2 files changed, 63 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 2a7e43bc796d..aa7d1d79b8c5 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -247,12 +247,12 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		return 0;
 
 	/* Version is coded in the CSD_STRUCTURE byte in the EXT_CSD register */
+	card->ext_csd.raw_ext_csd_structure = ext_csd[EXT_CSD_STRUCTURE];
 	if (card->csd.structure == 3) {
-		int ext_csd_struct = ext_csd[EXT_CSD_STRUCTURE];
-		if (ext_csd_struct > 2) {
+		if (card->ext_csd.raw_ext_csd_structure > 2) {
 			printk(KERN_ERR "%s: unrecognised EXT_CSD structure "
 				"version %d\n", mmc_hostname(card->host),
-					ext_csd_struct);
+					card->ext_csd.raw_ext_csd_structure);
 			err = -EINVAL;
 			goto out;
 		}
@@ -266,6 +266,10 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		goto out;
 	}
 
+	card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0];
+	card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1];
+	card->ext_csd.raw_sectors[2] = ext_csd[EXT_CSD_SEC_CNT + 2];
+	card->ext_csd.raw_sectors[3] = ext_csd[EXT_CSD_SEC_CNT + 3];
 	if (card->ext_csd.rev >= 2) {
 		card->ext_csd.sectors =
 			ext_csd[EXT_CSD_SEC_CNT + 0] << 0 |
@@ -277,7 +281,7 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512)
 			mmc_card_set_blockaddr(card);
 	}
-
+	card->ext_csd.raw_card_type = ext_csd[EXT_CSD_CARD_TYPE];
 	switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) {
 	case EXT_CSD_CARD_TYPE_DDR_52 | EXT_CSD_CARD_TYPE_52 |
 	     EXT_CSD_CARD_TYPE_26:
@@ -307,6 +311,11 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			mmc_hostname(card->host));
 	}
 
+	card->ext_csd.raw_s_a_timeout = ext_csd[EXT_CSD_S_A_TIMEOUT];
+	card->ext_csd.raw_erase_timeout_mult =
+		ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT];
+	card->ext_csd.raw_hc_erase_grp_size =
+		ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE];
 	if (card->ext_csd.rev >= 3) {
 		u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT];
 		card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG];
@@ -334,6 +343,16 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		card->ext_csd.boot_size = ext_csd[EXT_CSD_BOOT_MULT] << 17;
 	}
 
+	card->ext_csd.raw_hc_erase_gap_size =
+		ext_csd[EXT_CSD_PARTITION_ATTRIBUTE];
+	card->ext_csd.raw_sec_trim_mult =
+		ext_csd[EXT_CSD_SEC_TRIM_MULT];
+	card->ext_csd.raw_sec_erase_mult =
+		ext_csd[EXT_CSD_SEC_ERASE_MULT];
+	card->ext_csd.raw_sec_feature_support =
+		ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT];
+	card->ext_csd.raw_trim_mult =
+		ext_csd[EXT_CSD_TRIM_MULT];
 	if (card->ext_csd.rev >= 4) {
 		/*
 		 * Enhanced area feature support -- check whether the eMMC
@@ -341,7 +360,7 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		 * area offset and size to user by adding sysfs interface.
 		 */
 		if ((ext_csd[EXT_CSD_PARTITION_SUPPORT] & 0x2) &&
-				(ext_csd[EXT_CSD_PARTITION_ATTRIBUTE] & 0x1)) {
+		    (ext_csd[EXT_CSD_PARTITION_ATTRIBUTE] & 0x1)) {
 			u8 hc_erase_grp_sz =
 				ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE];
 			u8 hc_wp_grp_sz =
@@ -401,17 +420,17 @@ static inline void mmc_free_ext_csd(u8 *ext_csd)
 }
 
 
-static int mmc_compare_ext_csds(struct mmc_card *card, u8 *ext_csd,
-			unsigned bus_width)
+static int mmc_compare_ext_csds(struct mmc_card *card, unsigned bus_width)
 {
 	u8 *bw_ext_csd;
 	int err;
 
+	if (bus_width == MMC_BUS_WIDTH_1)
+		return 0;
+
 	err = mmc_get_ext_csd(card, &bw_ext_csd);
-	if (err)
-		return err;
 
-	if ((ext_csd == NULL || bw_ext_csd == NULL)) {
+	if (err || bw_ext_csd == NULL) {
 		if (bus_width != MMC_BUS_WIDTH_1)
 			err = -EINVAL;
 		goto out;
@@ -421,35 +440,40 @@ static int mmc_compare_ext_csds(struct mmc_card *card, u8 *ext_csd,
 		goto out;
 
 	/* only compare read only fields */
-	err = (!(ext_csd[EXT_CSD_PARTITION_SUPPORT] ==
+	err = (!(card->ext_csd.raw_partition_support ==
 			bw_ext_csd[EXT_CSD_PARTITION_SUPPORT]) &&
-		(ext_csd[EXT_CSD_ERASED_MEM_CONT] ==
+		(card->ext_csd.raw_erased_mem_count ==
 			bw_ext_csd[EXT_CSD_ERASED_MEM_CONT]) &&
-		(ext_csd[EXT_CSD_REV] ==
+		(card->ext_csd.rev ==
 			bw_ext_csd[EXT_CSD_REV]) &&
-		(ext_csd[EXT_CSD_STRUCTURE] ==
+		(card->ext_csd.raw_ext_csd_structure ==
 			bw_ext_csd[EXT_CSD_STRUCTURE]) &&
-		(ext_csd[EXT_CSD_CARD_TYPE] ==
+		(card->ext_csd.raw_card_type ==
 			bw_ext_csd[EXT_CSD_CARD_TYPE]) &&
-		(ext_csd[EXT_CSD_S_A_TIMEOUT] ==
+		(card->ext_csd.raw_s_a_timeout ==
 			bw_ext_csd[EXT_CSD_S_A_TIMEOUT]) &&
-		(ext_csd[EXT_CSD_HC_WP_GRP_SIZE] ==
+		(card->ext_csd.raw_hc_erase_gap_size ==
 			bw_ext_csd[EXT_CSD_HC_WP_GRP_SIZE]) &&
-		(ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT] ==
+		(card->ext_csd.raw_erase_timeout_mult ==
 			bw_ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT]) &&
-		(ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE] ==
+		(card->ext_csd.raw_hc_erase_grp_size ==
 			bw_ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE]) &&
-		(ext_csd[EXT_CSD_SEC_TRIM_MULT] ==
+		(card->ext_csd.raw_sec_trim_mult ==
 			bw_ext_csd[EXT_CSD_SEC_TRIM_MULT]) &&
-		(ext_csd[EXT_CSD_SEC_ERASE_MULT] ==
+		(card->ext_csd.raw_sec_erase_mult ==
 			bw_ext_csd[EXT_CSD_SEC_ERASE_MULT]) &&
-		(ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT] ==
+		(card->ext_csd.raw_sec_feature_support ==
 			bw_ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT]) &&
-		(ext_csd[EXT_CSD_TRIM_MULT] ==
+		(card->ext_csd.raw_trim_mult ==
 			bw_ext_csd[EXT_CSD_TRIM_MULT]) &&
-		memcmp(&ext_csd[EXT_CSD_SEC_CNT],
-		       &bw_ext_csd[EXT_CSD_SEC_CNT],
-		       4) != 0);
+		(card->ext_csd.raw_sectors[0] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 0]) &&
+		(card->ext_csd.raw_sectors[1] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 1]) &&
+		(card->ext_csd.raw_sectors[2] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 2]) &&
+		(card->ext_csd.raw_sectors[3] ==
+			bw_ext_csd[EXT_CSD_SEC_CNT + 3]));
 	if (err)
 		err = -EINVAL;
 
@@ -770,7 +794,6 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 				 */
 				if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
 					err = mmc_compare_ext_csds(card,
-						ext_csd,
 						bus_width);
 				else
 					err = mmc_bus_test(card, bus_width);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index c6927a4d157f..6ad43554ac05 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -64,6 +64,19 @@ struct mmc_ext_csd {
 	unsigned long long	enhanced_area_offset;	/* Units: Byte */
 	unsigned int		enhanced_area_size;	/* Units: KB */
 	unsigned int		boot_size;		/* in bytes */
+	u8			raw_partition_support;	/* 160 */
+	u8			raw_erased_mem_count;	/* 181 */
+	u8			raw_ext_csd_structure;	/* 194 */
+	u8			raw_card_type;		/* 196 */
+	u8			raw_s_a_timeout;		/* 217 */
+	u8			raw_hc_erase_gap_size;	/* 221 */
+	u8			raw_erase_timeout_mult;	/* 223 */
+	u8			raw_hc_erase_grp_size;	/* 224 */
+	u8			raw_sec_trim_mult;	/* 229 */
+	u8			raw_sec_erase_mult;	/* 230 */
+	u8			raw_sec_feature_support;/* 231 */
+	u8			raw_trim_mult;		/* 232 */
+	u8			raw_sectors[4];		/* 212 - 4 bytes */
 };
 
 struct sd_scr {
-- 
cgit v1.2.3


From b4a03b9aa96cc186bf3cfd7a55cb7d7227f0cf4d Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Wed, 1 Jun 2011 23:54:02 +0800
Subject: ACPI: Fixes device power states array overflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 28c2103 added new state ACPI_STATE_D3_COLD, so the device power
states array must be expanded by one also.

v2: Use ACPI_D_STATE_COUNT instead of number 5 for the array size.

Reported-by: Dan Carpenter <error27@gmail.com>
Suggested-by: Oldřich Jedlička <oldium.pro@seznam.cz>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_bus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 3a10ef5914eb..6cd5b6403a7b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -210,7 +210,7 @@ struct acpi_device_power_state {
 struct acpi_device_power {
 	int state;		/* Current state */
 	struct acpi_device_power_flags flags;
-	struct acpi_device_power_state states[4];	/* Power states (D0-D3) */
+	struct acpi_device_power_state states[ACPI_D_STATE_COUNT];	/* Power states (D0-D3Cold) */
 };
 
 /* Performance Management */
-- 
cgit v1.2.3


From 62f2a3a48bdc99822a24356e667e52c30df287c9 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Wed, 13 Jul 2011 14:10:29 +0000
Subject: net: remove NETIF_F_ALL_TX_OFFLOADS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no software fallback implemented for SCTP or FCoE checksumming,
and so it should not be passed on by software devices like bridge or bonding.

For VLAN devices, this is different. First, the driver for underlying device
should be prepared to get offloaded packets even when the feature is disabled
(especially if it advertises it in vlan_features). Second, devices under
VLANs do not get replaced without tearing down the VLAN first.

This fixes a mess I accidentally introduced while converting bonding to
ndo_fix_features.

NETIF_F_SOFT_FEATURES are removed from BOND_VLAN_FEATURES because they
are unused as of commit 712ae51afd.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 6 +++---
 include/linux/netdevice.h       | 6 ------
 net/8021q/vlan_dev.c            | 6 +++++-
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index eafe44a528ac..63c22b0bb5ad 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1428,9 +1428,9 @@ out:
 	return features;
 }
 
-#define BOND_VLAN_FEATURES	(NETIF_F_ALL_TX_OFFLOADS | \
-				 NETIF_F_SOFT_FEATURES | \
-				 NETIF_F_LRO)
+#define BOND_VLAN_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | \
+				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+				 NETIF_F_HIGHDMA | NETIF_F_LRO)
 
 static void bond_compute_features(struct bonding *bond)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 54b8b4d7b68f..9e19477991ad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1097,12 +1097,6 @@ struct net_device {
 #define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
 				 NETIF_F_FSO)
 
-#define NETIF_F_ALL_TX_OFFLOADS	(NETIF_F_ALL_CSUM | NETIF_F_SG | \
-				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
-				 NETIF_F_HIGHDMA | \
-				 NETIF_F_SCTP_CSUM | \
-				 NETIF_F_ALL_FCOE)
-
 	/*
 	 * If one device supports one of these features, then enable them
 	 * for all in netdev_increment_features.
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 86bff9b1ac47..6e82148edfc8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -528,7 +528,11 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
-	dev->hw_features = NETIF_F_ALL_TX_OFFLOADS;
+	dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG |
+			   NETIF_F_FRAGLIST | NETIF_F_ALL_TSO |
+			   NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM |
+			   NETIF_F_ALL_FCOE;
+
 	dev->features |= real_dev->vlan_features | NETIF_F_LLTX;
 	dev->gso_max_size = real_dev->gso_max_size;
 
-- 
cgit v1.2.3


From e3c1620434ac77b618ce74c024ace3559602ac99 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Fri, 15 Jul 2011 14:39:10 +0000
Subject: drm/radeon/kms: add new NI pci ids

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index e08f344c6cff..3d53efd25ab9 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -182,6 +182,7 @@
 	{0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6759, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x675F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6760, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6761, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6762, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
@@ -192,6 +193,7 @@
 	{0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6888, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
-- 
cgit v1.2.3


From a07c7964a29b6dc515b120f1e1c223ac2f8666f5 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sat, 16 Jul 2011 22:22:20 +0000
Subject: include/linux/sdla.h: remove the prototype of sdla()

`make headers_check` complains that

linux-2.6/usr/include/linux/sdla.h:116: userspace cannot reference
function or variable defined in the kernel

this is because there is no such kernel function,

void sdla(void *cfg_info, char *dev, struct frad_conf *conf, int quiet);

I don't know why we have it in a kernel header, so remove it.

Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sdla.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sdla.h b/include/linux/sdla.h
index 564acd3a71c1..9995c7fc3f60 100644
--- a/include/linux/sdla.h
+++ b/include/linux/sdla.h
@@ -112,11 +112,7 @@ struct sdla_dlci_conf {
    short Tb_max;
 };
 
-#ifndef __KERNEL__
-
-void sdla(void *cfg_info, char *dev, struct frad_conf *conf, int quiet);
-
-#else
+#ifdef __KERNEL__
 
 /* important Z80 window addresses */
 #define SDLA_CONTROL_WND		0xE000
-- 
cgit v1.2.3


From 7765be2fec0f476fcd61812d5f9406b04c765020 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 14 Jul 2011 12:24:11 -0700
Subject: rcu: Fix RCU_BOOST race handling current->rcu_read_unlock_special

The RCU_BOOST commits for TREE_PREEMPT_RCU introduced an other-task
write to a new RCU_READ_UNLOCK_BOOSTED bit in the task_struct structure's
->rcu_read_unlock_special field, but, as noted by Steven Rostedt, without
correctly synchronizing all accesses to ->rcu_read_unlock_special.
This could result in bits in ->rcu_read_unlock_special being spuriously
set and cleared due to conflicting accesses, which in turn could result
in deadlocks between the rcu_node structure's ->lock and the scheduler's
rq and pi locks.  These deadlocks would result from RCU incorrectly
believing that the just-ended RCU read-side critical section had been
preempted and/or boosted.  If that RCU read-side critical section was
executed with either rq or pi locks held, RCU's ensuing (incorrect)
calls to the scheduler would cause the scheduler to attempt to once
again acquire the rq and pi locks, resulting in deadlock.  More complex
deadlock cycles are also possible, involving multiple rq and pi locks
as well as locks from multiple rcu_node structures.

This commit fixes synchronization by creating ->rcu_boosted field in
task_struct that is accessed and modified only when holding the ->lock
in the rcu_node structure on which the task is queued (on that rcu_node
structure's ->blkd_tasks list).  This results in tasks accessing only
their own current->rcu_read_unlock_special fields, making unsynchronized
access once again legal, and keeping the rcu_read_unlock() fastpath free
of atomic instructions and memory barriers.

The reason that the rcu_read_unlock() fastpath does not need to access
the new current->rcu_boosted field is that this new field cannot
be non-zero unless the RCU_READ_UNLOCK_BLOCKED bit is set in the
current->rcu_read_unlock_special field.  Therefore, rcu_read_unlock()
need only test current->rcu_read_unlock_special: if that is zero, then
current->rcu_boosted must also be zero.

This bug does not affect TINY_PREEMPT_RCU because this implementation
of RCU accesses current->rcu_read_unlock_special with irqs disabled,
thus preventing races on the !SMP systems that TINY_PREEMPT_RCU runs on.

Maybe-reported-by: Dave Jones <davej@redhat.com>
Maybe-reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/sched.h   | 3 +++
 kernel/rcutree_plugin.h | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 496770a96487..76676a407e4a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1254,6 +1254,9 @@ struct task_struct {
 #ifdef CONFIG_PREEMPT_RCU
 	int rcu_read_lock_nesting;
 	char rcu_read_unlock_special;
+#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
+	int rcu_boosted;
+#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
 	struct list_head rcu_node_entry;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 6abef3cfcbc1..3a0ae0355222 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -342,6 +342,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
 		if (&t->rcu_node_entry == rnp->boost_tasks)
 			rnp->boost_tasks = np;
+		/* Snapshot and clear ->rcu_boosted with rcu_node lock held. */
+		if (t->rcu_boosted) {
+			special |= RCU_READ_UNLOCK_BOOSTED;
+			t->rcu_boosted = 0;
+		}
 #endif /* #ifdef CONFIG_RCU_BOOST */
 		t->rcu_blocked_node = NULL;
 
@@ -358,7 +363,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 #ifdef CONFIG_RCU_BOOST
 		/* Unboost if we were boosted. */
 		if (special & RCU_READ_UNLOCK_BOOSTED) {
-			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
 			rt_mutex_unlock(t->rcu_boost_mutex);
 			t->rcu_boost_mutex = NULL;
 		}
@@ -1176,7 +1180,7 @@ static int rcu_boost(struct rcu_node *rnp)
 	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&mtx, t);
 	t->rcu_boost_mutex = &mtx;
-	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	t->rcu_boosted = 1;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	rt_mutex_lock(&mtx);  /* Side effect: boosts task t's priority. */
 	rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
-- 
cgit v1.2.3


From 9c3f75cbd144014bea6af866a154cc2e73ab2287 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 14 Jul 2011 13:00:06 +0200
Subject: sched: Break out cpu_power from the sched_group structure

In order to prepare for non-unique sched_groups per domain, we need to
carry the cpu_power elsewhere, so put a level of indirection in.

Reported-and-tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-qkho2byuhe4482fuknss40ad@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 14 +++++++++-----
 kernel/sched.c        | 32 ++++++++++++++++++++++++++------
 kernel/sched_fair.c   | 46 +++++++++++++++++++++++-----------------------
 3 files changed, 58 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 496770a96487..2e5b3c8e2d3e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -893,16 +893,20 @@ static inline int sd_power_saving_flags(void)
 	return 0;
 }
 
-struct sched_group {
-	struct sched_group *next;	/* Must be a circular list */
-	atomic_t ref;
-
+struct sched_group_power {
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
 	 */
-	unsigned int cpu_power, cpu_power_orig;
+	unsigned int power, power_orig;
+};
+
+struct sched_group {
+	struct sched_group *next;	/* Must be a circular list */
+	atomic_t ref;
+
 	unsigned int group_weight;
+	struct sched_group_power *sgp;
 
 	/*
 	 * The CPUs this group covers.
diff --git a/kernel/sched.c b/kernel/sched.c
index 3dc716f6d8ad..36c10d25d4cd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6557,7 +6557,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!group->cpu_power) {
+		if (!group->sgp->power) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: domain->cpu_power not "
 					"set\n");
@@ -6581,9 +6581,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->cpu_power != SCHED_POWER_SCALE) {
+		if (group->sgp->power != SCHED_POWER_SCALE) {
 			printk(KERN_CONT " (cpu_power = %d)",
-				group->cpu_power);
+				group->sgp->power);
 		}
 
 		group = group->next;
@@ -6777,8 +6777,10 @@ static struct root_domain *alloc_rootdomain(void)
 static void free_sched_domain(struct rcu_head *rcu)
 {
 	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-	if (atomic_dec_and_test(&sd->groups->ref))
+	if (atomic_dec_and_test(&sd->groups->ref)) {
+		kfree(sd->groups->sgp);
 		kfree(sd->groups);
+	}
 	kfree(sd);
 }
 
@@ -6945,6 +6947,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
+	struct sched_group_power **__percpu sgp;
 };
 
 struct s_data {
@@ -6981,8 +6984,10 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg)
+	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
+		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+	}
 
 	return cpu;
 }
@@ -7020,7 +7025,7 @@ build_sched_groups(struct sched_domain *sd)
 			continue;
 
 		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
+		sg->sgp->power = 0;
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -7185,6 +7190,7 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 	if (cpu == cpumask_first(sched_group_cpus(sg))) {
 		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
+		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 	}
 }
 
@@ -7234,9 +7240,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		if (!sdd->sg)
 			return -ENOMEM;
 
+		sdd->sgp = alloc_percpu(struct sched_group_power *);
+		if (!sdd->sgp)
+			return -ENOMEM;
+
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
+			struct sched_group_power *sgp;
 
 		       	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7262,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 				return -ENOMEM;
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
+
+			sgp = kzalloc_node(sizeof(struct sched_group_power),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sgp)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sgp, j) = sgp;
 		}
 	}
 
@@ -7268,9 +7286,11 @@ static void __sdt_free(const struct cpumask *cpu_map)
 		for_each_cpu(j, cpu_map) {
 			kfree(*per_cpu_ptr(sdd->sd, j));
 			kfree(*per_cpu_ptr(sdd->sg, j));
+			kfree(*per_cpu_ptr(sdd->sgp, j));
 		}
 		free_percpu(sdd->sd);
 		free_percpu(sdd->sg);
+		free_percpu(sdd->sgp);
 	}
 }
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 433491c2dc8f..c768588e180b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1585,7 +1585,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		}
 
 		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
+		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -2631,7 +2631,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power >>= SCHED_POWER_SHIFT;
 	}
 
-	sdg->cpu_power_orig = power;
+	sdg->sgp->power_orig = power;
 
 	if (sched_feat(ARCH_POWER))
 		power *= arch_scale_freq_power(sd, cpu);
@@ -2647,7 +2647,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power = 1;
 
 	cpu_rq(cpu)->cpu_power = power;
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 static void update_group_power(struct sched_domain *sd, int cpu)
@@ -2665,11 +2665,11 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
 	group = child->groups;
 	do {
-		power += group->cpu_power;
+		power += group->sgp->power;
 		group = group->next;
 	} while (group != child->groups);
 
-	sdg->cpu_power = power;
+	sdg->sgp->power = power;
 }
 
 /*
@@ -2691,7 +2691,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * If ~90% of the cpu_power is still there, we're good.
 	 */
-	if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
 		return 1;
 
 	return 0;
@@ -2771,7 +2771,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	}
 
 	/* Adjust by relative CPU power of the group */
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
+	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
 
 	/*
 	 * Consider the group unbalanced when the imbalance is larger
@@ -2788,7 +2788,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
 		sgs->group_imb = 1;
 
-	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
+	sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
 						SCHED_POWER_SCALE);
 	if (!sgs->group_capacity)
 		sgs->group_capacity = fix_small_capacity(sd, group);
@@ -2877,7 +2877,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 			return;
 
 		sds->total_load += sgs.group_load;
-		sds->total_pwr += sg->cpu_power;
+		sds->total_pwr += sg->sgp->power;
 
 		/*
 		 * In case the child domain prefers tasks go to siblings
@@ -2962,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd,
 	if (this_cpu > busiest_cpu)
 		return 0;
 
-	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
 				       SCHED_POWER_SCALE);
 	return 1;
 }
@@ -2993,7 +2993,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 
 	scaled_busy_load_per_task = sds->busiest_load_per_task
 					 * SCHED_POWER_SCALE;
-	scaled_busy_load_per_task /= sds->busiest->cpu_power;
+	scaled_busy_load_per_task /= sds->busiest->sgp->power;
 
 	if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
 			(scaled_busy_load_per_task * imbn)) {
@@ -3007,28 +3007,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 	 * moving them.
 	 */
 
-	pwr_now += sds->busiest->cpu_power *
+	pwr_now += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load);
-	pwr_now += sds->this->cpu_power *
+	pwr_now += sds->this->sgp->power *
 			min(sds->this_load_per_task, sds->this_load);
 	pwr_now /= SCHED_POWER_SCALE;
 
 	/* Amount of load we'd subtract */
 	tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-		sds->busiest->cpu_power;
+		sds->busiest->sgp->power;
 	if (sds->max_load > tmp)
-		pwr_move += sds->busiest->cpu_power *
+		pwr_move += sds->busiest->sgp->power *
 			min(sds->busiest_load_per_task, sds->max_load - tmp);
 
 	/* Amount of load we'd add */
-	if (sds->max_load * sds->busiest->cpu_power <
+	if (sds->max_load * sds->busiest->sgp->power <
 		sds->busiest_load_per_task * SCHED_POWER_SCALE)
-		tmp = (sds->max_load * sds->busiest->cpu_power) /
-			sds->this->cpu_power;
+		tmp = (sds->max_load * sds->busiest->sgp->power) /
+			sds->this->sgp->power;
 	else
 		tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-			sds->this->cpu_power;
-	pwr_move += sds->this->cpu_power *
+			sds->this->sgp->power;
+	pwr_move += sds->this->sgp->power *
 			min(sds->this_load_per_task, sds->this_load + tmp);
 	pwr_move /= SCHED_POWER_SCALE;
 
@@ -3074,7 +3074,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 
 		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
 
-		load_above_capacity /= sds->busiest->cpu_power;
+		load_above_capacity /= sds->busiest->sgp->power;
 	}
 
 	/*
@@ -3090,8 +3090,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 	max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
 
 	/* How much load to actually move to equalise the imbalance */
-	*imbalance = min(max_pull * sds->busiest->cpu_power,
-		(sds->avg_load - sds->this_load) * sds->this->cpu_power)
+	*imbalance = min(max_pull * sds->busiest->sgp->power,
+		(sds->avg_load - sds->this_load) * sds->this->sgp->power)
 			/ SCHED_POWER_SCALE;
 
 	/*
-- 
cgit v1.2.3


From e3589f6c81e4764d32a25d2a2a0afe54fa344f5c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 15 Jul 2011 10:35:52 +0200
Subject: sched: Allow for overlapping sched_domain spans

Allow for sched_domain spans that overlap by giving such domains their
own sched_group list instead of sharing the sched_groups amongst
each other.

This is needed for machines with more than 16 nodes, because
sched_domain_node_span() will generate a node mask from the
16 nearest nodes without regard to whether these masks overlap.

Currently sched_domains have a sched_group that maps to their child
sched_domain span, and since there is no overlap we share the
sched_group between the sched_domains of the various CPUs. If however
there is overlap, we would need to link the sched_group list in
different ways for each cpu, and hence sharing isn't possible.

In order to solve this, allocate private sched_groups for each CPU's
sched_domain but have the sched_groups share a sched_group_power
structure such that we can uniquely track the power.

Reported-and-tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-08bxqw9wis3qti9u5inifh3y@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   |   2 +
 kernel/sched.c          | 157 +++++++++++++++++++++++++++++++++++++++---------
 kernel/sched_features.h |   2 +
 3 files changed, 132 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2e5b3c8e2d3e..bde99d5358dc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -844,6 +844,7 @@ enum cpu_idle_type {
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 
 enum powersavings_balance_level {
 	POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
@@ -894,6 +895,7 @@ static inline int sd_power_saving_flags(void)
 }
 
 struct sched_group_power {
+	atomic_t ref;
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
diff --git a/kernel/sched.c b/kernel/sched.c
index 36c10d25d4cd..921adf6f6fad 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6774,10 +6774,36 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{
+	struct sched_group *tmp, *first;
+
+	if (!sg)
+		return;
+
+	first = sg;
+	do {
+		tmp = sg->next;
+
+		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+			kfree(sg->sgp);
+
+		kfree(sg);
+		sg = tmp;
+	} while (sg != first);
+}
+
 static void free_sched_domain(struct rcu_head *rcu)
 {
 	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-	if (atomic_dec_and_test(&sd->groups->ref)) {
+
+	/*
+	 * If its an overlapping domain it has private groups, iterate and
+	 * nuke them all.
+	 */
+	if (sd->flags & SD_OVERLAP) {
+		free_sched_groups(sd->groups, 1);
+	} else if (atomic_dec_and_test(&sd->groups->ref)) {
 		kfree(sd->groups->sgp);
 		kfree(sd->groups);
 	}
@@ -6967,15 +6993,73 @@ struct sched_domain_topology_level;
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 
+#define SDTL_OVERLAP	0x01
+
 struct sched_domain_topology_level {
 	sched_domain_init_f init;
 	sched_domain_mask_f mask;
+	int		    flags;
 	struct sd_data      data;
 };
 
-/*
- * Assumes the sched_domain tree is fully constructed
- */
+static int
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
+{
+	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+	const struct cpumask *span = sched_domain_span(sd);
+	struct cpumask *covered = sched_domains_tmpmask;
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *child;
+	int i;
+
+	cpumask_clear(covered);
+
+	for_each_cpu(i, span) {
+		struct cpumask *sg_span;
+
+		if (cpumask_test_cpu(i, covered))
+			continue;
+
+		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				GFP_KERNEL, cpu_to_node(i));
+
+		if (!sg)
+			goto fail;
+
+		sg_span = sched_group_cpus(sg);
+
+		child = *per_cpu_ptr(sdd->sd, i);
+		if (child->child) {
+			child = child->child;
+			cpumask_copy(sg_span, sched_domain_span(child));
+		} else
+			cpumask_set_cpu(i, sg_span);
+
+		cpumask_or(covered, covered, sg_span);
+
+		sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+		atomic_inc(&sg->sgp->ref);
+
+		if (cpumask_test_cpu(cpu, sg_span))
+			groups = sg;
+
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
+		last->next = first;
+	}
+	sd->groups = groups;
+
+	return 0;
+
+fail:
+	free_sched_groups(first, 0);
+
+	return -ENOMEM;
+}
+
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6987,23 +7071,21 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
 		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
 	}
 
 	return cpu;
 }
 
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
  * build_sched_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
  */
-static void
-build_sched_groups(struct sched_domain *sd)
+static int
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	struct sd_data *sdd = sd->private;
@@ -7011,6 +7093,12 @@ build_sched_groups(struct sched_domain *sd)
 	struct cpumask *covered;
 	int i;
 
+	get_group(cpu, sdd, &sd->groups);
+	atomic_inc(&sd->groups->ref);
+
+	if (cpu != cpumask_first(sched_domain_span(sd)))
+		return 0;
+
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
@@ -7042,6 +7130,8 @@ build_sched_groups(struct sched_domain *sd)
 		last = sg;
 	}
 	last->next = first;
+
+	return 0;
 }
 
 /*
@@ -7056,12 +7146,17 @@ build_sched_groups(struct sched_domain *sd)
  */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-	WARN_ON(!sd || !sd->groups);
+	struct sched_group *sg = sd->groups;
 
-	if (cpu != group_first_cpu(sd->groups))
-		return;
+	WARN_ON(!sd || !sg);
+
+	do {
+		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+		sg = sg->next;
+	} while (sg != sd->groups);
 
-	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+	if (cpu != group_first_cpu(sg))
+		return;
 
 	update_group_power(sd, cpu);
 }
@@ -7182,16 +7277,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
 	struct sd_data *sdd = sd->private;
-	struct sched_group *sg = sd->groups;
 
 	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
 	*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-	if (cpu == cpumask_first(sched_group_cpus(sg))) {
-		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
+
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
 		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
-	}
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -7216,7 +7310,7 @@ static struct sched_domain_topology_level default_topology[] = {
 #endif
 	{ sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-	{ sd_init_NODE, cpu_node_mask, },
+	{ sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
 	{ sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
 	{ NULL, },
@@ -7284,7 +7378,9 @@ static void __sdt_free(const struct cpumask *cpu_map)
 		struct sd_data *sdd = &tl->data;
 
 		for_each_cpu(j, cpu_map) {
-			kfree(*per_cpu_ptr(sdd->sd, j));
+			struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+			if (sd && (sd->flags & SD_OVERLAP))
+				free_sched_groups(sd->groups, 0);
 			kfree(*per_cpu_ptr(sdd->sg, j));
 			kfree(*per_cpu_ptr(sdd->sgp, j));
 		}
@@ -7336,8 +7432,11 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 		struct sched_domain_topology_level *tl;
 
 		sd = NULL;
-		for (tl = sched_domain_topology; tl->init; tl++)
+		for (tl = sched_domain_topology; tl->init; tl++) {
 			sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+				sd->flags |= SD_OVERLAP;
+		}
 
 		while (sd->child)
 			sd = sd->child;
@@ -7349,13 +7448,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			sd->span_weight = cpumask_weight(sched_domain_span(sd));
-			get_group(i, sd->private, &sd->groups);
-			atomic_inc(&sd->groups->ref);
-
-			if (i != cpumask_first(sched_domain_span(sd)))
-				continue;
-
-			build_sched_groups(sd);
+			if (sd->flags & SD_OVERLAP) {
+				if (build_overlap_sched_groups(sd, i))
+					goto error;
+			} else {
+				if (build_sched_groups(sd, i))
+					goto error;
+			}
 		}
 	}
 
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index be40f7371ee1..1e7066d76c26 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -70,3 +70,5 @@ SCHED_FEAT(NONIRQ_POWER, 1)
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, 1)
+
+SCHED_FEAT(FORCE_SD_OVERLAP, 0)
-- 
cgit v1.2.3