From 8e552c36d90c03d2cabf5373788998966751b609 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@debian.org>
Date: Mon, 12 May 2008 21:46:29 -0400
Subject: power_supply: add CHARGE_COUNTER property and olpc_battery support
 for it

This adds PROP_CHARGE_COUNTER to the power supply class (documenting it
as well).  The OLPC battery driver uses this for spitting out its ACR
values (in uAh).  We have some rounding errors (the data sheet claims
416.7, the math actually works out to 416.666667, so we're forced to
choose between overflows or precision loss.  I chose precision loss,
and stuck w/ data sheet values), but I don't think anyone will care
that much.

Signed-off-by: Andres Salomon <dilinger@debian.org>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/power_supply.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 68ed19ccf1f7..ea96ead1d39d 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -78,6 +78,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_CHARGE_EMPTY,
 	POWER_SUPPLY_PROP_CHARGE_NOW,
 	POWER_SUPPLY_PROP_CHARGE_AVG,
+	POWER_SUPPLY_PROP_CHARGE_COUNTER,
 	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
 	POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN,
 	POWER_SUPPLY_PROP_ENERGY_FULL,
-- 
cgit v1.2.3


From a822bea7962b500b0bcab41bf3500f7c40ae56b5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 6 Jun 2008 01:34:00 -0400
Subject: Input: serio - mark serio_register_driver() __must_check

Also remove extra declaration of serio_register_driver().

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/serio.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/serio.h b/include/linux/serio.h
index e72716cca577..25641d9e0ea8 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -87,11 +87,10 @@ void serio_unregister_port(struct serio *serio);
 void serio_unregister_child_port(struct serio *serio);
 
 int __serio_register_driver(struct serio_driver *drv, struct module *owner, const char *mod_name);
-static inline int serio_register_driver(struct serio_driver *drv)
+static inline int __must_check serio_register_driver(struct serio_driver *drv)
 {
 	return __serio_register_driver(drv, THIS_MODULE, KBUILD_MODNAME);
 }
-int serio_register_driver(struct serio_driver *drv);
 void serio_unregister_driver(struct serio_driver *drv);
 
 static inline int serio_write(struct serio *serio, unsigned char data)
-- 
cgit v1.2.3


From d8e64406a037a64444175730294e449c9e21f5ec Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 23 Jul 2008 13:09:48 -0700
Subject: md: delay notification of 'active_idle' to the recovery thread

sysfs_notify might sleep, so do not call it from md_safemode_timeout.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/md/md.c           | 5 ++++-
 include/linux/raid/md_k.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index c2ff77ccec50..0f1b83096425 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3483,7 +3483,7 @@ static void md_safemode_timeout(unsigned long data)
 	if (!atomic_read(&mddev->writes_pending)) {
 		mddev->safemode = 1;
 		if (mddev->external)
-			sysfs_notify(&mddev->kobj, NULL, "array_state");
+			set_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags);
 	}
 	md_wakeup_thread(mddev->thread);
 }
@@ -6051,6 +6051,9 @@ void md_check_recovery(mddev_t *mddev)
 	if (mddev->bitmap)
 		bitmap_daemon_work(mddev->bitmap);
 
+	if (test_and_clear_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags))
+		sysfs_notify(&mddev->kobj, NULL, "array_state");
+
 	if (mddev->ro)
 		return;
 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 9f2549ac0e2d..c200b9a34aff 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -128,6 +128,7 @@ struct mddev_s
 #define MD_CHANGE_DEVS	0	/* Some device status has changed */
 #define MD_CHANGE_CLEAN 1	/* transition to or from 'clean' */
 #define MD_CHANGE_PENDING 2	/* superblock update in progress */
+#define MD_NOTIFY_ARRAY_STATE 3	/* atomic context wants to notify userspace */
 
 	int				ro;
 
-- 
cgit v1.2.3


From 3f07af494dfa6de43137dae430431c9fbf929c0c Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 25 Jul 2008 22:25:13 -0400
Subject: of: adapt of_find_i2c_driver() to be usable by SPI also

SPI has a similar problem as I2C in that it needs to determine an
appropriate modalias value for each device node.  This patch adapts
the of_i2c of_find_i2c_driver() function to be usable by of_spi also.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/base.c   | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/of/of_i2c.c | 64 ++------------------------------------
 include/linux/of.h  |  1 +
 3 files changed, 92 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 23ffb7c0caf2..ad8ac1a8af28 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -385,3 +385,91 @@ struct device_node *of_find_matching_node(struct device_node *from,
 	return np;
 }
 EXPORT_SYMBOL(of_find_matching_node);
+
+/**
+ * of_modalias_table: Table of explicit compatible ==> modalias mappings
+ *
+ * This table allows particulare compatible property values to be mapped
+ * to modalias strings.  This is useful for busses which do not directly
+ * understand the OF device tree but are populated based on data contained
+ * within the device tree.  SPI and I2C are the two current users of this
+ * table.
+ *
+ * In most cases, devices do not need to be listed in this table because
+ * the modalias value can be derived directly from the compatible table.
+ * However, if for any reason a value cannot be derived, then this table
+ * provides a method to override the implicit derivation.
+ *
+ * At the moment, a single table is used for all bus types because it is
+ * assumed that the data size is small and that the compatible values
+ * should already be distinct enough to differentiate between SPI, I2C
+ * and other devices.
+ */
+struct of_modalias_table {
+	char *of_device;
+	char *modalias;
+};
+static struct of_modalias_table of_modalias_table[] = {
+	/* Empty for now; add entries as needed */
+};
+
+/**
+ * of_modalias_node - Lookup appropriate modalias for a device node
+ * @node:	pointer to a device tree node
+ * @modalias:	Pointer to buffer that modalias value will be copied into
+ * @len:	Length of modalias value
+ *
+ * Based on the value of the compatible property, this routine will determine
+ * an appropriate modalias value for a particular device tree node.  Three
+ * separate methods are used to derive a modalias value.
+ *
+ * First method is to lookup the compatible value in of_modalias_table.
+ * Second is to look for a "linux,<modalias>" entry in the compatible list
+ * and used that for modalias.  Third is to strip off the manufacturer
+ * prefix from the first compatible entry and use the remainder as modalias
+ *
+ * This routine returns 0 on success
+ */
+int of_modalias_node(struct device_node *node, char *modalias, int len)
+{
+	int i, cplen;
+	const char *compatible;
+	const char *p;
+
+	/* 1. search for exception list entry */
+	for (i = 0; i < ARRAY_SIZE(of_modalias_table); i++) {
+		compatible = of_modalias_table[i].of_device;
+		if (!of_device_is_compatible(node, compatible))
+			continue;
+		strlcpy(modalias, of_modalias_table[i].modalias, len);
+		return 0;
+	}
+
+	compatible = of_get_property(node, "compatible", &cplen);
+	if (!compatible)
+		return -ENODEV;
+
+	/* 2. search for linux,<modalias> entry */
+	p = compatible;
+	while (cplen > 0) {
+		if (!strncmp(p, "linux,", 6)) {
+			p += 6;
+			strlcpy(modalias, p, len);
+			return 0;
+		}
+
+		i = strlen(p) + 1;
+		p += i;
+		cplen -= i;
+	}
+
+	/* 3. take first compatible entry and strip manufacturer */
+	p = strchr(compatible, ',');
+	if (!p)
+		return -ENODEV;
+	p++;
+	strlcpy(modalias, p, len);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_modalias_node);
+
diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c
index 344e1b03dd8b..6a98dc8aa30b 100644
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -16,62 +16,6 @@
 #include <linux/of_i2c.h>
 #include <linux/module.h>
 
-struct i2c_driver_device {
-	char    *of_device;
-	char    *i2c_type;
-};
-
-static struct i2c_driver_device i2c_devices[] = {
-};
-
-static int of_find_i2c_driver(struct device_node *node,
-			      struct i2c_board_info *info)
-{
-	int i, cplen;
-	const char *compatible;
-	const char *p;
-
-	/* 1. search for exception list entry */
-	for (i = 0; i < ARRAY_SIZE(i2c_devices); i++) {
-		if (!of_device_is_compatible(node, i2c_devices[i].of_device))
-			continue;
-		if (strlcpy(info->type, i2c_devices[i].i2c_type,
-			    I2C_NAME_SIZE) >= I2C_NAME_SIZE)
-			return -ENOMEM;
-
-		return 0;
-	}
-
-	compatible = of_get_property(node, "compatible", &cplen);
-	if (!compatible)
-		return -ENODEV;
-
-	/* 2. search for linux,<i2c-type> entry */
-	p = compatible;
-	while (cplen > 0) {
-		if (!strncmp(p, "linux,", 6)) {
-			p += 6;
-			if (strlcpy(info->type, p,
-				    I2C_NAME_SIZE) >= I2C_NAME_SIZE)
-				return -ENOMEM;
-			return 0;
-		}
-
-		i = strlen(p) + 1;
-		p += i;
-		cplen -= i;
-	}
-
-	/* 3. take fist compatible entry and strip manufacturer */
-	p = strchr(compatible, ',');
-	if (!p)
-		return -ENODEV;
-	p++;
-	if (strlcpy(info->type, p, I2C_NAME_SIZE) >= I2C_NAME_SIZE)
-		return -ENOMEM;
-	return 0;
-}
-
 void of_register_i2c_devices(struct i2c_adapter *adap,
 			     struct device_node *adap_node)
 {
@@ -83,6 +27,9 @@ void of_register_i2c_devices(struct i2c_adapter *adap,
 		const u32 *addr;
 		int len;
 
+		if (of_modalias_node(node, info.type, sizeof(info.type)) < 0)
+			continue;
+
 		addr = of_get_property(node, "reg", &len);
 		if (!addr || len < sizeof(int) || *addr > (1 << 10) - 1) {
 			printk(KERN_ERR
@@ -92,11 +39,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap,
 
 		info.irq = irq_of_parse_and_map(node, 0);
 
-		if (of_find_i2c_driver(node, &info) < 0) {
-			irq_dispose_mapping(info.irq);
-			continue;
-		}
-
 		info.addr = *addr;
 
 		request_module(info.type);
diff --git a/include/linux/of.h b/include/linux/of.h
index 59a61bdc98b6..79886ade070f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -70,5 +70,6 @@ extern int of_n_addr_cells(struct device_node *np);
 extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
 	const struct of_device_id *matches, const struct device_node *node);
+extern int of_modalias_node(struct device_node *node, char *modalias, int len);
 
 #endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From dc87c98e8f635a718f1abb2c3e15fc77c0001651 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 15 May 2008 16:50:22 -0600
Subject: spi: split up spi_new_device() to allow two stage registration.

spi_new_device() allocates and registers an spi device all in one swoop.
If the driver needs to add extra data to the spi_device before it is
registered, then this causes problems.  This is needed for OF device
tree support so that the SPI device tree helper can add a pointer to
the device node after the device is allocated, but before the device
is registered.  OF aware SPI devices can then retrieve data out of the
device node to populate a platform data structure.

This patch splits the allocation and registration portions of code out
of spi_new_device() and creates two new functions; spi_alloc_device()
and spi_register_device().  spi_new_device() is modified to use the new
functions for allocation and registration.  None of the existing users
of spi_new_device() should be affected by this change.

Drivers using the new API can forego the use of spi_board_info
structure to describe the device layout and populate data into the
spi_device structure directly.

This change is in preparation for adding an OF device tree parser to
generate spi_devices based on data in the device tree.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
---
 drivers/spi/spi.c       | 139 +++++++++++++++++++++++++++++++++---------------
 include/linux/spi/spi.h |  12 +++++
 2 files changed, 107 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ecca4a6a6f94..964124b60db2 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -178,6 +178,96 @@ struct boardinfo {
 static LIST_HEAD(board_list);
 static DEFINE_MUTEX(board_lock);
 
+/**
+ * spi_alloc_device - Allocate a new SPI device
+ * @master: Controller to which device is connected
+ * Context: can sleep
+ *
+ * Allows a driver to allocate and initialize a spi_device without
+ * registering it immediately.  This allows a driver to directly
+ * fill the spi_device with device parameters before calling
+ * spi_add_device() on it.
+ *
+ * Caller is responsible to call spi_add_device() on the returned
+ * spi_device structure to add it to the SPI master.  If the caller
+ * needs to discard the spi_device without adding it, then it should
+ * call spi_dev_put() on it.
+ *
+ * Returns a pointer to the new device, or NULL.
+ */
+struct spi_device *spi_alloc_device(struct spi_master *master)
+{
+	struct spi_device	*spi;
+	struct device		*dev = master->dev.parent;
+
+	if (!spi_master_get(master))
+		return NULL;
+
+	spi = kzalloc(sizeof *spi, GFP_KERNEL);
+	if (!spi) {
+		dev_err(dev, "cannot alloc spi_device\n");
+		spi_master_put(master);
+		return NULL;
+	}
+
+	spi->master = master;
+	spi->dev.parent = dev;
+	spi->dev.bus = &spi_bus_type;
+	spi->dev.release = spidev_release;
+	device_initialize(&spi->dev);
+	return spi;
+}
+EXPORT_SYMBOL_GPL(spi_alloc_device);
+
+/**
+ * spi_add_device - Add spi_device allocated with spi_alloc_device
+ * @spi: spi_device to register
+ *
+ * Companion function to spi_alloc_device.  Devices allocated with
+ * spi_alloc_device can be added onto the spi bus with this function.
+ *
+ * Returns 0 on success; non-zero on failure
+ */
+int spi_add_device(struct spi_device *spi)
+{
+	struct device *dev = spi->master->dev.parent;
+	int status;
+
+	/* Chipselects are numbered 0..max; validate. */
+	if (spi->chip_select >= spi->master->num_chipselect) {
+		dev_err(dev, "cs%d >= max %d\n",
+			spi->chip_select,
+			spi->master->num_chipselect);
+		return -EINVAL;
+	}
+
+	/* Set the bus ID string */
+	snprintf(spi->dev.bus_id, sizeof spi->dev.bus_id,
+			"%s.%u", spi->master->dev.bus_id,
+			spi->chip_select);
+
+	/* drivers may modify this initial i/o setup */
+	status = spi->master->setup(spi);
+	if (status < 0) {
+		dev_err(dev, "can't %s %s, status %d\n",
+				"setup", spi->dev.bus_id, status);
+		return status;
+	}
+
+	/* driver core catches callers that misbehave by defining
+	 * devices that already exist.
+	 */
+	status = device_add(&spi->dev);
+	if (status < 0) {
+		dev_err(dev, "can't %s %s, status %d\n",
+				"add", spi->dev.bus_id, status);
+		return status;
+	}
+
+	dev_dbg(dev, "registered child %s\n", spi->dev.bus_id);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spi_add_device);
 
 /**
  * spi_new_device - instantiate one new SPI device
@@ -197,7 +287,6 @@ struct spi_device *spi_new_device(struct spi_master *master,
 				  struct spi_board_info *chip)
 {
 	struct spi_device	*proxy;
-	struct device		*dev = master->dev.parent;
 	int			status;
 
 	/* NOTE:  caller did any chip->bus_num checks necessary.
@@ -207,66 +296,28 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	 * suggests syslogged diagnostics are best here (ugh).
 	 */
 
-	/* Chipselects are numbered 0..max; validate. */
-	if (chip->chip_select >= master->num_chipselect) {
-		dev_err(dev, "cs%d > max %d\n",
-			chip->chip_select,
-			master->num_chipselect);
-		return NULL;
-	}
-
-	if (!spi_master_get(master))
+	proxy = spi_alloc_device(master);
+	if (!proxy)
 		return NULL;
 
 	WARN_ON(strlen(chip->modalias) >= sizeof(proxy->modalias));
 
-	proxy = kzalloc(sizeof *proxy, GFP_KERNEL);
-	if (!proxy) {
-		dev_err(dev, "can't alloc dev for cs%d\n",
-			chip->chip_select);
-		goto fail;
-	}
-	proxy->master = master;
 	proxy->chip_select = chip->chip_select;
 	proxy->max_speed_hz = chip->max_speed_hz;
 	proxy->mode = chip->mode;
 	proxy->irq = chip->irq;
 	strlcpy(proxy->modalias, chip->modalias, sizeof(proxy->modalias));
-
-	snprintf(proxy->dev.bus_id, sizeof proxy->dev.bus_id,
-			"%s.%u", master->dev.bus_id,
-			chip->chip_select);
-	proxy->dev.parent = dev;
-	proxy->dev.bus = &spi_bus_type;
 	proxy->dev.platform_data = (void *) chip->platform_data;
 	proxy->controller_data = chip->controller_data;
 	proxy->controller_state = NULL;
-	proxy->dev.release = spidev_release;
 
-	/* drivers may modify this initial i/o setup */
-	status = master->setup(proxy);
+	status = spi_add_device(proxy);
 	if (status < 0) {
-		dev_err(dev, "can't %s %s, status %d\n",
-				"setup", proxy->dev.bus_id, status);
-		goto fail;
+		spi_dev_put(proxy);
+		return NULL;
 	}
 
-	/* driver core catches callers that misbehave by defining
-	 * devices that already exist.
-	 */
-	status = device_register(&proxy->dev);
-	if (status < 0) {
-		dev_err(dev, "can't %s %s, status %d\n",
-				"add", proxy->dev.bus_id, status);
-		goto fail;
-	}
-	dev_dbg(dev, "registered child %s\n", proxy->dev.bus_id);
 	return proxy;
-
-fail:
-	spi_master_put(master);
-	kfree(proxy);
-	return NULL;
 }
 EXPORT_SYMBOL_GPL(spi_new_device);
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index a9cc29d46653..4be01bb44377 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -778,7 +778,19 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
  * use spi_new_device() to describe each device.  You can also call
  * spi_unregister_device() to start making that device vanish, but
  * normally that would be handled by spi_unregister_master().
+ *
+ * You can also use spi_alloc_device() and spi_add_device() to use a two
+ * stage registration sequence for each spi_device.  This gives the caller
+ * some more control over the spi_device structure before it is registered,
+ * but requires that caller to initialize fields that would otherwise
+ * be defined using the board info.
  */
+extern struct spi_device *
+spi_alloc_device(struct spi_master *master);
+
+extern int
+spi_add_device(struct spi_device *spi);
+
 extern struct spi_device *
 spi_new_device(struct spi_master *, struct spi_board_info *);
 
-- 
cgit v1.2.3


From 284b01897340974000bcc84de87a4e1becc8a83d Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 16 May 2008 11:37:09 -0600
Subject: spi: Add OF binding support for SPI busses

This patch adds support for populating an SPI bus based on data in the
OF device tree.  This is useful for powerpc platforms which use the
device tree instead of discrete code for describing platform layout.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/Kconfig     |  6 ++++
 drivers/of/Makefile    |  1 +
 drivers/of/of_spi.c    | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_spi.h | 18 ++++++++++
 4 files changed, 118 insertions(+)
 create mode 100644 drivers/of/of_spi.c
 create mode 100644 include/linux/of_spi.h

(limited to 'include/linux')

diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 1d7ec3129349..f821dbc952a4 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -13,3 +13,9 @@ config OF_I2C
 	depends on PPC_OF && I2C
 	help
 	  OpenFirmware I2C accessors
+
+config OF_SPI
+	def_tristate SPI
+	depends on OF && PPC_OF && SPI
+	help
+	  OpenFirmware SPI accessors
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 548772e871fd..4c3c6f8e36f5 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -2,3 +2,4 @@ obj-y = base.o
 obj-$(CONFIG_OF_DEVICE) += device.o platform.o
 obj-$(CONFIG_OF_GPIO)   += gpio.o
 obj-$(CONFIG_OF_I2C)	+= of_i2c.o
+obj-$(CONFIG_OF_SPI)	+= of_spi.o
diff --git a/drivers/of/of_spi.c b/drivers/of/of_spi.c
new file mode 100644
index 000000000000..b01eec026f68
--- /dev/null
+++ b/drivers/of/of_spi.c
@@ -0,0 +1,93 @@
+/*
+ * SPI OF support routines
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ *
+ * Support routines for deriving SPI device attachments from the device
+ * tree.
+ */
+
+#include <linux/of.h>
+#include <linux/device.h>
+#include <linux/spi/spi.h>
+#include <linux/of_spi.h>
+
+/**
+ * of_register_spi_devices - Register child devices onto the SPI bus
+ * @master:	Pointer to spi_master device
+ * @np:		parent node of SPI device nodes
+ *
+ * Registers an spi_device for each child node of 'np' which has a 'reg'
+ * property.
+ */
+void of_register_spi_devices(struct spi_master *master, struct device_node *np)
+{
+	struct spi_device *spi;
+	struct device_node *nc;
+	const u32 *prop;
+	int rc;
+	int len;
+
+	for_each_child_of_node(np, nc) {
+		/* Alloc an spi_device */
+		spi = spi_alloc_device(master);
+		if (!spi) {
+			dev_err(&master->dev, "spi_device alloc error for %s\n",
+				nc->full_name);
+			spi_dev_put(spi);
+			continue;
+		}
+
+		/* Select device driver */
+		if (of_modalias_node(nc, spi->modalias,
+				     sizeof(spi->modalias)) < 0) {
+			dev_err(&master->dev, "cannot find modalias for %s\n",
+				nc->full_name);
+			spi_dev_put(spi);
+			continue;
+		}
+
+		/* Device address */
+		prop = of_get_property(nc, "reg", &len);
+		if (!prop || len < sizeof(*prop)) {
+			dev_err(&master->dev, "%s has no 'reg' property\n",
+				nc->full_name);
+			spi_dev_put(spi);
+			continue;
+		}
+		spi->chip_select = *prop;
+
+		/* Mode (clock phase/polarity/etc.) */
+		if (of_find_property(nc, "spi-cpha", NULL))
+			spi->mode |= SPI_CPHA;
+		if (of_find_property(nc, "spi-cpol", NULL))
+			spi->mode |= SPI_CPOL;
+
+		/* Device speed */
+		prop = of_get_property(nc, "spi-max-frequency", &len);
+		if (!prop || len < sizeof(*prop)) {
+			dev_err(&master->dev, "%s has no 'spi-max-frequency' property\n",
+				nc->full_name);
+			spi_dev_put(spi);
+			continue;
+		}
+		spi->max_speed_hz = *prop;
+
+		/* IRQ */
+		spi->irq = irq_of_parse_and_map(nc, 0);
+
+		/* Store a pointer to the node in the device structure */
+		of_node_get(nc);
+		spi->dev.archdata.of_node = nc;
+
+		/* Register the new device */
+		request_module(spi->modalias);
+		rc = spi_add_device(spi);
+		if (rc) {
+			dev_err(&master->dev, "spi_device register error %s\n",
+				nc->full_name);
+			spi_dev_put(spi);
+		}
+
+	}
+}
+EXPORT_SYMBOL(of_register_spi_devices);
diff --git a/include/linux/of_spi.h b/include/linux/of_spi.h
new file mode 100644
index 000000000000..5f71ee8c0868
--- /dev/null
+++ b/include/linux/of_spi.h
@@ -0,0 +1,18 @@
+/*
+ * OpenFirmware SPI support routines
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ *
+ * Support routines for deriving SPI device attachments from the device
+ * tree.
+ */
+
+#ifndef __LINUX_OF_SPI_H
+#define __LINUX_OF_SPI_H
+
+#include <linux/of.h>
+#include <linux/spi/spi.h>
+
+extern void of_register_spi_devices(struct spi_master *master,
+				    struct device_node *np);
+
+#endif /* __LINUX_OF_SPI */
-- 
cgit v1.2.3


From 3bc9f79ee1ddc913be0a6d3592036683ef8a3148 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 25 Jul 2008 14:57:58 +0200
Subject: iommu: add iommu_num_pages helper function

Calculating the number of pages from given address and length numbers is a task
required in multiple IOMMU implementations. So implement this as a generic
function into the IOMMU helper code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: iommu@lists.linux-foundation.org
Cc: bhavna.sarathy@amd.com
Cc: robert.richter@amd.com
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/iommu-helper.h | 1 +
 lib/iommu-helper.c           | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index c975caf75385..f8598f583944 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -8,3 +8,4 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 				      unsigned long align_mask);
 extern void iommu_area_free(unsigned long *map, unsigned long start,
 			    unsigned int nr);
+extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index a3b8d4c3f77a..889ddce2021e 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -80,3 +80,11 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
 	}
 }
 EXPORT_SYMBOL(iommu_area_free);
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+	return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
-- 
cgit v1.2.3


From b8d317d10cca76cabe6b03ebfeb23cc99118b731 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Thu, 24 Jul 2008 18:21:29 -0700
Subject: cpumask: make cpumask_of_cpu_map generic

If an arch doesn't define cpumask_of_cpu_map, create a generic
statically-initialized one for them.  This allows removal of the buggy
cpumask_of_cpu() macro (&cpumask_of_cpu() gives address of
out-of-scope var).

An arch with NR_CPUS of 4096 probably wants to allocate this itself
based on the actual number of CPUs, since otherwise they're using 2MB
of rodata (1024 cpus means 128k).  That's what
CONFIG_HAVE_CPUMASK_OF_CPU_MAP is for (only x86/64 does so at the
moment).

In future as we support more CPUs, we'll need to resort to a
get_cpu_map()/put_cpu_map() allocation scheme.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jack Steiner <steiner@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cpumask.h |  41 ++----------------
 kernel/cpu.c            | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1b5c98e7fef7..8fa3b6d4a320 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -62,15 +62,7 @@
  * int next_cpu_nr(cpu, mask)		Next cpu past 'cpu', or nr_cpu_ids
  *
  * cpumask_t cpumask_of_cpu(cpu)	Return cpumask with bit 'cpu' set
- *ifdef CONFIG_HAS_CPUMASK_OF_CPU
- * cpumask_of_cpu_ptr_declare(v)	Declares cpumask_t *v
- * cpumask_of_cpu_ptr_next(v, cpu)	Sets v = &cpumask_of_cpu_map[cpu]
- * cpumask_of_cpu_ptr(v, cpu)		Combines above two operations
- *else
- * cpumask_of_cpu_ptr_declare(v)	Declares cpumask_t _v and *v = &_v
- * cpumask_of_cpu_ptr_next(v, cpu)	Sets _v = cpumask_of_cpu(cpu)
- * cpumask_of_cpu_ptr(v, cpu)		Combines above two operations
- *endif
+ *					(can be used as an lvalue)
  * CPU_MASK_ALL				Initializer - all bits set
  * CPU_MASK_NONE			Initializer - no bits set
  * unsigned long *cpus_addr(mask)	Array of unsigned long's in mask
@@ -274,36 +266,9 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 }
 
 
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-extern cpumask_t *cpumask_of_cpu_map;
+/* cpumask_of_cpu_map[] is in kernel/cpu.c */
+extern const cpumask_t *cpumask_of_cpu_map;
 #define cpumask_of_cpu(cpu)	(cpumask_of_cpu_map[cpu])
-#define	cpumask_of_cpu_ptr(v, cpu)					\
-		const cpumask_t *v = &cpumask_of_cpu(cpu)
-#define	cpumask_of_cpu_ptr_declare(v)					\
-		const cpumask_t *v
-#define cpumask_of_cpu_ptr_next(v, cpu)					\
-					v = &cpumask_of_cpu(cpu)
-#else
-#define cpumask_of_cpu(cpu)						\
-({									\
-	typeof(_unused_cpumask_arg_) m;					\
-	if (sizeof(m) == sizeof(unsigned long)) {			\
-		m.bits[0] = 1UL<<(cpu);					\
-	} else {							\
-		cpus_clear(m);						\
-		cpu_set((cpu), m);					\
-	}								\
-	m;								\
-})
-#define	cpumask_of_cpu_ptr(v, cpu) 					\
-		cpumask_t _##v = cpumask_of_cpu(cpu);			\
-		const cpumask_t *v = &_##v
-#define	cpumask_of_cpu_ptr_declare(v)					\
-		cpumask_t _##v;						\
-		const cpumask_t *v = &_##v
-#define cpumask_of_cpu_ptr_next(v, cpu)					\
-					_##v = cpumask_of_cpu(cpu)
-#endif
 
 #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 10ba5f1004a5..fe31ff3d3809 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -461,3 +461,112 @@ out:
 #endif /* CONFIG_PM_SLEEP_SMP */
 
 #endif /* CONFIG_SMP */
+
+#ifndef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
+/* 64 bits of zeros, for initializers. */
+#if BITS_PER_LONG == 32
+#define Z64 0, 0
+#else
+#define Z64 0
+#endif
+
+/* Initializer macros. */
+#define CMI0(n) { .bits = { 1UL << (n) } }
+#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } }
+
+#define CMI8(n, ...)						\
+	CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__),		\
+	CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__),	\
+	CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__),	\
+	CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)
+
+#if BITS_PER_LONG == 32
+#define CMI64(n, ...)							\
+	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
+	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
+	CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__),	\
+	CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
+#else
+#define CMI64(n, ...)							\
+	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
+	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
+	CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__),	\
+	CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
+#endif
+
+#define CMI256(n, ...)							\
+	CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__),	\
+	CMI64((n)+128, Z64, Z64, __VA_ARGS__),				\
+	CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__)
+#define Z256 Z64, Z64, Z64, Z64
+
+#define CMI1024(n, ...)					\
+	CMI256((n), __VA_ARGS__),			\
+	CMI256((n)+256, Z256, __VA_ARGS__),		\
+	CMI256((n)+512, Z256, Z256, __VA_ARGS__),	\
+	CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
+#define Z1024 Z256, Z256, Z256, Z256
+
+/* We want this statically initialized, just to be safe.  We try not
+ * to waste too much space, either. */
+static const cpumask_t cpumask_map[] = {
+	CMI0(0), CMI0(1), CMI0(2), CMI0(3),
+#if NR_CPUS > 4
+	CMI0(4), CMI0(5), CMI0(6), CMI0(7),
+#endif
+#if NR_CPUS > 8
+	CMI0(8), CMI0(9), CMI0(10), CMI0(11),
+	CMI0(12), CMI0(13), CMI0(14), CMI0(15),
+#endif
+#if NR_CPUS > 16
+	CMI0(16), CMI0(17), CMI0(18), CMI0(19),
+	CMI0(20), CMI0(21), CMI0(22), CMI0(23),
+	CMI0(24), CMI0(25), CMI0(26), CMI0(27),
+	CMI0(28), CMI0(29), CMI0(30), CMI0(31),
+#endif
+#if NR_CPUS > 32
+#if BITS_PER_LONG == 32
+	CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
+	CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
+	CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
+	CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
+	CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
+	CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
+	CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
+	CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
+#else
+	CMI0(32), CMI0(33), CMI0(34), CMI0(35),
+	CMI0(36), CMI0(37), CMI0(38), CMI0(39),
+	CMI0(40), CMI0(41), CMI0(42), CMI0(43),
+	CMI0(44), CMI0(45), CMI0(46), CMI0(47),
+	CMI0(48), CMI0(49), CMI0(50), CMI0(51),
+	CMI0(52), CMI0(53), CMI0(54), CMI0(55),
+	CMI0(56), CMI0(57), CMI0(58), CMI0(59),
+	CMI0(60), CMI0(61), CMI0(62), CMI0(63),
+#endif /* BITS_PER_LONG == 64 */
+#endif
+#if NR_CPUS > 64
+	CMI64(64, Z64),
+#endif
+#if NR_CPUS > 128
+	CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
+#endif
+#if NR_CPUS > 256
+	CMI256(256, Z256),
+#endif
+#if NR_CPUS > 512
+	CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
+#endif
+#if NR_CPUS > 1024
+	CMI1024(1024, Z1024),
+#endif
+#if NR_CPUS > 2048
+	CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
+#endif
+#if NR_CPUS > 4096
+#error NR_CPUS too big.  Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
+#endif
+};
+
+const cpumask_t *cpumask_of_cpu_map = cpumask_map;
+#endif /* !CONFIG_HAVE_CPUMASK_OF_CPU_MAP */
-- 
cgit v1.2.3


From fdd2a7e2dac56a3384068802be46b822f2aed703 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@infradead.org>
Date: Sat, 26 Jul 2008 13:25:25 -0300
Subject: V4L/DVB (8500a): videotext.h: whitespace cleanup

Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videotext.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videotext.h b/include/linux/videotext.h
index 018f92047ff8..3e68c8d1c7f7 100644
--- a/include/linux/videotext.h
+++ b/include/linux/videotext.h
@@ -45,10 +45,10 @@
 #define VTXIOCCLRCACHE_OLD 0x710b  /* clear cache on VTX-interface (if avail.) */
 #define VTXIOCSETVIRT_OLD  0x710c  /* turn on virtual mode (this disables TV-display) */
 
-/* 
+/*
  *	Definitions for VTXIOCGETINFO
  */
- 
+
 #define SAA5243 0
 #define SAA5246 1
 #define SAA5249 2
@@ -57,10 +57,10 @@
 
 typedef struct {
 	int version_major, version_minor;	/* version of driver; if version_major changes, driver */
-						/* is not backward compatible!!! CHECK THIS!!! */  
+						/* is not backward compatible!!! CHECK THIS!!! */
 	int numpages;				/* number of page-buffers of vtx-chipset */
 	int cct_type;				/* type of vtx-chipset (SAA5243, SAA5246, SAA5248 or
-  						 * SAA5249) */
+						 * SAA5249) */
 }
 vtx_info_t;
 
@@ -81,7 +81,7 @@ vtx_info_t;
 #define PGMASK_HOUR (HR_TEN | HR_UNIT)
 #define PGMASK_MINUTE (MIN_TEN | MIN_UNIT)
 
-typedef struct 
+typedef struct
 {
 	int page;	/* number of requested page (hexadecimal) */
 	int hour;	/* requested hour (hexadecimal) */
@@ -98,11 +98,11 @@ vtx_pagereq_t;
 /*
  *	Definitions for VTXIOC{GETSTAT,PUTSTAT}
  */
- 
+
 #define VTX_PAGESIZE (40 * 24)
 #define VTX_VIRTUALSIZE (40 * 49)
 
-typedef struct 
+typedef struct
 {
 	int pagenum;			/* number of page (hexadecimal) */
 	int hour;			/* hour (hexadecimal) */
@@ -121,5 +121,5 @@ typedef struct
 	unsigned hamming : 1;		/* hamming-error occurred */
 }
 vtx_pageinfo_t;
- 
+
 #endif /* _VTX_H */
-- 
cgit v1.2.3


From 9993e51c0c47ec69dce1f26c2321af6bb9165e9e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@infradead.org>
Date: Sat, 26 Jul 2008 13:53:46 -0300
Subject: V4L/DVB (8502): videodev2.h: CodingStyle cleanups

Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videodev2.h | 378 ++++++++++++++++++++--------------------------
 1 file changed, 166 insertions(+), 212 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 2e66a95e8d32..cc0c8952323b 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -91,8 +91,8 @@
  */
 
 /*  Four-character-code (FOURCC) */
-#define v4l2_fourcc(a,b,c,d)\
-	(((__u32)(a)<<0)|((__u32)(b)<<8)|((__u32)(c)<<16)|((__u32)(d)<<24))
+#define v4l2_fourcc(a, b, c, d)\
+	((__u32)(a) | ((__u32)(b) << 8) | ((__u32)(c) << 16) | ((__u32)(d) << 24))
 
 /*
  *	E N U M S
@@ -226,8 +226,7 @@ struct v4l2_fract {
 /*
  *	D R I V E R   C A P A B I L I T I E S
  */
-struct v4l2_capability
-{
+struct v4l2_capability {
 	__u8	driver[16];	/* i.e. "bttv" */
 	__u8	card[32];	/* i.e. "Hauppauge WinTV" */
 	__u8	bus_info[32];	/* "PCI:" + pci_name(pci_dev) */
@@ -259,8 +258,7 @@ struct v4l2_capability
 /*
  *	V I D E O   I M A G E   F O R M A T
  */
-struct v4l2_pix_format
-{
+struct v4l2_pix_format {
 	__u32         		width;
 	__u32			height;
 	__u32			pixelformat;
@@ -272,68 +270,67 @@ struct v4l2_pix_format
 };
 
 /*      Pixel format         FOURCC                        depth  Description  */
-#define V4L2_PIX_FMT_RGB332  v4l2_fourcc('R','G','B','1') /*  8  RGB-3-3-2     */
-#define V4L2_PIX_FMT_RGB444  v4l2_fourcc('R','4','4','4') /* 16  xxxxrrrr ggggbbbb */
-#define V4L2_PIX_FMT_RGB555  v4l2_fourcc('R','G','B','O') /* 16  RGB-5-5-5     */
-#define V4L2_PIX_FMT_RGB565  v4l2_fourcc('R','G','B','P') /* 16  RGB-5-6-5     */
-#define V4L2_PIX_FMT_RGB555X v4l2_fourcc('R','G','B','Q') /* 16  RGB-5-5-5 BE  */
-#define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R','G','B','R') /* 16  RGB-5-6-5 BE  */
-#define V4L2_PIX_FMT_BGR24   v4l2_fourcc('B','G','R','3') /* 24  BGR-8-8-8     */
-#define V4L2_PIX_FMT_RGB24   v4l2_fourcc('R','G','B','3') /* 24  RGB-8-8-8     */
-#define V4L2_PIX_FMT_BGR32   v4l2_fourcc('B','G','R','4') /* 32  BGR-8-8-8-8   */
-#define V4L2_PIX_FMT_RGB32   v4l2_fourcc('R','G','B','4') /* 32  RGB-8-8-8-8   */
-#define V4L2_PIX_FMT_GREY    v4l2_fourcc('G','R','E','Y') /*  8  Greyscale     */
-#define V4L2_PIX_FMT_Y16     v4l2_fourcc('Y','1','6',' ') /* 16  Greyscale     */
-#define V4L2_PIX_FMT_PAL8    v4l2_fourcc('P','A','L','8') /*  8  8-bit palette */
-#define V4L2_PIX_FMT_YVU410  v4l2_fourcc('Y','V','U','9') /*  9  YVU 4:1:0     */
-#define V4L2_PIX_FMT_YVU420  v4l2_fourcc('Y','V','1','2') /* 12  YVU 4:2:0     */
-#define V4L2_PIX_FMT_YUYV    v4l2_fourcc('Y','U','Y','V') /* 16  YUV 4:2:2     */
-#define V4L2_PIX_FMT_UYVY    v4l2_fourcc('U','Y','V','Y') /* 16  YUV 4:2:2     */
-#define V4L2_PIX_FMT_YUV422P v4l2_fourcc('4','2','2','P') /* 16  YVU422 planar */
-#define V4L2_PIX_FMT_YUV411P v4l2_fourcc('4','1','1','P') /* 16  YVU411 planar */
-#define V4L2_PIX_FMT_Y41P    v4l2_fourcc('Y','4','1','P') /* 12  YUV 4:1:1     */
-#define V4L2_PIX_FMT_YUV444  v4l2_fourcc('Y','4','4','4') /* 16  xxxxyyyy uuuuvvvv */
-#define V4L2_PIX_FMT_YUV555  v4l2_fourcc('Y','U','V','O') /* 16  YUV-5-5-5     */
-#define V4L2_PIX_FMT_YUV565  v4l2_fourcc('Y','U','V','P') /* 16  YUV-5-6-5     */
-#define V4L2_PIX_FMT_YUV32   v4l2_fourcc('Y','U','V','4') /* 32  YUV-8-8-8-8   */
+#define V4L2_PIX_FMT_RGB332  v4l2_fourcc('R', 'G', 'B', '1') /*  8  RGB-3-3-2     */
+#define V4L2_PIX_FMT_RGB444  v4l2_fourcc('R', '4', '4', '4') /* 16  xxxxrrrr ggggbbbb */
+#define V4L2_PIX_FMT_RGB555  v4l2_fourcc('R', 'G', 'B', 'O') /* 16  RGB-5-5-5     */
+#define V4L2_PIX_FMT_RGB565  v4l2_fourcc('R', 'G', 'B', 'P') /* 16  RGB-5-6-5     */
+#define V4L2_PIX_FMT_RGB555X v4l2_fourcc('R', 'G', 'B', 'Q') /* 16  RGB-5-5-5 BE  */
+#define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R', 'G', 'B', 'R') /* 16  RGB-5-6-5 BE  */
+#define V4L2_PIX_FMT_BGR24   v4l2_fourcc('B', 'G', 'R', '3') /* 24  BGR-8-8-8     */
+#define V4L2_PIX_FMT_RGB24   v4l2_fourcc('R', 'G', 'B', '3') /* 24  RGB-8-8-8     */
+#define V4L2_PIX_FMT_BGR32   v4l2_fourcc('B', 'G', 'R', '4') /* 32  BGR-8-8-8-8   */
+#define V4L2_PIX_FMT_RGB32   v4l2_fourcc('R', 'G', 'B', '4') /* 32  RGB-8-8-8-8   */
+#define V4L2_PIX_FMT_GREY    v4l2_fourcc('G', 'R', 'E', 'Y') /*  8  Greyscale     */
+#define V4L2_PIX_FMT_Y16     v4l2_fourcc('Y', '1', '6', ' ') /* 16  Greyscale     */
+#define V4L2_PIX_FMT_PAL8    v4l2_fourcc('P', 'A', 'L', '8') /*  8  8-bit palette */
+#define V4L2_PIX_FMT_YVU410  v4l2_fourcc('Y', 'V', 'U', '9') /*  9  YVU 4:1:0     */
+#define V4L2_PIX_FMT_YVU420  v4l2_fourcc('Y', 'V', '1', '2') /* 12  YVU 4:2:0     */
+#define V4L2_PIX_FMT_YUYV    v4l2_fourcc('Y', 'U', 'Y', 'V') /* 16  YUV 4:2:2     */
+#define V4L2_PIX_FMT_UYVY    v4l2_fourcc('U', 'Y', 'V', 'Y') /* 16  YUV 4:2:2     */
+#define V4L2_PIX_FMT_YUV422P v4l2_fourcc('4', '2', '2', 'P') /* 16  YVU422 planar */
+#define V4L2_PIX_FMT_YUV411P v4l2_fourcc('4', '1', '1', 'P') /* 16  YVU411 planar */
+#define V4L2_PIX_FMT_Y41P    v4l2_fourcc('Y', '4', '1', 'P') /* 12  YUV 4:1:1     */
+#define V4L2_PIX_FMT_YUV444  v4l2_fourcc('Y', '4', '4', '4') /* 16  xxxxyyyy uuuuvvvv */
+#define V4L2_PIX_FMT_YUV555  v4l2_fourcc('Y', 'U', 'V', 'O') /* 16  YUV-5-5-5     */
+#define V4L2_PIX_FMT_YUV565  v4l2_fourcc('Y', 'U', 'V', 'P') /* 16  YUV-5-6-5     */
+#define V4L2_PIX_FMT_YUV32   v4l2_fourcc('Y', 'U', 'V', '4') /* 32  YUV-8-8-8-8   */
 
 /* two planes -- one Y, one Cr + Cb interleaved  */
-#define V4L2_PIX_FMT_NV12    v4l2_fourcc('N','V','1','2') /* 12  Y/CbCr 4:2:0  */
-#define V4L2_PIX_FMT_NV21    v4l2_fourcc('N','V','2','1') /* 12  Y/CrCb 4:2:0  */
+#define V4L2_PIX_FMT_NV12    v4l2_fourcc('N', 'V', '1', '2') /* 12  Y/CbCr 4:2:0  */
+#define V4L2_PIX_FMT_NV21    v4l2_fourcc('N', 'V', '2', '1') /* 12  Y/CrCb 4:2:0  */
 
 /*  The following formats are not defined in the V4L2 specification */
-#define V4L2_PIX_FMT_YUV410  v4l2_fourcc('Y','U','V','9') /*  9  YUV 4:1:0     */
-#define V4L2_PIX_FMT_YUV420  v4l2_fourcc('Y','U','1','2') /* 12  YUV 4:2:0     */
-#define V4L2_PIX_FMT_YYUV    v4l2_fourcc('Y','Y','U','V') /* 16  YUV 4:2:2     */
-#define V4L2_PIX_FMT_HI240   v4l2_fourcc('H','I','2','4') /*  8  8-bit color   */
-#define V4L2_PIX_FMT_HM12    v4l2_fourcc('H','M','1','2') /*  8  YUV 4:2:0 16x16 macroblocks */
+#define V4L2_PIX_FMT_YUV410  v4l2_fourcc('Y', 'U', 'V', '9') /*  9  YUV 4:1:0     */
+#define V4L2_PIX_FMT_YUV420  v4l2_fourcc('Y', 'U', '1', '2') /* 12  YUV 4:2:0     */
+#define V4L2_PIX_FMT_YYUV    v4l2_fourcc('Y', 'Y', 'U', 'V') /* 16  YUV 4:2:2     */
+#define V4L2_PIX_FMT_HI240   v4l2_fourcc('H', 'I', '2', '4') /*  8  8-bit color   */
+#define V4L2_PIX_FMT_HM12    v4l2_fourcc('H', 'M', '1', '2') /*  8  YUV 4:2:0 16x16 macroblocks */
 
 /* see http://www.siliconimaging.com/RGB%20Bayer.htm */
-#define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B','A','8','1') /*  8  BGBG.. GRGR.. */
-#define V4L2_PIX_FMT_SGBRG8  v4l2_fourcc('G','B','R','G') /*  8  GBGB.. RGRG.. */
-#define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B','Y','R','2') /* 16  BGBG.. GRGR.. */
+#define V4L2_PIX_FMT_SBGGR8  v4l2_fourcc('B', 'A', '8', '1') /*  8  BGBG.. GRGR.. */
+#define V4L2_PIX_FMT_SGBRG8  v4l2_fourcc('G', 'B', 'R', 'G') /*  8  GBGB.. RGRG.. */
+#define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B', 'Y', 'R', '2') /* 16  BGBG.. GRGR.. */
 
 /* compressed formats */
-#define V4L2_PIX_FMT_MJPEG    v4l2_fourcc('M','J','P','G') /* Motion-JPEG   */
-#define V4L2_PIX_FMT_JPEG     v4l2_fourcc('J','P','E','G') /* JFIF JPEG     */
-#define V4L2_PIX_FMT_DV       v4l2_fourcc('d','v','s','d') /* 1394          */
-#define V4L2_PIX_FMT_MPEG     v4l2_fourcc('M','P','E','G') /* MPEG-1/2/4    */
+#define V4L2_PIX_FMT_MJPEG    v4l2_fourcc('M', 'J', 'P', 'G') /* Motion-JPEG   */
+#define V4L2_PIX_FMT_JPEG     v4l2_fourcc('J', 'P', 'E', 'G') /* JFIF JPEG     */
+#define V4L2_PIX_FMT_DV       v4l2_fourcc('d', 'v', 's', 'd') /* 1394          */
+#define V4L2_PIX_FMT_MPEG     v4l2_fourcc('M', 'P', 'E', 'G') /* MPEG-1/2/4    */
 
 /*  Vendor-specific formats   */
-#define V4L2_PIX_FMT_WNVA     v4l2_fourcc('W','N','V','A') /* Winnov hw compress */
-#define V4L2_PIX_FMT_SN9C10X  v4l2_fourcc('S','9','1','0') /* SN9C10x compression */
-#define V4L2_PIX_FMT_PWC1     v4l2_fourcc('P','W','C','1') /* pwc older webcam */
-#define V4L2_PIX_FMT_PWC2     v4l2_fourcc('P','W','C','2') /* pwc newer webcam */
-#define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E','6','2','5') /* ET61X251 compression */
-#define V4L2_PIX_FMT_SPCA501  v4l2_fourcc('S','5','0','1') /* YUYV per line */
-#define V4L2_PIX_FMT_SPCA561  v4l2_fourcc('S','5','6','1') /* compressed GBRG bayer */
-#define V4L2_PIX_FMT_PAC207   v4l2_fourcc('P','2','0','7') /* compressed BGGR bayer */
+#define V4L2_PIX_FMT_WNVA     v4l2_fourcc('W', 'N', 'V', 'A') /* Winnov hw compress */
+#define V4L2_PIX_FMT_SN9C10X  v4l2_fourcc('S', '9', '1', '0') /* SN9C10x compression */
+#define V4L2_PIX_FMT_PWC1     v4l2_fourcc('P', 'W', 'C', '1') /* pwc older webcam */
+#define V4L2_PIX_FMT_PWC2     v4l2_fourcc('P', 'W', 'C', '2') /* pwc newer webcam */
+#define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E', '6', '2', '5') /* ET61X251 compression */
+#define V4L2_PIX_FMT_SPCA501  v4l2_fourcc('S', '5', '0', '1') /* YUYV per line */
+#define V4L2_PIX_FMT_SPCA561  v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */
+#define V4L2_PIX_FMT_PAC207   v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */
 
 /*
  *	F O R M A T   E N U M E R A T I O N
  */
-struct v4l2_fmtdesc
-{
+struct v4l2_fmtdesc {
 	__u32		    index;             /* Format number      */
 	enum v4l2_buf_type  type;              /* buffer type        */
 	__u32               flags;
@@ -349,21 +346,18 @@ struct v4l2_fmtdesc
 /*
  *	F R A M E   S I Z E   E N U M E R A T I O N
  */
-enum v4l2_frmsizetypes
-{
+enum v4l2_frmsizetypes {
 	V4L2_FRMSIZE_TYPE_DISCRETE	= 1,
 	V4L2_FRMSIZE_TYPE_CONTINUOUS	= 2,
 	V4L2_FRMSIZE_TYPE_STEPWISE	= 3,
 };
 
-struct v4l2_frmsize_discrete
-{
+struct v4l2_frmsize_discrete {
 	__u32			width;		/* Frame width [pixel] */
 	__u32			height;		/* Frame height [pixel] */
 };
 
-struct v4l2_frmsize_stepwise
-{
+struct v4l2_frmsize_stepwise {
 	__u32			min_width;	/* Minimum frame width [pixel] */
 	__u32			max_width;	/* Maximum frame width [pixel] */
 	__u32			step_width;	/* Frame width step size [pixel] */
@@ -372,8 +366,7 @@ struct v4l2_frmsize_stepwise
 	__u32			step_height;	/* Frame height step size [pixel] */
 };
 
-struct v4l2_frmsizeenum
-{
+struct v4l2_frmsizeenum {
 	__u32			index;		/* Frame size number */
 	__u32			pixel_format;	/* Pixel format */
 	__u32			type;		/* Frame size type the device supports. */
@@ -389,22 +382,19 @@ struct v4l2_frmsizeenum
 /*
  *	F R A M E   R A T E   E N U M E R A T I O N
  */
-enum v4l2_frmivaltypes
-{
+enum v4l2_frmivaltypes {
 	V4L2_FRMIVAL_TYPE_DISCRETE	= 1,
 	V4L2_FRMIVAL_TYPE_CONTINUOUS	= 2,
 	V4L2_FRMIVAL_TYPE_STEPWISE	= 3,
 };
 
-struct v4l2_frmival_stepwise
-{
+struct v4l2_frmival_stepwise {
 	struct v4l2_fract	min;		/* Minimum frame interval [s] */
 	struct v4l2_fract	max;		/* Maximum frame interval [s] */
 	struct v4l2_fract	step;		/* Frame interval step size [s] */
 };
 
-struct v4l2_frmivalenum
-{
+struct v4l2_frmivalenum {
 	__u32			index;		/* Frame format index */
 	__u32			pixel_format;	/* Pixel format */
 	__u32			width;		/* Frame width */
@@ -423,8 +413,7 @@ struct v4l2_frmivalenum
 /*
  *	T I M E C O D E
  */
-struct v4l2_timecode
-{
+struct v4l2_timecode {
 	__u32	type;
 	__u32	flags;
 	__u8	frames;
@@ -449,8 +438,7 @@ struct v4l2_timecode
 #define V4L2_TC_USERBITS_8BITCHARS	0x0008
 /* The above is based on SMPTE timecodes */
 
-struct v4l2_jpegcompression
-{
+struct v4l2_jpegcompression {
 	int quality;
 
 	int  APPn;              /* Number of APP segment to be written,
@@ -482,16 +470,14 @@ struct v4l2_jpegcompression
 /*
  *	M E M O R Y - M A P P I N G   B U F F E R S
  */
-struct v4l2_requestbuffers
-{
+struct v4l2_requestbuffers {
 	__u32			count;
 	enum v4l2_buf_type      type;
 	enum v4l2_memory        memory;
 	__u32			reserved[2];
 };
 
-struct v4l2_buffer
-{
+struct v4l2_buffer {
 	__u32			index;
 	enum v4l2_buf_type      type;
 	__u32			bytesused;
@@ -525,13 +511,12 @@ struct v4l2_buffer
 /*
  *	O V E R L A Y   P R E V I E W
  */
-struct v4l2_framebuffer
-{
+struct v4l2_framebuffer {
 	__u32			capability;
 	__u32			flags;
 /* FIXME: in theory we should pass something like PCI device + memory
  * region + offset instead of some physical address */
-	void*                   base;
+	void                    *base;
 	struct v4l2_pix_format	fmt;
 };
 /*  Flags for the 'capability' field. Read only */
@@ -550,14 +535,12 @@ struct v4l2_framebuffer
 #define V4L2_FBUF_FLAG_GLOBAL_ALPHA	0x0010
 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA	0x0020
 
-struct v4l2_clip
-{
+struct v4l2_clip {
 	struct v4l2_rect        c;
 	struct v4l2_clip	__user *next;
 };
 
-struct v4l2_window
-{
+struct v4l2_window {
 	struct v4l2_rect        w;
 	enum v4l2_field  	field;
 	__u32			chromakey;
@@ -570,8 +553,7 @@ struct v4l2_window
 /*
  *	C A P T U R E   P A R A M E T E R S
  */
-struct v4l2_captureparm
-{
+struct v4l2_captureparm {
 	__u32		   capability;	  /*  Supported modes */
 	__u32		   capturemode;	  /*  Current mode */
 	struct v4l2_fract  timeperframe;  /*  Time per frame in .1us units */
@@ -584,8 +566,7 @@ struct v4l2_captureparm
 #define V4L2_MODE_HIGHQUALITY	0x0001	/*  High quality imaging mode */
 #define V4L2_CAP_TIMEPERFRAME	0x1000	/*  timeperframe field is supported */
 
-struct v4l2_outputparm
-{
+struct v4l2_outputparm {
 	__u32		   capability;	 /*  Supported modes */
 	__u32		   outputmode;	 /*  Current mode */
 	struct v4l2_fract  timeperframe; /*  Time per frame in seconds */
@@ -702,8 +683,7 @@ typedef __u64 v4l2_std_id;
 #define V4L2_STD_ALL            (V4L2_STD_525_60	|\
 				 V4L2_STD_625_50)
 
-struct v4l2_standard
-{
+struct v4l2_standard {
 	__u32		     index;
 	v4l2_std_id          id;
 	__u8		     name[24];
@@ -715,8 +695,7 @@ struct v4l2_standard
 /*
  *	V I D E O   I N P U T S
  */
-struct v4l2_input
-{
+struct v4l2_input {
 	__u32	     index;		/*  Which input */
 	__u8	     name[32];		/*  Label */
 	__u32	     type;		/*  Type of input */
@@ -753,8 +732,7 @@ struct v4l2_input
 /*
  *	V I D E O   O U T P U T S
  */
-struct v4l2_output
-{
+struct v4l2_output {
 	__u32	     index;		/*  Which output */
 	__u8	     name[32];		/*  Label */
 	__u32	     type;		/*  Type of output */
@@ -771,14 +749,12 @@ struct v4l2_output
 /*
  *	C O N T R O L S
  */
-struct v4l2_control
-{
+struct v4l2_control {
 	__u32		     id;
 	__s32		     value;
 };
 
-struct v4l2_ext_control
-{
+struct v4l2_ext_control {
 	__u32 id;
 	__u32 reserved2[2];
 	union {
@@ -788,8 +764,7 @@ struct v4l2_ext_control
 	};
 } __attribute__ ((packed));
 
-struct v4l2_ext_controls
-{
+struct v4l2_ext_controls {
 	__u32 ctrl_class;
 	__u32 count;
 	__u32 error_idx;
@@ -807,8 +782,7 @@ struct v4l2_ext_controls
 #define V4L2_CTRL_DRIVER_PRIV(id) (((id) & 0xffff) >= 0x1000)
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
-struct v4l2_queryctrl
-{
+struct v4l2_queryctrl {
 	__u32		     id;
 	enum v4l2_ctrl_type  type;
 	__u8		     name[32];	/* Whatever */
@@ -821,8 +795,7 @@ struct v4l2_queryctrl
 };
 
 /*  Used in the VIDIOC_QUERYMENU ioctl for querying menu items */
-struct v4l2_querymenu
-{
+struct v4l2_querymenu {
 	__u32		id;
 	__u32		index;
 	__u8		name[32];	/* Whatever */
@@ -1104,8 +1077,7 @@ enum  v4l2_exposure_auto_type {
 /*
  *	T U N I N G
  */
-struct v4l2_tuner
-{
+struct v4l2_tuner {
 	__u32                   index;
 	__u8			name[32];
 	enum v4l2_tuner_type    type;
@@ -1119,8 +1091,7 @@ struct v4l2_tuner
 	__u32			reserved[4];
 };
 
-struct v4l2_modulator
-{
+struct v4l2_modulator {
 	__u32			index;
 	__u8			name[32];
 	__u32			capability;
@@ -1153,8 +1124,7 @@ struct v4l2_modulator
 #define V4L2_TUNER_MODE_LANG1		0x0003
 #define V4L2_TUNER_MODE_LANG1_LANG2	0x0004
 
-struct v4l2_frequency
-{
+struct v4l2_frequency {
 	__u32		      tuner;
 	enum v4l2_tuner_type  type;
 	__u32		      frequency;
@@ -1172,8 +1142,7 @@ struct v4l2_hw_freq_seek {
 /*
  *	A U D I O
  */
-struct v4l2_audio
-{
+struct v4l2_audio {
 	__u32	index;
 	__u8	name[32];
 	__u32	capability;
@@ -1188,8 +1157,7 @@ struct v4l2_audio
 /*  Flags for the 'mode' field */
 #define V4L2_AUDMODE_AVL		0x00001
 
-struct v4l2_audioout
-{
+struct v4l2_audioout {
 	__u32	index;
 	__u8	name[32];
 	__u32	capability;
@@ -1253,8 +1221,7 @@ struct v4l2_encoder_cmd {
  */
 
 /* Raw VBI */
-struct v4l2_vbi_format
-{
+struct v4l2_vbi_format {
 	__u32	sampling_rate;		/* in 1 Hz */
 	__u32	offset;
 	__u32	samples_per_line;
@@ -1266,8 +1233,8 @@ struct v4l2_vbi_format
 };
 
 /*  VBI flags  */
-#define V4L2_VBI_UNSYNC		(1<< 0)
-#define V4L2_VBI_INTERLACED	(1<< 1)
+#define V4L2_VBI_UNSYNC		(1 << 0)
+#define V4L2_VBI_INTERLACED	(1 << 1)
 
 /* Sliced VBI
  *
@@ -1276,8 +1243,7 @@ struct v4l2_vbi_format
  * notice in the definitive implementation.
  */
 
-struct v4l2_sliced_vbi_format
-{
+struct v4l2_sliced_vbi_format {
 	__u16   service_set;
 	/* service_lines[0][...] specifies lines 0-23 (1-23 used) of the first field
 	   service_lines[1][...] specifies lines 0-23 (1-23 used) of the second field
@@ -1301,8 +1267,7 @@ struct v4l2_sliced_vbi_format
 #define V4L2_SLICED_VBI_525             (V4L2_SLICED_CAPTION_525)
 #define V4L2_SLICED_VBI_625             (V4L2_SLICED_TELETEXT_B | V4L2_SLICED_VPS | V4L2_SLICED_WSS_625)
 
-struct v4l2_sliced_vbi_cap
-{
+struct v4l2_sliced_vbi_cap {
 	__u16   service_set;
 	/* service_lines[0][...] specifies lines 0-23 (1-23 used) of the first field
 	   service_lines[1][...] specifies lines 0-23 (1-23 used) of the second field
@@ -1313,8 +1278,7 @@ struct v4l2_sliced_vbi_cap
 	__u32   reserved[3];    /* must be 0 */
 };
 
-struct v4l2_sliced_vbi_data
-{
+struct v4l2_sliced_vbi_data {
 	__u32   id;
 	__u32   field;          /* 0: first field, 1: second field */
 	__u32   line;           /* 1-23 */
@@ -1328,27 +1292,23 @@ struct v4l2_sliced_vbi_data
 
 /*	Stream data format
  */
-struct v4l2_format
-{
+struct v4l2_format {
 	enum v4l2_buf_type type;
-	union
-	{
-		struct v4l2_pix_format		pix;     // V4L2_BUF_TYPE_VIDEO_CAPTURE
-		struct v4l2_window		win;     // V4L2_BUF_TYPE_VIDEO_OVERLAY
-		struct v4l2_vbi_format		vbi;     // V4L2_BUF_TYPE_VBI_CAPTURE
-		struct v4l2_sliced_vbi_format	sliced;  // V4L2_BUF_TYPE_SLICED_VBI_CAPTURE
-		__u8	raw_data[200];                   // user-defined
+	union {
+		struct v4l2_pix_format		pix;     /* V4L2_BUF_TYPE_VIDEO_CAPTURE */
+		struct v4l2_window		win;     /* V4L2_BUF_TYPE_VIDEO_OVERLAY */
+		struct v4l2_vbi_format		vbi;     /* V4L2_BUF_TYPE_VBI_CAPTURE */
+		struct v4l2_sliced_vbi_format	sliced;  /* V4L2_BUF_TYPE_SLICED_VBI_CAPTURE */
+		__u8	raw_data[200];                   /* user-defined */
 	} fmt;
 };
 
 
 /*	Stream type-dependent parameters
  */
-struct v4l2_streamparm
-{
+struct v4l2_streamparm {
 	enum v4l2_buf_type type;
-	union
-	{
+	union {
 		struct v4l2_captureparm	capture;
 		struct v4l2_outputparm	output;
 		__u8	raw_data[200];  /* user-defined */
@@ -1386,92 +1346,86 @@ struct v4l2_chip_ident {
  *	I O C T L   C O D E S   F O R   V I D E O   D E V I C E S
  *
  */
-#define VIDIOC_QUERYCAP		_IOR  ('V',  0, struct v4l2_capability)
-#define VIDIOC_RESERVED		_IO   ('V',  1)
-#define VIDIOC_ENUM_FMT         _IOWR ('V',  2, struct v4l2_fmtdesc)
-#define VIDIOC_G_FMT		_IOWR ('V',  4, struct v4l2_format)
-#define VIDIOC_S_FMT		_IOWR ('V',  5, struct v4l2_format)
-#define VIDIOC_REQBUFS		_IOWR ('V',  8, struct v4l2_requestbuffers)
-#define VIDIOC_QUERYBUF		_IOWR ('V',  9, struct v4l2_buffer)
-#define VIDIOC_G_FBUF		_IOR  ('V', 10, struct v4l2_framebuffer)
-#define VIDIOC_S_FBUF		_IOW  ('V', 11, struct v4l2_framebuffer)
-#define VIDIOC_OVERLAY		_IOW  ('V', 14, int)
-#define VIDIOC_QBUF		_IOWR ('V', 15, struct v4l2_buffer)
-#define VIDIOC_DQBUF		_IOWR ('V', 17, struct v4l2_buffer)
-#define VIDIOC_STREAMON		_IOW  ('V', 18, int)
-#define VIDIOC_STREAMOFF	_IOW  ('V', 19, int)
-#define VIDIOC_G_PARM		_IOWR ('V', 21, struct v4l2_streamparm)
-#define VIDIOC_S_PARM		_IOWR ('V', 22, struct v4l2_streamparm)
-#define VIDIOC_G_STD		_IOR  ('V', 23, v4l2_std_id)
-#define VIDIOC_S_STD		_IOW  ('V', 24, v4l2_std_id)
-#define VIDIOC_ENUMSTD		_IOWR ('V', 25, struct v4l2_standard)
-#define VIDIOC_ENUMINPUT	_IOWR ('V', 26, struct v4l2_input)
-#define VIDIOC_G_CTRL		_IOWR ('V', 27, struct v4l2_control)
-#define VIDIOC_S_CTRL		_IOWR ('V', 28, struct v4l2_control)
-#define VIDIOC_G_TUNER		_IOWR ('V', 29, struct v4l2_tuner)
-#define VIDIOC_S_TUNER		_IOW  ('V', 30, struct v4l2_tuner)
-#define VIDIOC_G_AUDIO		_IOR  ('V', 33, struct v4l2_audio)
-#define VIDIOC_S_AUDIO		_IOW  ('V', 34, struct v4l2_audio)
-#define VIDIOC_QUERYCTRL	_IOWR ('V', 36, struct v4l2_queryctrl)
-#define VIDIOC_QUERYMENU	_IOWR ('V', 37, struct v4l2_querymenu)
-#define VIDIOC_G_INPUT		_IOR  ('V', 38, int)
-#define VIDIOC_S_INPUT		_IOWR ('V', 39, int)
-#define VIDIOC_G_OUTPUT		_IOR  ('V', 46, int)
-#define VIDIOC_S_OUTPUT		_IOWR ('V', 47, int)
-#define VIDIOC_ENUMOUTPUT	_IOWR ('V', 48, struct v4l2_output)
-#define VIDIOC_G_AUDOUT		_IOR  ('V', 49, struct v4l2_audioout)
-#define VIDIOC_S_AUDOUT		_IOW  ('V', 50, struct v4l2_audioout)
-#define VIDIOC_G_MODULATOR	_IOWR ('V', 54, struct v4l2_modulator)
-#define VIDIOC_S_MODULATOR	_IOW  ('V', 55, struct v4l2_modulator)
-#define VIDIOC_G_FREQUENCY	_IOWR ('V', 56, struct v4l2_frequency)
-#define VIDIOC_S_FREQUENCY	_IOW  ('V', 57, struct v4l2_frequency)
-#define VIDIOC_CROPCAP		_IOWR ('V', 58, struct v4l2_cropcap)
-#define VIDIOC_G_CROP		_IOWR ('V', 59, struct v4l2_crop)
-#define VIDIOC_S_CROP		_IOW  ('V', 60, struct v4l2_crop)
-#define VIDIOC_G_JPEGCOMP	_IOR  ('V', 61, struct v4l2_jpegcompression)
-#define VIDIOC_S_JPEGCOMP	_IOW  ('V', 62, struct v4l2_jpegcompression)
-#define VIDIOC_QUERYSTD      	_IOR  ('V', 63, v4l2_std_id)
-#define VIDIOC_TRY_FMT      	_IOWR ('V', 64, struct v4l2_format)
-#define VIDIOC_ENUMAUDIO	_IOWR ('V', 65, struct v4l2_audio)
-#define VIDIOC_ENUMAUDOUT	_IOWR ('V', 66, struct v4l2_audioout)
-#define VIDIOC_G_PRIORITY       _IOR  ('V', 67, enum v4l2_priority)
-#define VIDIOC_S_PRIORITY       _IOW  ('V', 68, enum v4l2_priority)
-#define VIDIOC_G_SLICED_VBI_CAP _IOWR ('V', 69, struct v4l2_sliced_vbi_cap)
-#define VIDIOC_LOG_STATUS       _IO   ('V', 70)
-#define VIDIOC_G_EXT_CTRLS	_IOWR ('V', 71, struct v4l2_ext_controls)
-#define VIDIOC_S_EXT_CTRLS	_IOWR ('V', 72, struct v4l2_ext_controls)
-#define VIDIOC_TRY_EXT_CTRLS	_IOWR ('V', 73, struct v4l2_ext_controls)
+#define VIDIOC_QUERYCAP		 _IOR('V',  0, struct v4l2_capability)
+#define VIDIOC_RESERVED		  _IO('V',  1)
+#define VIDIOC_ENUM_FMT         _IOWR('V',  2, struct v4l2_fmtdesc)
+#define VIDIOC_G_FMT		_IOWR('V',  4, struct v4l2_format)
+#define VIDIOC_S_FMT		_IOWR('V',  5, struct v4l2_format)
+#define VIDIOC_REQBUFS		_IOWR('V',  8, struct v4l2_requestbuffers)
+#define VIDIOC_QUERYBUF		_IOWR('V',  9, struct v4l2_buffer)
+#define VIDIOC_G_FBUF		 _IOR('V', 10, struct v4l2_framebuffer)
+#define VIDIOC_S_FBUF		 _IOW('V', 11, struct v4l2_framebuffer)
+#define VIDIOC_OVERLAY		 _IOW('V', 14, int)
+#define VIDIOC_QBUF		_IOWR('V', 15, struct v4l2_buffer)
+#define VIDIOC_DQBUF		_IOWR('V', 17, struct v4l2_buffer)
+#define VIDIOC_STREAMON		 _IOW('V', 18, int)
+#define VIDIOC_STREAMOFF	 _IOW('V', 19, int)
+#define VIDIOC_G_PARM		_IOWR('V', 21, struct v4l2_streamparm)
+#define VIDIOC_S_PARM		_IOWR('V', 22, struct v4l2_streamparm)
+#define VIDIOC_G_STD		 _IOR('V', 23, v4l2_std_id)
+#define VIDIOC_S_STD		 _IOW('V', 24, v4l2_std_id)
+#define VIDIOC_ENUMSTD		_IOWR('V', 25, struct v4l2_standard)
+#define VIDIOC_ENUMINPUT	_IOWR('V', 26, struct v4l2_input)
+#define VIDIOC_G_CTRL		_IOWR('V', 27, struct v4l2_control)
+#define VIDIOC_S_CTRL		_IOWR('V', 28, struct v4l2_control)
+#define VIDIOC_G_TUNER		_IOWR('V', 29, struct v4l2_tuner)
+#define VIDIOC_S_TUNER		 _IOW('V', 30, struct v4l2_tuner)
+#define VIDIOC_G_AUDIO		 _IOR('V', 33, struct v4l2_audio)
+#define VIDIOC_S_AUDIO		 _IOW('V', 34, struct v4l2_audio)
+#define VIDIOC_QUERYCTRL	_IOWR('V', 36, struct v4l2_queryctrl)
+#define VIDIOC_QUERYMENU	_IOWR('V', 37, struct v4l2_querymenu)
+#define VIDIOC_G_INPUT		 _IOR('V', 38, int)
+#define VIDIOC_S_INPUT		_IOWR('V', 39, int)
+#define VIDIOC_G_OUTPUT		 _IOR('V', 46, int)
+#define VIDIOC_S_OUTPUT		_IOWR('V', 47, int)
+#define VIDIOC_ENUMOUTPUT	_IOWR('V', 48, struct v4l2_output)
+#define VIDIOC_G_AUDOUT		 _IOR('V', 49, struct v4l2_audioout)
+#define VIDIOC_S_AUDOUT		 _IOW('V', 50, struct v4l2_audioout)
+#define VIDIOC_G_MODULATOR	_IOWR('V', 54, struct v4l2_modulator)
+#define VIDIOC_S_MODULATOR	 _IOW('V', 55, struct v4l2_modulator)
+#define VIDIOC_G_FREQUENCY	_IOWR('V', 56, struct v4l2_frequency)
+#define VIDIOC_S_FREQUENCY	 _IOW('V', 57, struct v4l2_frequency)
+#define VIDIOC_CROPCAP		_IOWR('V', 58, struct v4l2_cropcap)
+#define VIDIOC_G_CROP		_IOWR('V', 59, struct v4l2_crop)
+#define VIDIOC_S_CROP		 _IOW('V', 60, struct v4l2_crop)
+#define VIDIOC_G_JPEGCOMP	 _IOR('V', 61, struct v4l2_jpegcompression)
+#define VIDIOC_S_JPEGCOMP	 _IOW('V', 62, struct v4l2_jpegcompression)
+#define VIDIOC_QUERYSTD      	 _IOR('V', 63, v4l2_std_id)
+#define VIDIOC_TRY_FMT      	_IOWR('V', 64, struct v4l2_format)
+#define VIDIOC_ENUMAUDIO	_IOWR('V', 65, struct v4l2_audio)
+#define VIDIOC_ENUMAUDOUT	_IOWR('V', 66, struct v4l2_audioout)
+#define VIDIOC_G_PRIORITY        _IOR('V', 67, enum v4l2_priority)
+#define VIDIOC_S_PRIORITY        _IOW('V', 68, enum v4l2_priority)
+#define VIDIOC_G_SLICED_VBI_CAP _IOWR('V', 69, struct v4l2_sliced_vbi_cap)
+#define VIDIOC_LOG_STATUS         _IO('V', 70)
+#define VIDIOC_G_EXT_CTRLS	_IOWR('V', 71, struct v4l2_ext_controls)
+#define VIDIOC_S_EXT_CTRLS	_IOWR('V', 72, struct v4l2_ext_controls)
+#define VIDIOC_TRY_EXT_CTRLS	_IOWR('V', 73, struct v4l2_ext_controls)
 #if 1
-#define VIDIOC_ENUM_FRAMESIZES	_IOWR ('V', 74, struct v4l2_frmsizeenum)
-#define VIDIOC_ENUM_FRAMEINTERVALS	_IOWR ('V', 75, struct v4l2_frmivalenum)
-#define VIDIOC_G_ENC_INDEX      _IOR  ('V', 76, struct v4l2_enc_idx)
-#define VIDIOC_ENCODER_CMD      _IOWR ('V', 77, struct v4l2_encoder_cmd)
-#define VIDIOC_TRY_ENCODER_CMD  _IOWR ('V', 78, struct v4l2_encoder_cmd)
+#define VIDIOC_ENUM_FRAMESIZES	_IOWR('V', 74, struct v4l2_frmsizeenum)
+#define VIDIOC_ENUM_FRAMEINTERVALS _IOWR('V', 75, struct v4l2_frmivalenum)
+#define VIDIOC_G_ENC_INDEX       _IOR('V', 76, struct v4l2_enc_idx)
+#define VIDIOC_ENCODER_CMD      _IOWR('V', 77, struct v4l2_encoder_cmd)
+#define VIDIOC_TRY_ENCODER_CMD  _IOWR('V', 78, struct v4l2_encoder_cmd)
 
 /* Experimental, only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */
-#define	VIDIOC_DBG_S_REGISTER 	_IOW  ('V', 79, struct v4l2_register)
-#define	VIDIOC_DBG_G_REGISTER 	_IOWR ('V', 80, struct v4l2_register)
+#define	VIDIOC_DBG_S_REGISTER 	 _IOW('V', 79, struct v4l2_register)
+#define	VIDIOC_DBG_G_REGISTER 	_IOWR('V', 80, struct v4l2_register)
 
-#define VIDIOC_G_CHIP_IDENT     _IOWR ('V', 81, struct v4l2_chip_ident)
+#define VIDIOC_G_CHIP_IDENT     _IOWR('V', 81, struct v4l2_chip_ident)
 #endif
-#define VIDIOC_S_HW_FREQ_SEEK	_IOW  ('V', 82, struct v4l2_hw_freq_seek)
+#define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
 
 #ifdef __OLD_VIDIOC_
 /* for compatibility, will go away some day */
-#define VIDIOC_OVERLAY_OLD     	_IOWR ('V', 14, int)
-#define VIDIOC_S_PARM_OLD      	_IOW  ('V', 22, struct v4l2_streamparm)
-#define VIDIOC_S_CTRL_OLD      	_IOW  ('V', 28, struct v4l2_control)
-#define VIDIOC_G_AUDIO_OLD     	_IOWR ('V', 33, struct v4l2_audio)
-#define VIDIOC_G_AUDOUT_OLD    	_IOWR ('V', 49, struct v4l2_audioout)
-#define VIDIOC_CROPCAP_OLD     	_IOR  ('V', 58, struct v4l2_cropcap)
+#define VIDIOC_OVERLAY_OLD     	_IOWR('V', 14, int)
+#define VIDIOC_S_PARM_OLD      	 _IOW('V', 22, struct v4l2_streamparm)
+#define VIDIOC_S_CTRL_OLD      	 _IOW('V', 28, struct v4l2_control)
+#define VIDIOC_G_AUDIO_OLD     	_IOWR('V', 33, struct v4l2_audio)
+#define VIDIOC_G_AUDOUT_OLD    	_IOWR('V', 49, struct v4l2_audioout)
+#define VIDIOC_CROPCAP_OLD     	 _IOR('V', 58, struct v4l2_cropcap)
 #endif
 
 #define BASE_VIDIOC_PRIVATE	192		/* 192-255 are private */
 
 #endif /* __LINUX_VIDEODEV2_H */
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
-- 
cgit v1.2.3


From 1250ac6d4ab716dafe0ac245fd31cd3a7cbc0a98 Mon Sep 17 00:00:00 2001
From: Jean-Francois Moine <moinejf@free.fr>
Date: Sat, 26 Jul 2008 08:02:47 -0300
Subject: V4L/DVB (8518): gspca: Remove the remaining frame decoding functions
 from the subdrivers.

SPCA505 and SPCA508 added in the pixel formats.
Decode functions and associated resources removed in spca505, 506 and 508.
The decode routines are now found in the V4L library.

Signed-off-by: Jean-Francois Moine <moinejf@free.fr>
Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 drivers/media/video/gspca/spca505.c | 105 ++++++++++--------------------------
 drivers/media/video/gspca/spca506.c | 105 ++++++++++--------------------------
 drivers/media/video/gspca/spca508.c |  91 +++++++------------------------
 include/linux/videodev2.h           |   2 +
 4 files changed, 76 insertions(+), 227 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/gspca/spca505.c b/drivers/media/video/gspca/spca505.c
index 284d549e4d3e..32ffe5556061 100644
--- a/drivers/media/video/gspca/spca505.c
+++ b/drivers/media/video/gspca/spca505.c
@@ -31,10 +31,6 @@ MODULE_LICENSE("GPL");
 struct sd {
 	struct gspca_dev gspca_dev;		/* !! must be the first item */
 
-	int buflen;
-	unsigned char tmpbuf[640 * 480 * 3 / 2]; /* YYUV per line */
-	unsigned char tmpbuf2[640 * 480 * 2];	/* YUYV */
-
 	unsigned char brightness;
 
 	char subtype;
@@ -64,29 +60,29 @@ static struct ctrl sd_ctrls[] = {
 };
 
 static struct v4l2_pix_format vga_mode[] = {
-	{160, 120, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 160 * 2,
-		.sizeimage = 160 * 120 * 2,
+	{160, 120, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 160 * 3,
+		.sizeimage = 160 * 120 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 5},
-	{176, 144, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 176 * 2,
-		.sizeimage = 176 * 144 * 2,
+	{176, 144, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 176 * 3,
+		.sizeimage = 176 * 144 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 4},
-	{320, 240, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 320 * 2,
-		.sizeimage = 320 * 240 * 2,
+	{320, 240, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 320 * 3,
+		.sizeimage = 320 * 240 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 2},
-	{352, 288, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 352 * 2,
-		.sizeimage = 352 * 288 * 2,
+	{352, 288, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 352 * 3,
+		.sizeimage = 352 * 288 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 1},
-	{640, 480, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 640 * 2,
-		.sizeimage = 640 * 480 * 2,
+	{640, 480, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 640 * 3,
+		.sizeimage = 640 * 480 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 0},
 };
@@ -760,77 +756,30 @@ static void sd_close(struct gspca_dev *gspca_dev)
 	reg_write(gspca_dev->dev, 0x05, 0x11, 0xf);
 }
 
-/* convert YYUV per line to YUYV (YUV 4:2:2) */
-static void yyuv_decode(unsigned char *out,
-			unsigned char *in,
-			int width,
-			int height)
-{
-	unsigned char *Ui, *Vi, *yi, *yi1;
-	unsigned char *out1;
-	int i, j;
-
-	yi = in;
-	for (i = height / 2; --i >= 0; ) {
-		out1 = out + width * 2;		/* next line */
-		yi1 = yi + width;
-		Ui = yi1 + width;
-		Vi = Ui + width / 2;
-		for (j = width / 2; --j >= 0; ) {
-			*out++ = 128 + *yi++;
-			*out++ = 128 + *Ui;
-			*out++ = 128 + *yi++;
-			*out++ = 128 + *Vi;
-
-			*out1++ = 128 + *yi1++;
-			*out1++ = 128 + *Ui++;
-			*out1++ = 128 + *yi1++;
-			*out1++ = 128 + *Vi++;
-		}
-		yi += width * 2;
-		out = out1;
-	}
-}
-
 static void sd_pkt_scan(struct gspca_dev *gspca_dev,
 			struct gspca_frame *frame,	/* target */
 			__u8 *data,			/* isoc packet */
 			int len)			/* iso packet length */
 {
-	struct sd *sd = (struct sd *) gspca_dev;
-
 	switch (data[0]) {
 	case 0:				/* start of frame */
-		if (gspca_dev->last_packet_type == FIRST_PACKET) {
-			yyuv_decode(sd->tmpbuf2, sd->tmpbuf,
-					gspca_dev->width,
-					gspca_dev->height);
-			frame = gspca_frame_add(gspca_dev,
-						LAST_PACKET,
-						frame,
-						sd->tmpbuf2,
-						gspca_dev->width
-							* gspca_dev->height
-							* 2);
-		}
-		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
-				data, 0);
+		frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame,
+					data, 0);
 		data += SPCA50X_OFFSET_DATA;
 		len -= SPCA50X_OFFSET_DATA;
-		if (len > 0)
-			memcpy(sd->tmpbuf, data, len);
-		else
-			len = 0;
-		sd->buflen = len;
-		return;
+		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
+				data, len);
+		break;
 	case 0xff:			/* drop */
 /*		gspca_dev->last_packet_type = DISCARD_PACKET; */
-		return;
+		break;
+	default:
+		data += 1;
+		len -= 1;
+		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
+				data, len);
+		break;
 	}
-	data += 1;
-	len -= 1;
-	memcpy(&sd->tmpbuf[sd->buflen], data, len);
-	sd->buflen += len;
 }
 
 static void setbrightness(struct gspca_dev *gspca_dev)
diff --git a/drivers/media/video/gspca/spca506.c b/drivers/media/video/gspca/spca506.c
index 2c281a0563e5..6fe715c80ad2 100644
--- a/drivers/media/video/gspca/spca506.c
+++ b/drivers/media/video/gspca/spca506.c
@@ -33,10 +33,6 @@ MODULE_LICENSE("GPL");
 struct sd {
 	struct gspca_dev gspca_dev;	/* !! must be the first item */
 
-	int buflen;
-	__u8 tmpbuf[640 * 480 * 3];	/* YYUV per line */
-	__u8 tmpbuf2[640 * 480 * 2];	/* YUYV */
-
 	unsigned char brightness;
 	unsigned char contrast;
 	unsigned char colors;
@@ -115,29 +111,29 @@ static struct ctrl sd_ctrls[] = {
 };
 
 static struct v4l2_pix_format vga_mode[] = {
-	{160, 120, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 160 * 2,
-		.sizeimage = 160 * 120 * 2,
+	{160, 120, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 160 * 3,
+		.sizeimage = 160 * 120 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 5},
-	{176, 144, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 176 * 2,
-		.sizeimage = 176 * 144 * 2,
+	{176, 144, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 176 * 3,
+		.sizeimage = 176 * 144 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 4},
-	{320, 240, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 320 * 2,
-		.sizeimage = 320 * 240 * 2,
+	{320, 240, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 320 * 3,
+		.sizeimage = 320 * 240 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 2},
-	{352, 288, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 352 * 2,
-		.sizeimage = 352 * 288 * 2,
+	{352, 288, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 352 * 3,
+		.sizeimage = 352 * 288 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 1},
-	{640, 480, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 640 * 2,
-		.sizeimage = 640 * 480 * 2,
+	{640, 480, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE,
+		.bytesperline = 640 * 3,
+		.sizeimage = 640 * 480 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 0},
 };
@@ -572,77 +568,30 @@ static void sd_close(struct gspca_dev *gspca_dev)
 {
 }
 
-/* convert YYUV per line to YUYV (YUV 4:2:2) */
-static void yyuv_decode(unsigned char *out,
-			unsigned char *in,
-			int width,
-			int height)
-{
-	unsigned char *Ui, *Vi, *yi, *yi1;
-	unsigned char *out1;
-	int i, j;
-
-	yi = in;
-	for (i = height / 2; --i >= 0; ) {
-		out1 = out + width * 2;		/* next line */
-		yi1 = yi + width;
-		Ui = yi1 + width;
-		Vi = Ui + width / 2;
-		for (j = width / 2; --j >= 0; ) {
-			*out++ = 128 + *yi++;
-			*out++ = 128 + *Ui;
-			*out++ = 128 + *yi++;
-			*out++ = 128 + *Vi;
-
-			*out1++ = 128 + *yi1++;
-			*out1++ = 128 + *Ui++;
-			*out1++ = 128 + *yi1++;
-			*out1++ = 128 + *Vi++;
-		}
-		yi += width * 2;
-		out = out1;
-	}
-}
-
 static void sd_pkt_scan(struct gspca_dev *gspca_dev,
 			struct gspca_frame *frame,	/* target */
 			__u8 *data,			/* isoc packet */
 			int len)			/* iso packet length */
 {
-	struct sd *sd = (struct sd *) gspca_dev;
-
 	switch (data[0]) {
 	case 0:				/* start of frame */
-		if (gspca_dev->last_packet_type == FIRST_PACKET) {
-			yyuv_decode(sd->tmpbuf2, sd->tmpbuf,
-					gspca_dev->width,
-					gspca_dev->height);
-			frame = gspca_frame_add(gspca_dev,
-						LAST_PACKET,
-						frame,
-						sd->tmpbuf2,
-						gspca_dev->width
-							* gspca_dev->height
-							* 2);
-		}
-		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
-				data, 0);
+		frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame,
+					data, 0);
 		data += SPCA50X_OFFSET_DATA;
 		len -= SPCA50X_OFFSET_DATA;
-		if (len > 0)
-			memcpy(sd->tmpbuf, data, len);
-		else
-			len = 0;
-		sd->buflen = len;
-		return;
+		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
+				data, len);
+		break;
 	case 0xff:			/* drop */
 /*		gspca_dev->last_packet_type = DISCARD_PACKET; */
-		return;
+		break;
+	default:
+		data += 1;
+		len -= 1;
+		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
+				data, len);
+		break;
 	}
-	data += 1;
-	len -= 1;
-	memcpy(&sd->tmpbuf[sd->buflen], data, len);
-	sd->buflen += len;
 }
 
 static void setbrightness(struct gspca_dev *gspca_dev)
diff --git a/drivers/media/video/gspca/spca508.c b/drivers/media/video/gspca/spca508.c
index af531d62856c..4378e966edcc 100644
--- a/drivers/media/video/gspca/spca508.c
+++ b/drivers/media/video/gspca/spca508.c
@@ -30,10 +30,6 @@ MODULE_LICENSE("GPL");
 struct sd {
 	struct gspca_dev gspca_dev;		/* !! must be the first item */
 
-	int buflen;
-	unsigned char tmpbuf[352 * 288 * 3 / 2]; /* YUVY per line */
-	unsigned char tmpbuf2[352 * 288 * 2];	/* YUYV */
-
 	unsigned char brightness;
 
 	char subtype;
@@ -68,23 +64,23 @@ static struct ctrl sd_ctrls[] = {
 
 static struct v4l2_pix_format sif_mode[] = {
 	{160, 120, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 160 * 2,
-		.sizeimage = 160 * 120 * 2,
+		.bytesperline = 160 * 3,
+		.sizeimage = 160 * 120 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 3},
 	{176, 144, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 176 * 2,
-		.sizeimage = 176 * 144 * 2,
+		.bytesperline = 176 * 3,
+		.sizeimage = 176 * 144 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 2},
 	{320, 240, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 320 * 2,
-		.sizeimage = 320 * 240 * 2,
+		.bytesperline = 320 * 3,
+		.sizeimage = 320 * 240 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 1},
 	{352, 288, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE,
-		.bytesperline = 352 * 2,
-		.sizeimage = 352 * 288 * 2,
+		.bytesperline = 352 * 3,
+		.sizeimage = 352 * 288 * 3 / 2,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 0},
 };
@@ -1567,77 +1563,30 @@ static void sd_close(struct gspca_dev *gspca_dev)
 {
 }
 
-/* convert YUVY per line to YUYV (YUV 4:2:2) */
-static void yuvy_decode(unsigned char *out,
-			unsigned char *in,
-			int width,
-			int height)
-{
-	unsigned char *Ui, *Vi, *yi, *yi1;
-	unsigned char *out1;
-	int i, j;
-
-	yi = in;
-	for (i = height / 2; --i >= 0; ) {
-		out1 = out + width * 2;		/* next line */
-		Ui = yi + width;
-		Vi = Ui + width / 2;
-		yi1 = Vi + width / 2;
-		for (j = width / 2; --j >= 0; ) {
-			*out++ = 128 + *yi++;
-			*out++ = 128 + *Ui;
-			*out++ = 128 + *yi++;
-			*out++ = 128 + *Vi;
-
-			*out1++ = 128 + *yi1++;
-			*out1++ = 128 + *Ui++;
-			*out1++ = 128 + *yi1++;
-			*out1++ = 128 + *Vi++;
-		}
-		yi += width * 2;
-		out = out1;
-	}
-}
-
 static void sd_pkt_scan(struct gspca_dev *gspca_dev,
 			struct gspca_frame *frame,	/* target */
 			__u8 *data,			/* isoc packet */
 			int len)			/* iso packet length */
 {
-	struct sd *sd = (struct sd *) gspca_dev;
-
 	switch (data[0]) {
 	case 0:				/* start of frame */
-		if (gspca_dev->last_packet_type == FIRST_PACKET) {
-			yuvy_decode(sd->tmpbuf2, sd->tmpbuf,
-					gspca_dev->width,
-					gspca_dev->height);
-			frame = gspca_frame_add(gspca_dev,
-						LAST_PACKET,
-						frame,
-						sd->tmpbuf2,
-						gspca_dev->width
-							* gspca_dev->height
-							* 2);
-		}
-		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
-				data, 0);
+		frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame,
+					data, 0);
 		data += SPCA508_OFFSET_DATA;
 		len -= SPCA508_OFFSET_DATA;
-		if (len > 0)
-			memcpy(sd->tmpbuf, data, len);
-		else
-			len = 0;
-		sd->buflen = len;
-		return;
+		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
+				data, len);
+		break;
 	case 0xff:			/* drop */
 /*		gspca_dev->last_packet_type = DISCARD_PACKET; */
-		return;
+		break;
+	default:
+		data += 1;
+		len -= 1;
+		gspca_frame_add(gspca_dev, FIRST_PACKET, frame,
+				data, len);
+		break;
 	}
-	data += 1;
-	len -= 1;
-	memcpy(&sd->tmpbuf[sd->buflen], data, len);
-	sd->buflen += len;
 }
 
 static void setbrightness(struct gspca_dev *gspca_dev)
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index cc0c8952323b..7d9ac046389e 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -324,6 +324,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_PWC2     v4l2_fourcc('P', 'W', 'C', '2') /* pwc newer webcam */
 #define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E', '6', '2', '5') /* ET61X251 compression */
 #define V4L2_PIX_FMT_SPCA501  v4l2_fourcc('S', '5', '0', '1') /* YUYV per line */
+#define V4L2_PIX_FMT_SPCA505  v4l2_fourcc('S','5','0','5') /* YYUV per line */
+#define V4L2_PIX_FMT_SPCA508  v4l2_fourcc('S','5','0','8') /* YUVY per line */
 #define V4L2_PIX_FMT_SPCA561  v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_PAC207   v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */
 
-- 
cgit v1.2.3


From c1d7f4f1648cb8efd87f1b9560c40af2297e7c05 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 26 Jul 2008 08:33:47 -0300
Subject: V4L/DVB (8524): videodev: copy the VID_TYPE defines to videodev.h

The VID_TYPE defines are V4L1 specific, so copy them back to videodev.h.
In videodev2.h ensure that they are not used in the kernel (you need
to include videodev.h instead) and mark them are deprecated.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videodev.h  | 15 +++++++++++++++
 include/linux/videodev2.h |  6 ++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev.h b/include/linux/videodev.h
index 9385a566aed8..15a653d41132 100644
--- a/include/linux/videodev.h
+++ b/include/linux/videodev.h
@@ -17,6 +17,21 @@
 
 #if defined(CONFIG_VIDEO_V4L1_COMPAT) || !defined (__KERNEL__)
 
+#define VID_TYPE_CAPTURE	1	/* Can capture */
+#define VID_TYPE_TUNER		2	/* Can tune */
+#define VID_TYPE_TELETEXT	4	/* Does teletext */
+#define VID_TYPE_OVERLAY	8	/* Overlay onto frame buffer */
+#define VID_TYPE_CHROMAKEY	16	/* Overlay by chromakey */
+#define VID_TYPE_CLIPPING	32	/* Can clip */
+#define VID_TYPE_FRAMERAM	64	/* Uses the frame buffer memory */
+#define VID_TYPE_SCALES		128	/* Scalable */
+#define VID_TYPE_MONOCHROME	256	/* Monochrome only */
+#define VID_TYPE_SUBCAPTURE	512	/* Can capture subareas of the image */
+#define VID_TYPE_MPEG_DECODER	1024	/* Can decode MPEG streams */
+#define VID_TYPE_MPEG_ENCODER	2048	/* Can encode MPEG streams */
+#define VID_TYPE_MJPEG_DECODER	4096	/* Can decode MJPEG streams */
+#define VID_TYPE_MJPEG_ENCODER	8192	/* Can encode MJPEG streams */
+
 struct video_capability
 {
 	char name[32];
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 7d9ac046389e..f7195351a1e7 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -71,6 +71,11 @@
  */
 #define VIDEO_MAX_FRAME               32
 
+#ifndef __KERNEL__
+
+/* These defines are V4L1 specific and should not be used with the V4L2 API!
+   They will be removed from this header in the future. */
+
 #define VID_TYPE_CAPTURE	1	/* Can capture */
 #define VID_TYPE_TUNER		2	/* Can tune */
 #define VID_TYPE_TELETEXT	4	/* Does teletext */
@@ -85,6 +90,7 @@
 #define VID_TYPE_MPEG_ENCODER	2048	/* Can encode MPEG streams */
 #define VID_TYPE_MJPEG_DECODER	4096	/* Can decode MJPEG streams */
 #define VID_TYPE_MJPEG_ENCODER	8192	/* Can encode MJPEG streams */
+#endif
 
 /*
  *	M I S C E L L A N E O U S
-- 
cgit v1.2.3


From 9fa0f6db3a201bef49f28e69f80802559a38586b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@infradead.org>
Date: Sun, 27 Jul 2008 08:55:17 -0300
Subject: V4L/DVB (8522): videodev2: Fix merge conflict

Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videodev2.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index f7195351a1e7..e466bd54a50e 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -330,8 +330,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_PWC2     v4l2_fourcc('P', 'W', 'C', '2') /* pwc newer webcam */
 #define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E', '6', '2', '5') /* ET61X251 compression */
 #define V4L2_PIX_FMT_SPCA501  v4l2_fourcc('S', '5', '0', '1') /* YUYV per line */
-#define V4L2_PIX_FMT_SPCA505  v4l2_fourcc('S','5','0','5') /* YYUV per line */
-#define V4L2_PIX_FMT_SPCA508  v4l2_fourcc('S','5','0','8') /* YUVY per line */
+#define V4L2_PIX_FMT_SPCA505  v4l2_fourcc('S', '5', '0', '5') /* YYUV per line */
+#define V4L2_PIX_FMT_SPCA508  v4l2_fourcc('S', '5', '0', '8') /* YUVY per line */
 #define V4L2_PIX_FMT_SPCA561  v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_PAC207   v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */
 
-- 
cgit v1.2.3


From 5c2aed622571ac7c3c6ec182d6d3c318e4b45c8b Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 28 Feb 2008 11:33:03 -0500
Subject: stop_machine: add ALL_CPUS option

-allow stop_mahcine_run() to call a function on all cpus. Calling
 stop_machine_run() with a 'ALL_CPUS' invokes this new behavior.
 stop_machine_run() proceeds as normal until the calling cpu has
 invoked 'fn'. Then, we tell all the other cpus to call 'fn'.

Signed-off-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
CC: Adrian Bunk <bunk@stusta.de>
CC: Andi Kleen <andi@firstfloor.org>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Christoph Hellwig <hch@infradead.org>
CC: mingo@elte.hu
CC: akpm@osdl.org
---
 include/linux/stop_machine.h |  8 +++++++-
 kernel/stop_machine.c        | 32 +++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 5bfc553bdb21..18af011c13af 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -8,11 +8,17 @@
 #include <asm/system.h>
 
 #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+
+#define ALL_CPUS ~0U
+
 /**
  * stop_machine_run: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn()
- * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS.
+ * @cpu: if @cpu == n, run @fn() on cpu n
+ *       if @cpu == NR_CPUS, run @fn() on any cpu
+ *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
+ *       concurrently on all the other cpus
  *
  * Description: This causes a thread to be scheduled on every other cpu,
  * each of which disables interrupts, and finally interrupts are disabled
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 738b411ff2d3..a473bd0cb71b 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -22,9 +22,17 @@ enum stopmachine_state {
 	STOPMACHINE_WAIT,
 	STOPMACHINE_PREPARE,
 	STOPMACHINE_DISABLE_IRQ,
+	STOPMACHINE_RUN,
 	STOPMACHINE_EXIT,
 };
 
+struct stop_machine_data {
+	int (*fn)(void *);
+	void *data;
+	struct completion done;
+	int run_all;
+} smdata;
+
 static enum stopmachine_state stopmachine_state;
 static unsigned int stopmachine_num_threads;
 static atomic_t stopmachine_thread_ack;
@@ -33,6 +41,7 @@ static int stopmachine(void *cpu)
 {
 	int irqs_disabled = 0;
 	int prepared = 0;
+	int ran = 0;
 	cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
 
 	set_cpus_allowed_ptr(current, cpumask);
@@ -58,6 +67,11 @@ static int stopmachine(void *cpu)
 			prepared = 1;
 			smp_mb(); /* Must read state first. */
 			atomic_inc(&stopmachine_thread_ack);
+		} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
+			smdata.fn(smdata.data);
+			ran = 1;
+			smp_mb(); /* Must read state first. */
+			atomic_inc(&stopmachine_thread_ack);
 		}
 		/* Yield in first stage: migration threads need to
 		 * help our sisters onto their CPUs. */
@@ -136,11 +150,10 @@ static void restart_machine(void)
 	preempt_enable_no_resched();
 }
 
-struct stop_machine_data {
-	int (*fn)(void *);
-	void *data;
-	struct completion done;
-};
+static void run_other_cpus(void)
+{
+	stopmachine_set_state(STOPMACHINE_RUN);
+}
 
 static int do_stop(void *_smdata)
 {
@@ -150,6 +163,8 @@ static int do_stop(void *_smdata)
 	ret = stop_machine();
 	if (ret == 0) {
 		ret = smdata->fn(smdata->data);
+		if (smdata->run_all)
+			run_other_cpus();
 		restart_machine();
 	}
 
@@ -173,14 +188,17 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
 	struct stop_machine_data smdata;
 	struct task_struct *p;
 
+	mutex_lock(&stopmachine_mutex);
+
 	smdata.fn = fn;
 	smdata.data = data;
+	smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
 	init_completion(&smdata.done);
 
-	mutex_lock(&stopmachine_mutex);
+	smp_wmb(); /* make sure other cpus see smdata updates */
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
-	if (cpu == NR_CPUS)
+	if (cpu == NR_CPUS || cpu == ALL_CPUS)
 		cpu = raw_smp_processor_id();
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
-- 
cgit v1.2.3


From ffdb5976c47609c862917d4c186ecbb5706d2dda Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 28 Jul 2008 12:16:28 -0500
Subject: Simplify stop_machine

stop_machine creates a kthread which creates kernel threads.  We can
create those threads directly and simplify things a little.  Some care
must be taken with CPU hotunplug, which has special needs, but that code
seems more robust than it was in the past.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/stop_machine.h |  20 ++-
 kernel/cpu.c                 |  13 +-
 kernel/stop_machine.c        | 293 ++++++++++++++++++-------------------------
 3 files changed, 136 insertions(+), 190 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 18af011c13af..36c2c7284eb3 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -17,13 +17,12 @@
  * @data: the data ptr for the @fn()
  * @cpu: if @cpu == n, run @fn() on cpu n
  *       if @cpu == NR_CPUS, run @fn() on any cpu
- *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
- *       concurrently on all the other cpus
+ *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
  *
- * Description: This causes a thread to be scheduled on every other cpu,
- * each of which disables interrupts, and finally interrupts are disabled
- * on the current CPU.  The result is that noone is holding a spinlock
- * or inside any other preempt-disabled region when @fn() runs.
+ * Description: This causes a thread to be scheduled on every cpu,
+ * each of which disables interrupts.  The result is that noone is
+ * holding a spinlock or inside any other preempt-disabled region when
+ * @fn() runs.
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
  * @data: the data ptr for the @fn
  * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
  *
- * Description: This is a special version of the above, which returns the
- * thread which has run @fn(): kthread_stop will return the return value
- * of @fn().  Used by hotplug cpu.
+ * Description: This is a special version of the above, which assumes cpus
+ * won't come or go while it's being called.  Used by hotplug cpu.
  */
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-				       unsigned int cpu);
-
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
 #else
 
 static inline int stop_machine_run(int (*fn)(void *), void *data,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 10ba5f1004a5..cf79bb911371 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param)
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed_ptr(current, &tmp);
 
-	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+	err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
 
-	if (IS_ERR(p) || cpu_online(cpu)) {
+	if (err || cpu_online(cpu)) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		if (IS_ERR(p)) {
-			err = PTR_ERR(p);
-			goto out_allowed;
-		}
-		goto out_thread;
+		goto out_allowed;
 	}
 
 	/* Wait for it to sleep (leaving idle task). */
@@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	check_for_tasks(cpu);
 
-out_thread:
-	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed_ptr(current, &old_allowed);
 out_release:
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index a473bd0cb71b..35882dccc943 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,4 +1,4 @@
-/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
+/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
  * GPL v2 and any later version.
  */
 #include <linux/cpu.h>
@@ -13,220 +13,177 @@
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
-/* Since we effect priority and affinity (both of which are visible
- * to, and settable by outside processes) we do indirection via a
- * kthread. */
-
-/* Thread to stop each CPU in user context. */
+/* This controls the threads on each CPU. */
 enum stopmachine_state {
-	STOPMACHINE_WAIT,
+	/* Dummy starting state for thread. */
+	STOPMACHINE_NONE,
+	/* Awaiting everyone to be scheduled. */
 	STOPMACHINE_PREPARE,
+	/* Disable interrupts. */
 	STOPMACHINE_DISABLE_IRQ,
+	/* Run the function */
 	STOPMACHINE_RUN,
+	/* Exit */
 	STOPMACHINE_EXIT,
 };
+static enum stopmachine_state state;
 
 struct stop_machine_data {
 	int (*fn)(void *);
 	void *data;
-	struct completion done;
-	int run_all;
-} smdata;
+	int fnret;
+};
 
-static enum stopmachine_state stopmachine_state;
-static unsigned int stopmachine_num_threads;
-static atomic_t stopmachine_thread_ack;
+/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+static unsigned int num_threads;
+static atomic_t thread_ack;
+static struct completion finished;
+static DEFINE_MUTEX(lock);
 
-static int stopmachine(void *cpu)
+static void set_state(enum stopmachine_state newstate)
 {
-	int irqs_disabled = 0;
-	int prepared = 0;
-	int ran = 0;
-	cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
-
-	set_cpus_allowed_ptr(current, cpumask);
-
-	/* Ack: we are alive */
-	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
-	atomic_inc(&stopmachine_thread_ack);
-
-	/* Simple state machine */
-	while (stopmachine_state != STOPMACHINE_EXIT) {
-		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
-		    && !irqs_disabled) {
-			local_irq_disable();
-			hard_irq_disable();
-			irqs_disabled = 1;
-			/* Ack: irqs disabled. */
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
-		} else if (stopmachine_state == STOPMACHINE_PREPARE
-			   && !prepared) {
-			/* Everyone is in place, hold CPU. */
-			preempt_disable();
-			prepared = 1;
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
-		} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
-			smdata.fn(smdata.data);
-			ran = 1;
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
-		}
-		/* Yield in first stage: migration threads need to
-		 * help our sisters onto their CPUs. */
-		if (!prepared && !irqs_disabled)
-			yield();
-		cpu_relax();
-	}
-
-	/* Ack: we are exiting. */
-	smp_mb(); /* Must read state first. */
-	atomic_inc(&stopmachine_thread_ack);
-
-	if (irqs_disabled)
-		local_irq_enable();
-	if (prepared)
-		preempt_enable();
-
-	return 0;
+	/* Reset ack counter. */
+	atomic_set(&thread_ack, num_threads);
+	smp_wmb();
+	state = newstate;
 }
 
-/* Change the thread state */
-static void stopmachine_set_state(enum stopmachine_state state)
+/* Last one to ack a state moves to the next state. */
+static void ack_state(void)
 {
-	atomic_set(&stopmachine_thread_ack, 0);
-	smp_wmb();
-	stopmachine_state = state;
-	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
-		cpu_relax();
+	if (atomic_dec_and_test(&thread_ack)) {
+		/* If we're the last one to ack the EXIT, we're finished. */
+		if (state == STOPMACHINE_EXIT)
+			complete(&finished);
+		else
+			set_state(state + 1);
+	}
 }
 
-static int stop_machine(void)
+/* This is the actual thread which stops the CPU.  It exits by itself rather
+ * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
+static int stop_cpu(struct stop_machine_data *smdata)
 {
-	int i, ret = 0;
-
-	atomic_set(&stopmachine_thread_ack, 0);
-	stopmachine_num_threads = 0;
-	stopmachine_state = STOPMACHINE_WAIT;
+	enum stopmachine_state curstate = STOPMACHINE_NONE;
+	int uninitialized_var(ret);
 
-	for_each_online_cpu(i) {
-		if (i == raw_smp_processor_id())
-			continue;
-		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
-		if (ret < 0)
-			break;
-		stopmachine_num_threads++;
-	}
-
-	/* Wait for them all to come to life. */
-	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
-		yield();
+	/* Simple state machine */
+	do {
+		/* Chill out and ensure we re-read stopmachine_state. */
 		cpu_relax();
-	}
-
-	/* If some failed, kill them all. */
-	if (ret < 0) {
-		stopmachine_set_state(STOPMACHINE_EXIT);
-		return ret;
-	}
-
-	/* Now they are all started, make them hold the CPUs, ready. */
-	preempt_disable();
-	stopmachine_set_state(STOPMACHINE_PREPARE);
-
-	/* Make them disable irqs. */
-	local_irq_disable();
-	hard_irq_disable();
-	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
-
-	return 0;
-}
+		if (state != curstate) {
+			curstate = state;
+			switch (curstate) {
+			case STOPMACHINE_DISABLE_IRQ:
+				local_irq_disable();
+				hard_irq_disable();
+				break;
+			case STOPMACHINE_RUN:
+				/* |= allows error detection if functions on
+				 * multiple CPUs. */
+				smdata->fnret |= smdata->fn(smdata->data);
+				break;
+			default:
+				break;
+			}
+			ack_state();
+		}
+	} while (curstate != STOPMACHINE_EXIT);
 
-static void restart_machine(void)
-{
-	stopmachine_set_state(STOPMACHINE_EXIT);
 	local_irq_enable();
-	preempt_enable_no_resched();
+	do_exit(0);
 }
 
-static void run_other_cpus(void)
+/* Callback for CPUs which aren't supposed to do anything. */
+static int chill(void *unused)
 {
-	stopmachine_set_state(STOPMACHINE_RUN);
+	return 0;
 }
 
-static int do_stop(void *_smdata)
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-	struct stop_machine_data *smdata = _smdata;
-	int ret;
+	int i, err;
+	struct stop_machine_data active, idle;
+	struct task_struct **threads;
+
+	active.fn = fn;
+	active.data = data;
+	active.fnret = 0;
+	idle.fn = chill;
+	idle.data = NULL;
+
+	/* If they don't care which cpu fn runs on, just pick one. */
+	if (cpu == NR_CPUS)
+		cpu = any_online_cpu(cpu_online_map);
+
+	/* This could be too big for stack on large machines. */
+	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
+	if (!threads)
+		return -ENOMEM;
+
+	/* Set up initial state. */
+	mutex_lock(&lock);
+	init_completion(&finished);
+	num_threads = num_online_cpus();
+	set_state(STOPMACHINE_PREPARE);
 
-	ret = stop_machine();
-	if (ret == 0) {
-		ret = smdata->fn(smdata->data);
-		if (smdata->run_all)
-			run_other_cpus();
-		restart_machine();
-	}
+	for_each_online_cpu(i) {
+		struct stop_machine_data *smdata;
+		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
-	/* We're done: you can kthread_stop us now */
-	complete(&smdata->done);
+		if (cpu == ALL_CPUS || i == cpu)
+			smdata = &active;
+		else
+			smdata = &idle;
+
+		threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
+					    i);
+		if (IS_ERR(threads[i])) {
+			err = PTR_ERR(threads[i]);
+			threads[i] = NULL;
+			goto kill_threads;
+		}
 
-	/* Wait for kthread_stop */
-	set_current_state(TASK_INTERRUPTIBLE);
-	while (!kthread_should_stop()) {
-		schedule();
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	__set_current_state(TASK_RUNNING);
-	return ret;
-}
+		/* Place it onto correct cpu. */
+		kthread_bind(threads[i], i);
 
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-				       unsigned int cpu)
-{
-	static DEFINE_MUTEX(stopmachine_mutex);
-	struct stop_machine_data smdata;
-	struct task_struct *p;
+		/* Make it highest prio. */
+		if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
+			BUG();
+	}
 
-	mutex_lock(&stopmachine_mutex);
+	/* We've created all the threads.  Wake them all: hold this CPU so one
+	 * doesn't hit this CPU until we're ready. */
+	cpu = get_cpu();
+	for_each_online_cpu(i)
+		wake_up_process(threads[i]);
 
-	smdata.fn = fn;
-	smdata.data = data;
-	smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
-	init_completion(&smdata.done);
+	/* This will release the thread on our CPU. */
+	put_cpu();
+	wait_for_completion(&finished);
+	mutex_unlock(&lock);
 
-	smp_wmb(); /* make sure other cpus see smdata updates */
+	kfree(threads);
 
-	/* If they don't care which CPU fn runs on, bind to any online one. */
-	if (cpu == NR_CPUS || cpu == ALL_CPUS)
-		cpu = raw_smp_processor_id();
+	return active.fnret;
 
-	p = kthread_create(do_stop, &smdata, "kstopmachine");
-	if (!IS_ERR(p)) {
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+kill_threads:
+	for_each_online_cpu(i)
+		if (threads[i])
+			kthread_stop(threads[i]);
+	mutex_unlock(&lock);
 
-		/* One high-prio thread per cpu.  We'll do this one. */
-		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-		kthread_bind(p, cpu);
-		wake_up_process(p);
-		wait_for_completion(&smdata.done);
-	}
-	mutex_unlock(&stopmachine_mutex);
-	return p;
+	kfree(threads);
+	return err;
 }
 
 int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-	struct task_struct *p;
 	int ret;
 
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
-	p = __stop_machine_run(fn, data, cpu);
-	if (!IS_ERR(p))
-		ret = kthread_stop(p);
-	else
-		ret = PTR_ERR(p);
+	ret = __stop_machine_run(fn, data, cpu);
 	put_online_cpus();
 
 	return ret;
-- 
cgit v1.2.3


From eeec4fad963490821348a331cca6102ae1c4a7a3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 28 Jul 2008 12:16:30 -0500
Subject: stop_machine(): stop_machine_run() changed to use cpu mask

Instead of a "cpu" arg with magic values NR_CPUS (any cpu) and ~0 (all
cpus), pass a cpumask_t.  Allow NULL for the common case (where we
don't care which CPU the function is run on): temporary cpumask_t's
are usually considered bad for stack space.

This deprecates stop_machine_run, to be removed soon when all the
callers are dead.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/stop_machine.h | 34 ++++++++++++++++++++++++----------
 kernel/cpu.c                 |  3 ++-
 kernel/stop_machine.c        | 27 +++++++++++++--------------
 3 files changed, 39 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 36c2c7284eb3..f1cb0ba6d715 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -5,19 +5,19 @@
    (and more).  So the "read" side to such a lock is anything which
    diables preeempt. */
 #include <linux/cpu.h>
+#include <linux/cpumask.h>
 #include <asm/system.h>
 
 #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
 
+/* Deprecated, but useful for transition. */
 #define ALL_CPUS ~0U
 
 /**
- * stop_machine_run: freeze the machine on all CPUs and run this function
+ * stop_machine: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn()
- * @cpu: if @cpu == n, run @fn() on cpu n
- *       if @cpu == NR_CPUS, run @fn() on any cpu
- *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
  *
  * Description: This causes a thread to be scheduled on every cpu,
  * each of which disables interrupts.  The result is that noone is
@@ -26,22 +26,22 @@
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
 
 /**
- * __stop_machine_run: freeze the machine on all CPUs and run this function
+ * __stop_machine: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn
- * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
  *
  * Description: This is a special version of the above, which assumes cpus
  * won't come or go while it's being called.  Used by hotplug cpu.
  */
-int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
 #else
 
-static inline int stop_machine_run(int (*fn)(void *), void *data,
-				   unsigned int cpu)
+static inline int stop_machine(int (*fn)(void *), void *data,
+			       const cpumask_t *cpus)
 {
 	int ret;
 	local_irq_disable();
@@ -50,4 +50,18 @@ static inline int stop_machine_run(int (*fn)(void *), void *data,
 	return ret;
 }
 #endif /* CONFIG_SMP */
+
+static inline int __deprecated stop_machine_run(int (*fn)(void *), void *data,
+						unsigned int cpu)
+{
+	/* If they don't care which cpu fn runs on, just pick one. */
+	if (cpu == NR_CPUS)
+		return stop_machine(fn, data, NULL);
+	else if (cpu == ~0U)
+		return stop_machine(fn, data, &cpu_possible_map);
+	else {
+		cpumask_t cpus = cpumask_of_cpu(cpu);
+		return stop_machine(fn, data, &cpus);
+	}
+}
 #endif /* _LINUX_STOP_MACHINE */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 53cf508f975a..29510d68338a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -248,8 +248,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	cpus_setall(tmp);
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed_ptr(current, &tmp);
+	tmp = cpumask_of_cpu(cpu);
 
-	err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+	err = __stop_machine(take_cpu_down, &tcd_param, &tmp);
 	if (err) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 35882dccc943..e446c7c7d6a9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -100,7 +100,7 @@ static int chill(void *unused)
 	return 0;
 }
 
-int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 {
 	int i, err;
 	struct stop_machine_data active, idle;
@@ -112,10 +112,6 @@ int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 	idle.fn = chill;
 	idle.data = NULL;
 
-	/* If they don't care which cpu fn runs on, just pick one. */
-	if (cpu == NR_CPUS)
-		cpu = any_online_cpu(cpu_online_map);
-
 	/* This could be too big for stack on large machines. */
 	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
 	if (!threads)
@@ -128,13 +124,16 @@ int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 	set_state(STOPMACHINE_PREPARE);
 
 	for_each_online_cpu(i) {
-		struct stop_machine_data *smdata;
+		struct stop_machine_data *smdata = &idle;
 		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
-		if (cpu == ALL_CPUS || i == cpu)
-			smdata = &active;
-		else
-			smdata = &idle;
+		if (!cpus) {
+			if (i == first_cpu(cpu_online_map))
+				smdata = &active;
+		} else {
+			if (cpu_isset(i, *cpus))
+				smdata = &active;
+		}
 
 		threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
 					    i);
@@ -154,7 +153,7 @@ int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 
 	/* We've created all the threads.  Wake them all: hold this CPU so one
 	 * doesn't hit this CPU until we're ready. */
-	cpu = get_cpu();
+	get_cpu();
 	for_each_online_cpu(i)
 		wake_up_process(threads[i]);
 
@@ -177,15 +176,15 @@ kill_threads:
 	return err;
 }
 
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 {
 	int ret;
 
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
-	ret = __stop_machine_run(fn, data, cpu);
+	ret = __stop_machine(fn, data, cpus);
 	put_online_cpus();
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(stop_machine_run);
+EXPORT_SYMBOL_GPL(stop_machine);
-- 
cgit v1.2.3


From 9403540c0653122ca34884a180439ddbfcbcb524 Mon Sep 17 00:00:00 2001
From: Barry Naujok <bnaujok@sgi.com>
Date: Wed, 21 May 2008 16:50:46 +1000
Subject: dcache: Add case-insensitive support d_ci_add() routine

This add a dcache entry to the dcache for lookup, but changing the name
that is associated with the entry rather than the one passed in to the
lookup routine.

First, it sees if the case-exact match already exists in the dcache and
uses it if one exists. Otherwise, it allocates a new node with the new
name and splices it into the dcache.

Original code from ntfs_lookup in fs/ntfs/namei.c by Anton Altaparmakov.

Signed-off-by: Barry Naujok <bnaujok@sgi.com>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Acked-by: Christoph Hellwig <hch@infradead.org>
---
 fs/dcache.c            | 102 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |   1 +
 2 files changed, 103 insertions(+)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index f2584d22cb45..101663d15e9f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	return new;
 }
 
+/**
+ * d_add_ci - lookup or allocate new dentry with case-exact name
+ * @inode:  the inode case-insensitive lookup has found
+ * @dentry: the negative dentry that was passed to the parent's lookup func
+ * @name:   the case-exact name to be associated with the returned dentry
+ *
+ * This is to avoid filling the dcache with case-insensitive names to the
+ * same inode, only the actual correct case is stored in the dcache for
+ * case-insensitive filesystems.
+ *
+ * For a case-insensitive lookup match and if the the case-exact dentry
+ * already exists in in the dcache, use it and return it.
+ *
+ * If no entry exists with the exact case name, allocate new dentry with
+ * the exact case, and return the spliced entry.
+ */
+struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
+			struct qstr *name)
+{
+	int error;
+	struct dentry *found;
+	struct dentry *new;
+
+	/* Does a dentry matching the name exist already? */
+	found = d_hash_and_lookup(dentry->d_parent, name);
+	/* If not, create it now and return */
+	if (!found) {
+		new = d_alloc(dentry->d_parent, name);
+		if (!new) {
+			error = -ENOMEM;
+			goto err_out;
+		}
+		found = d_splice_alias(inode, new);
+		if (found) {
+			dput(new);
+			return found;
+		}
+		return new;
+	}
+	/* Matching dentry exists, check if it is negative. */
+	if (found->d_inode) {
+		if (unlikely(found->d_inode != inode)) {
+			/* This can't happen because bad inodes are unhashed. */
+			BUG_ON(!is_bad_inode(inode));
+			BUG_ON(!is_bad_inode(found->d_inode));
+		}
+		/*
+		 * Already have the inode and the dentry attached, decrement
+		 * the reference count to balance the iget() done
+		 * earlier on.  We found the dentry using d_lookup() so it
+		 * cannot be disconnected and thus we do not need to worry
+		 * about any NFS/disconnectedness issues here.
+		 */
+		iput(inode);
+		return found;
+	}
+	/*
+	 * Negative dentry: instantiate it unless the inode is a directory and
+	 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
+	 * in which case d_move() that in place of the found dentry.
+	 */
+	if (!S_ISDIR(inode->i_mode)) {
+		/* Not a directory; everything is easy. */
+		d_instantiate(found, inode);
+		return found;
+	}
+	spin_lock(&dcache_lock);
+	if (list_empty(&inode->i_dentry)) {
+		/*
+		 * Directory without a 'disconnected' dentry; we need to do
+		 * d_instantiate() by hand because it takes dcache_lock which
+		 * we already hold.
+		 */
+		list_add(&found->d_alias, &inode->i_dentry);
+		found->d_inode = inode;
+		spin_unlock(&dcache_lock);
+		security_d_instantiate(found, inode);
+		return found;
+	}
+	/*
+	 * Directory with a 'disconnected' dentry; get a reference to the
+	 * 'disconnected' dentry.
+	 */
+	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+	dget_locked(new);
+	spin_unlock(&dcache_lock);
+	/* Do security vodoo. */
+	security_d_instantiate(found, inode);
+	/* Move new in place of found. */
+	d_move(new, found);
+	/* Balance the iget() we did above. */
+	iput(inode);
+	/* Throw away found. */
+	dput(found);
+	/* Use new as the actual dentry. */
+	return new;
+
+err_out:
+	iput(inode);
+	return ERR_PTR(error);
+}
 
 /**
  * d_lookup - search for a dentry
@@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
 EXPORT_SYMBOL(d_splice_alias);
+EXPORT_SYMBOL(d_add_ci);
 EXPORT_SYMBOL(d_validate);
 EXPORT_SYMBOL(dget_locked);
 EXPORT_SYMBOL(dput);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 98202c672fde..07aa198f19ed 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -230,6 +230,7 @@ extern void d_delete(struct dentry *);
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_anon(struct inode *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
+extern struct dentry * d_add_ci(struct inode *, struct dentry *, struct qstr *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
 extern void shrink_dcache_for_umount(struct super_block *);
-- 
cgit v1.2.3


From 306cfd630a4d121cf4e08b894d8b4c4cf106e57e Mon Sep 17 00:00:00 2001
From: Adrian McMenamin <adrian@newgolddream.dyndns.info>
Date: Sun, 15 Jun 2008 20:48:09 +0100
Subject: maple: tidy maple_driver code by removing redundant
 connect/disconnect

The connect and disconnect functions are unnecessary - everything they do can be
accomplished in the initial probe - so remove them.

Signed-off-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/maple.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/maple.h b/include/linux/maple.h
index d31e36ebb436..523a286bb477 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -61,8 +61,6 @@ struct maple_device {
 
 struct maple_driver {
 	unsigned long function;
-	int (*connect) (struct maple_device * dev);
-	void (*disconnect) (struct maple_device * dev);
 	struct device_driver drv;
 };
 
-- 
cgit v1.2.3


From 7f71ac9374fec066e428892a68db158946cee1fb Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 28 Jul 2008 18:29:09 +0200
Subject: mfd: Coding style fixes

Fix some coding style fixes in the mfd core driver.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/mfd-core.c   | 15 +++++++--------
 include/linux/mfd/core.h | 17 ++++++++---------
 2 files changed, 15 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 4dc861a7ac56..50207700140c 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -16,9 +16,9 @@
 #include <linux/mfd/core.h>
 
 static int mfd_add_device(struct platform_device *parent,
-		const struct mfd_cell *cell,
-		struct resource *mem_base,
-		int irq_base)
+			  const struct mfd_cell *cell,
+			  struct resource *mem_base,
+			  int irq_base)
 {
 	struct resource res[cell->num_resources];
 	struct platform_device *pdev;
@@ -75,11 +75,10 @@ fail_alloc:
 	return ret;
 }
 
-int mfd_add_devices(
-		struct platform_device *parent,
-		const struct mfd_cell *cells, int n_devs,
-		struct resource *mem_base,
-		int irq_base)
+int mfd_add_devices(struct platform_device *parent,
+		    const struct mfd_cell *cells, int n_devs,
+		    struct resource *mem_base,
+		    int irq_base)
 {
 	int i;
 	int ret = 0;
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index bb3dd0545928..b7cbb9968339 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -1,5 +1,3 @@
-#ifndef MFD_CORE_H
-#define MFD_CORE_H
 /*
  * drivers/mfd/mfd-core.h
  *
@@ -13,6 +11,9 @@
  *
  */
 
+#ifndef MFD_CORE_H
+#define MFD_CORE_H
+
 #include <linux/platform_device.h>
 
 /*
@@ -38,17 +39,15 @@ struct mfd_cell {
 	const struct resource	*resources;
 };
 
-static inline struct mfd_cell *
-mfd_get_cell(struct platform_device *pdev)
+static inline struct mfd_cell *mfd_get_cell(struct platform_device *pdev)
 {
 	return (struct mfd_cell *)pdev->dev.platform_data;
 }
 
-extern int mfd_add_devices(
-		struct platform_device *parent,
-		const struct mfd_cell *cells, int n_devs,
-		struct resource *mem_base,
-		int irq_base);
+extern int mfd_add_devices(struct platform_device *parent,
+			   const struct mfd_cell *cells, int n_devs,
+			   struct resource *mem_base,
+			   int irq_base);
 
 extern void mfd_remove_devices(struct platform_device *parent);
 
-- 
cgit v1.2.3


From e56b3bc7942982ac2589c942fb345e38bc7a341a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 28 Jul 2008 11:32:33 -0700
Subject: cpu masks: optimize and clean up cpumask_of_cpu()

Clean up and optimize cpumask_of_cpu(), by sharing all the zero words.

Instead of stupidly generating all possible i=0...NR_CPUS 2^i patterns
creating a huge array of constant bitmasks, realize that the zero words
can be shared.

In other words, on a 64-bit architecture, we only ever need 64 of these
arrays - with a different bit set in one single world (with enough zero
words around it so that we can create any bitmask by just offsetting in
that big array). And then we just put enough zeroes around it that we
can point every single cpumask to be one of those things.

So when we have 4k CPU's, instead of having 4k arrays (of 4k bits each,
with one bit set in each array - 2MB memory total), we have exactly 64
arrays instead, each 8k bits in size (64kB total).

And then we just point cpumask(n) to the right position (which we can
calculate dynamically). Once we have the right arrays, getting
"cpumask(n)" ends up being:

  static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
  {
          const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
          p -= cpu / BITS_PER_LONG;
          return (const cpumask_t *)p;
  }

This brings other advantages and simplifications as well:

 - we are not wasting memory that is just filled with a single bit in
   various different places

 - we don't need all those games to re-create the arrays in some dense
   format, because they're already going to be dense enough.

if we compile a kernel for up to 4k CPU's, "wasting" that 64kB of memory
is a non-issue (especially since by doing this "overlapping" trick we
probably get better cache behaviour anyway).

[ mingo@elte.hu:

  Converted Linus's mails into a commit. See:

     http://lkml.org/lkml/2008/7/27/156
     http://lkml.org/lkml/2008/7/28/320

  Also applied a family filter - which also has the side-effect of leaving
  out the bits where Linus calls me an idio... Oh, never mind ;-)
]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c |  23 --------
 include/linux/cpumask.h        |  26 ++++++++-
 kernel/cpu.c                   | 128 +++++++----------------------------------
 3 files changed, 43 insertions(+), 134 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 1cd53dfcd309..76e305e064f9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,26 +80,6 @@ static void __init setup_per_cpu_maps(void)
 #endif
 }
 
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-/*
- * Replace static cpumask_of_cpu_map in the initdata section,
- * with one that's allocated sized by the possible number of cpus.
- *
- * (requires nr_cpu_ids to be initialized)
- */
-static void __init setup_cpumask_of_cpu(void)
-{
-	int i;
-
-	/* alloc_bootmem zeroes memory */
-	cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
-	for (i = 0; i < nr_cpu_ids; i++)
-		cpu_set(i, cpumask_of_cpu_map[i]);
-}
-#else
-static inline void setup_cpumask_of_cpu(void) { }
-#endif
-
 #ifdef CONFIG_X86_32
 /*
  * Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -199,9 +179,6 @@ void __init setup_per_cpu_areas(void)
 
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
-
-	/* Setup cpumask_of_cpu map */
-	setup_cpumask_of_cpu();
 }
 
 #endif
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 8fa3b6d4a320..96d0509fb8d8 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -265,10 +265,30 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 }
 
+/*
+ * Special-case data structure for "single bit set only" constant CPU masks.
+ *
+ * We pre-generate all the 64 (or 32) possible bit positions, with enough
+ * padding to the left and the right, and return the constant pointer
+ * appropriately offset.
+ */
+extern const unsigned long
+	cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
+
+static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
+{
+	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
+	p -= cpu / BITS_PER_LONG;
+	return (const cpumask_t *)p;
+}
+
+/*
+ * In cases where we take the address of the cpumask immediately,
+ * gcc optimizes it out (it's a constant) and there's no huge stack
+ * variable created:
+ */
+#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); })
 
-/* cpumask_of_cpu_map[] is in kernel/cpu.c */
-extern const cpumask_t *cpumask_of_cpu_map;
-#define cpumask_of_cpu(cpu)	(cpumask_of_cpu_map[cpu])
 
 #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a35d8995dc8c..06a8358bb418 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -462,115 +462,27 @@ out:
 
 #endif /* CONFIG_SMP */
 
-/* 64 bits of zeros, for initializers. */
-#if BITS_PER_LONG == 32
-#define Z64 0, 0
-#else
-#define Z64 0
-#endif
+/*
+ * cpu_bit_bitmap[] is a special, "compressed" data structure that
+ * represents all NR_CPUS bits binary values of 1<<nr.
+ *
+ * It is used by cpumask_of_cpu() to get a constant address to a CPU
+ * mask value that has a single bit set only.
+ */
 
-/* Initializer macros. */
-#define CMI0(n) { .bits = { 1UL << (n) } }
-#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } }
-
-#define CMI8(n, ...)						\
-	CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__),		\
-	CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__),	\
-	CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__),	\
-	CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)
-
-#if BITS_PER_LONG == 32
-#define CMI64(n, ...)							\
-	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
-	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
-	CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__),	\
-	CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
-#else
-#define CMI64(n, ...)							\
-	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
-	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
-	CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__),	\
-	CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
-#endif
+/* cpu_bit_bitmap[0] is empty - so we can back into it */
+#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
+#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
+#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
+#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
 
-#define CMI256(n, ...)							\
-	CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__),	\
-	CMI64((n)+128, Z64, Z64, __VA_ARGS__),				\
-	CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__)
-#define Z256 Z64, Z64, Z64, Z64
-
-#define CMI1024(n, ...)					\
-	CMI256((n), __VA_ARGS__),			\
-	CMI256((n)+256, Z256, __VA_ARGS__),		\
-	CMI256((n)+512, Z256, Z256, __VA_ARGS__),	\
-	CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
-#define Z1024 Z256, Z256, Z256, Z256
-
-/* We want this statically initialized, just to be safe.  We try not
- * to waste too much space, either. */
-static const cpumask_t cpumask_map[]
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-__initdata
-#endif
-= {
-	CMI0(0), CMI0(1), CMI0(2), CMI0(3),
-#if NR_CPUS > 4
-	CMI0(4), CMI0(5), CMI0(6), CMI0(7),
-#endif
-#if NR_CPUS > 8
-	CMI0(8), CMI0(9), CMI0(10), CMI0(11),
-	CMI0(12), CMI0(13), CMI0(14), CMI0(15),
-#endif
-#if NR_CPUS > 16
-	CMI0(16), CMI0(17), CMI0(18), CMI0(19),
-	CMI0(20), CMI0(21), CMI0(22), CMI0(23),
-	CMI0(24), CMI0(25), CMI0(26), CMI0(27),
-	CMI0(28), CMI0(29), CMI0(30), CMI0(31),
-#endif
-#if NR_CPUS > 32
-#if BITS_PER_LONG == 32
-	CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
-	CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
-	CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
-	CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
-	CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
-	CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
-	CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
-	CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
-#else
-	CMI0(32), CMI0(33), CMI0(34), CMI0(35),
-	CMI0(36), CMI0(37), CMI0(38), CMI0(39),
-	CMI0(40), CMI0(41), CMI0(42), CMI0(43),
-	CMI0(44), CMI0(45), CMI0(46), CMI0(47),
-	CMI0(48), CMI0(49), CMI0(50), CMI0(51),
-	CMI0(52), CMI0(53), CMI0(54), CMI0(55),
-	CMI0(56), CMI0(57), CMI0(58), CMI0(59),
-	CMI0(60), CMI0(61), CMI0(62), CMI0(63),
-#endif /* BITS_PER_LONG == 64 */
-#endif
-#if NR_CPUS > 64
-	CMI64(64, Z64),
-#endif
-#if NR_CPUS > 128
-	CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
-#endif
-#if NR_CPUS > 256
-	CMI256(256, Z256),
-#endif
-#if NR_CPUS > 512
-	CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
-#endif
-#if NR_CPUS > 1024
-	CMI1024(1024, Z1024),
-#endif
-#if NR_CPUS > 2048
-	CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
-#endif
-#if NR_CPUS > 4096
-#error NR_CPUS too big.  Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
+const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
+
+	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
+	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
+#if BITS_PER_LONG > 32
+	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
+	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
 #endif
 };
-
-const cpumask_t *cpumask_of_cpu_map = cpumask_map;
-
-EXPORT_SYMBOL_GPL(cpumask_of_cpu_map);
+EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
-- 
cgit v1.2.3


From 5fde244d39b88625ac578d83e6625138714de031 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Wed, 23 Jul 2008 10:32:24 +0800
Subject: PCI: disable ASPM per ACPI FADT setting

The ACPI FADT table includes an ASPM control bit. If the bit is set, do
not enable ASPM since it may indicate that the platform doesn't actually
support the feature.

Tested-by: Jack Howarth <howarth@bromo.msbb.uc.edu>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-acpi.c   | 7 +++++++
 drivers/pci/pcie/aspm.c  | 5 +++++
 include/acpi/actbl.h     | 1 +
 include/linux/pci-aspm.h | 5 +++++
 4 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 7764768b6a0e..89a2f0fa10f9 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/module.h>
+#include <linux/pci-aspm.h>
 #include <acpi/acpi.h>
 #include <acpi/acnamesp.h>
 #include <acpi/acresrc.h>
@@ -372,6 +373,12 @@ static int __init acpi_pci_init(void)
 		printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n");
 		pci_no_msi();
 	}
+
+	if (acpi_gbl_FADT.boot_flags & BAF_PCIE_ASPM_CONTROL) {
+		printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
+		pcie_no_aspm();
+	}
+
 	ret = register_acpi_bus_type(&acpi_pci_bus);
 	if (ret)
 		return 0;
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index f82495583e63..759c51a4e399 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -808,6 +808,11 @@ static int __init pcie_aspm_disable(char *str)
 
 __setup("pcie_noaspm", pcie_aspm_disable);
 
+void pcie_no_aspm(void)
+{
+	aspm_disabled = 1;
+}
+
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #include <linux/pci-acpi.h>
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 1ebbe883f786..13a3d9ad92db 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -277,6 +277,7 @@ enum acpi_prefered_pm_profiles {
 #define BAF_LEGACY_DEVICES              0x0001
 #define BAF_8042_KEYBOARD_CONTROLLER    0x0002
 #define BAF_MSI_NOT_SUPPORTED           0x0008
+#define BAF_PCIE_ASPM_CONTROL           0x0010
 
 #define FADT2_REVISION_ID               3
 #define FADT2_MINUS_REVISION_ID         2
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index a1a1e618e996..91ba0b338b47 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -27,6 +27,7 @@ extern void pcie_aspm_init_link_state(struct pci_dev *pdev);
 extern void pcie_aspm_exit_link_state(struct pci_dev *pdev);
 extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
+extern void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
 {
@@ -40,6 +41,10 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev)
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
+
+static inline void pcie_no_aspm(void)
+{
+}
 #endif
 
 #ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */
-- 
cgit v1.2.3


From 149e16372a2066c5474d8a8db9b252afd57eb427 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Wed, 23 Jul 2008 10:32:31 +0800
Subject: PCI: disable ASPM on pre-1.1 PCIe devices

Disable ASPM on pre-1.1 PCIe devices, as many of them don't implement it
correctly.

Tested-by: Jack Howarth <howarth@bromo.msbb.uc.edu>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pcie/aspm.c  | 13 +++++++++++++
 drivers/pci/probe.c      |  3 ++-
 include/linux/pci_regs.h |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 759c51a4e399..704605298c5e 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -510,6 +510,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 {
 	struct pci_dev *child_dev;
 	int child_pos;
+	u32 reg32;
 
 	/*
 	 * Some functions in a slot might not all be PCIE functions, very
@@ -519,6 +520,18 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 		child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP);
 		if (!child_pos)
 			return -EINVAL;
+
+		/*
+		 * Disable ASPM for pre-1.1 PCIe device, we follow MS to use
+		 * RBER bit to determine if a function is 1.1 version device
+		 */
+		pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP,
+			&reg32);
+		if (!(reg32 & PCI_EXP_DEVCAP_RBER)) {
+			printk("Pre-1.1 PCIe device detected, "
+				"disable ASPM for %s\n", pci_name(pdev));
+			return -EINVAL;
+		}
 	}
 	return 0;
 }
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 203630065839..7098dfb07449 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1057,7 +1057,8 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
 		}
 	}
 
-	if (bus->self)
+	/* only one slot has pcie device */
+	if (bus->self && nr)
 		pcie_aspm_init_link_state(bus->self);
 
 	return nr;
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 19958b929905..450684f7eaac 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -374,6 +374,7 @@
 #define  PCI_EXP_DEVCAP_ATN_BUT	0x1000	/* Attention Button Present */
 #define  PCI_EXP_DEVCAP_ATN_IND	0x2000	/* Attention Indicator Present */
 #define  PCI_EXP_DEVCAP_PWR_IND	0x4000	/* Power Indicator Present */
+#define  PCI_EXP_DEVCAP_RBER	0x8000	/* Role-Based Error Reporting */
 #define  PCI_EXP_DEVCAP_PWR_VAL	0x3fc0000 /* Slot Power Limit Value */
 #define  PCI_EXP_DEVCAP_PWR_SCL	0xc000000 /* Slot Power Limit Scale */
 #define PCI_EXP_DEVCTL		8	/* Device Control */
-- 
cgit v1.2.3


From 979b1791e5b8f8b556faeec4c48339e7ed63af9f Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 24 Jul 2008 17:18:38 +0100
Subject: PCI: add D3 power state avoidance quirk

Libata has some hacks to deal with certain controllers going silly in D3
state. The right way to handle this is to keep a PCI device flag for
such devices. That can then be generalised for no ATA devices with power
problems.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c    |  4 ++++
 drivers/pci/quirks.c | 13 +++++++++++++
 include/linux/pci.h  |  2 ++
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c95f77d65718..0a3d856833fc 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -572,6 +572,10 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 		if (!ret)
 			pci_update_current_state(dev);
 	}
+	/* This device is quirked not to be put into D3, so
+	   don't put it in D3 */
+	if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3))
+		return 0;
 
 	error = pci_raw_set_power_state(dev, state);
 
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 12d489395fad..0fb365074288 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -923,6 +923,19 @@ static void __init quirk_ide_samemode(struct pci_dev *pdev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode);
 
+/*
+ * Some ATA devices break if put into D3
+ */
+
+static void __devinit quirk_no_ata_d3(struct pci_dev *pdev)
+{
+	/* Quirk the legacy ATA devices only. The AHCI ones are ok */
+	if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE)
+		pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3);
+
 /* This was originally an Alpha specific thing, but it really fits here.
  * The i82375 PCI/EISA bridge appears as non-classified. Fix that.
  */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1d296d31abe0..825be3878f68 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -124,6 +124,8 @@ enum pci_dev_flags {
 	 * generation too.
 	 */
 	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
+	/* Device configuration is irrevocably lost if disabled into D3 */
+	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
 };
 
 typedef unsigned short __bitwise pci_bus_flags_t;
-- 
cgit v1.2.3


From 56edb58be157a06dc147a988af3588059556d392 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Tue, 29 Jul 2008 01:23:32 +0200
Subject: mfd: add platform_data to mfd_cell

Adding platform_data to mfd_cell allows passing of platform data directly
to the platform_device created for each cell and thus reuse of existing
drivers.
On the other side it can be used as a hook to mfd_cell itself
removing the need in mfd_get_cell method.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Acked-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/mfd-core.c   |  2 +-
 drivers/mfd/tc6393xb.c   |  4 ++++
 include/linux/mfd/core.h | 13 +++++++------
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 50207700140c..ad4e4d16a36a 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -32,7 +32,7 @@ static int mfd_add_device(struct platform_device *parent,
 	pdev->dev.parent = &parent->dev;
 
 	ret = platform_device_add_data(pdev,
-			cell, sizeof(struct mfd_cell));
+			cell->platform_data, cell->data_size);
 	if (ret)
 		goto fail_device;
 
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 94e55e8e7ce6..9908aaa4881a 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -466,6 +466,10 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 		tc6393xb_attach_irq(dev);
 
 	tc6393xb_cells[TC6393XB_CELL_NAND].driver_data = tcpd->nand_data;
+	tc6393xb_cells[TC6393XB_CELL_NAND].platform_data =
+		&tc6393xb_cells[TC6393XB_CELL_NAND];
+	tc6393xb_cells[TC6393XB_CELL_NAND].data_size =
+		sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]);
 
 	retval = mfd_add_devices(dev,
 			tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index b7cbb9968339..ea45d4a5a2ac 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -29,7 +29,13 @@ struct mfd_cell {
 	int			(*suspend)(struct platform_device *dev);
 	int			(*resume)(struct platform_device *dev);
 
-	void			*driver_data; /* driver-specific data */
+	/* driver-specific data for MFD-aware "cell" drivers */
+	void			*driver_data;
+
+	/* platform_data can be used to either pass data to "generic"
+	   driver or as a hook to mfd_cell for the "cell" drivers */
+	void			*platform_data;
+	size_t			data_size;
 
 	/*
 	 * This resources can be specified relatievly to the parent device.
@@ -39,11 +45,6 @@ struct mfd_cell {
 	const struct resource	*resources;
 };
 
-static inline struct mfd_cell *mfd_get_cell(struct platform_device *pdev)
-{
-	return (struct mfd_cell *)pdev->dev.platform_data;
-}
-
 extern int mfd_add_devices(struct platform_device *parent,
 			   const struct mfd_cell *cells, int n_devs,
 			   struct resource *mem_base,
-- 
cgit v1.2.3


From 6beeac76f5f96590fb751af5e138fbc3f62e8460 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@qumranet.com>
Date: Mon, 28 Jul 2008 15:46:22 -0700
Subject: mmu-notifiers: add list_del_init_rcu()

Introduce list_del_init_rcu() and document it.

Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Robin Holt <holt@sgi.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Kanoj Sarcar <kanojsarcar@yahoo.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Chris Wright <chrisw@redhat.com>
Cc: Marcelo Tosatti <marcelo@kvack.org>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Izik Eidus <izike@qumranet.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rculist.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index b0f39be08b6c..eb4443c7e05b 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -97,6 +97,34 @@ static inline void list_del_rcu(struct list_head *entry)
 	entry->prev = LIST_POISON2;
 }
 
+/**
+ * hlist_del_init_rcu - deletes entry from hash list with re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: list_unhashed() on the node return true after this. It is
+ * useful for RCU based read lockfree traversal if the writer side
+ * must know if the list entry is still hashed or already unhashed.
+ *
+ * In particular, it means that we can not poison the forward pointers
+ * that may still be used for walking the hash list and we can only
+ * zero the pprev pointer so list_unhashed() will return true after
+ * this.
+ *
+ * The caller must take whatever precautions are necessary (such as
+ * holding appropriate locks) to avoid racing with another
+ * list-mutation primitive, such as hlist_add_head_rcu() or
+ * hlist_del_rcu(), running on this same list.  However, it is
+ * perfectly legal to run concurrently with the _rcu list-traversal
+ * primitives, such as hlist_for_each_entry_rcu().
+ */
+static inline void hlist_del_init_rcu(struct hlist_node *n)
+{
+	if (!hlist_unhashed(n)) {
+		__hlist_del(n);
+		n->pprev = NULL;
+	}
+}
+
 /**
  * list_replace_rcu - replace old entry by new one
  * @old : the element to be replaced
-- 
cgit v1.2.3


From 7906d00cd1f687268f0a3599442d113767795ae6 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@qumranet.com>
Date: Mon, 28 Jul 2008 15:46:26 -0700
Subject: mmu-notifiers: add mm_take_all_locks() operation

mm_take_all_locks holds off reclaim from an entire mm_struct.  This allows
mmu notifiers to register into the mm at any time with the guarantee that
no mmu operation is in progress on the mm.

This operation locks against the VM for all pte/vma/mm related operations
that could ever happen on a certain mm.  This includes vmtruncate,
try_to_unmap, and all page faults.

The caller must take the mmap_sem in write mode before calling
mm_take_all_locks().  The caller isn't allowed to release the mmap_sem
until mm_drop_all_locks() returns.

mmap_sem in write mode is required in order to block all operations that
could modify pagetables and free pages without need of altering the vma
layout (for example populate_range() with nonlinear vmas).  It's also
needed in write mode to avoid new anon_vmas to be associated with existing
vmas.

A single task can't take more than one mm_take_all_locks() in a row or it
would deadlock.

mm_take_all_locks() and mm_drop_all_locks are expensive operations that
may have to take thousand of locks.

mm_take_all_locks() can fail if it's interrupted by signals.

When mmu_notifier_register returns, we must be sure that the driver is
notified if some task is in the middle of a vmtruncate for the 'mm' where
the mmu notifier was registered (mmu_notifier_invalidate_range_start/end
is run around the vmtruncation but mmu_notifier_register can run after
mmu_notifier_invalidate_range_start and before
mmu_notifier_invalidate_range_end).  Same problem for rmap paths.  And
we've to remove page pinning to avoid replicating the tlb_gather logic
inside KVM (and GRU doesn't work well with page pinning regardless of
needing tlb_gather), so without mm_take_all_locks when vmtruncate frees
the page, kvm would have no way to notice that it mapped into sptes a page
that is going into the freelist without a chance of any further
mmu_notifier notification.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Robin Holt <holt@sgi.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Kanoj Sarcar <kanojsarcar@yahoo.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Chris Wright <chrisw@redhat.com>
Cc: Marcelo Tosatti <marcelo@kvack.org>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Izik Eidus <izike@qumranet.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h      |   3 +
 include/linux/pagemap.h |   1 +
 include/linux/rmap.h    |   8 +++
 mm/mmap.c               | 158 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 170 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e695eaab4ce..866a3dbe5c75 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
 
+extern int mm_take_all_locks(struct mm_struct *mm);
+extern void mm_drop_all_locks(struct mm_struct *mm);
+
 #ifdef CONFIG_PROC_FS
 /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
 extern void added_exe_file_vma(struct mm_struct *mm);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a81d81890422..a39b38ccdc97 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -20,6 +20,7 @@
  */
 #define	AS_EIO		(__GFP_BITS_SHIFT + 0)	/* IO error on async write */
 #define AS_ENOSPC	(__GFP_BITS_SHIFT + 1)	/* ENOSPC on async write */
+#define AS_MM_ALL_LOCKS	(__GFP_BITS_SHIFT + 2)	/* under mm_take_all_locks() */
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
 {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1383692ac5bd..69407f85e10b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -26,6 +26,14 @@
  */
 struct anon_vma {
 	spinlock_t lock;	/* Serialize access to vma list */
+	/*
+	 * NOTE: the LSB of the head.next is set by
+	 * mm_take_all_locks() _after_ taking the above lock. So the
+	 * head must only be read/written after taking the above lock
+	 * to be sure to see a valid next pointer. The LSB bit itself
+	 * is serialized by a system wide lock only visible to
+	 * mm_take_all_locks() (mm_all_locks_mutex).
+	 */
 	struct list_head head;	/* List of private "related" vmas */
 };
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 5e0cc99e9cd5..e5f9cb83d6d4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2268,3 +2268,161 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock(&anon_vma->lock);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock(&mapping->i_mmap_lock);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(vma->vm_file->f_mapping);
+	}
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so the users using the anon_vma->head will
+		 * never see our bitflag.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}
-- 
cgit v1.2.3


From cddb8a5c14aa89810b40495d94d3d2a0faee6619 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@qumranet.com>
Date: Mon, 28 Jul 2008 15:46:29 -0700
Subject: mmu-notifiers: core

With KVM/GFP/XPMEM there isn't just the primary CPU MMU pointing to pages.
 There are secondary MMUs (with secondary sptes and secondary tlbs) too.
sptes in the kvm case are shadow pagetables, but when I say spte in
mmu-notifier context, I mean "secondary pte".  In GRU case there's no
actual secondary pte and there's only a secondary tlb because the GRU
secondary MMU has no knowledge about sptes and every secondary tlb miss
event in the MMU always generates a page fault that has to be resolved by
the CPU (this is not the case of KVM where the a secondary tlb miss will
walk sptes in hardware and it will refill the secondary tlb transparently
to software if the corresponding spte is present).  The same way
zap_page_range has to invalidate the pte before freeing the page, the spte
(and secondary tlb) must also be invalidated before any page is freed and
reused.

Currently we take a page_count pin on every page mapped by sptes, but that
means the pages can't be swapped whenever they're mapped by any spte
because they're part of the guest working set.  Furthermore a spte unmap
event can immediately lead to a page to be freed when the pin is released
(so requiring the same complex and relatively slow tlb_gather smp safe
logic we have in zap_page_range and that can be avoided completely if the
spte unmap event doesn't require an unpin of the page previously mapped in
the secondary MMU).

The mmu notifiers allow kvm/GRU/XPMEM to attach to the tsk->mm and know
when the VM is swapping or freeing or doing anything on the primary MMU so
that the secondary MMU code can drop sptes before the pages are freed,
avoiding all page pinning and allowing 100% reliable swapping of guest
physical address space.  Furthermore it avoids the code that teardown the
mappings of the secondary MMU, to implement a logic like tlb_gather in
zap_page_range that would require many IPI to flush other cpu tlbs, for
each fixed number of spte unmapped.

To make an example: if what happens on the primary MMU is a protection
downgrade (from writeable to wrprotect) the secondary MMU mappings will be
invalidated, and the next secondary-mmu-page-fault will call
get_user_pages and trigger a do_wp_page through get_user_pages if it
called get_user_pages with write=1, and it'll re-establishing an updated
spte or secondary-tlb-mapping on the copied page.  Or it will setup a
readonly spte or readonly tlb mapping if it's a guest-read, if it calls
get_user_pages with write=0.  This is just an example.

This allows to map any page pointed by any pte (and in turn visible in the
primary CPU MMU), into a secondary MMU (be it a pure tlb like GRU, or an
full MMU with both sptes and secondary-tlb like the shadow-pagetable layer
with kvm), or a remote DMA in software like XPMEM (hence needing of
schedule in XPMEM code to send the invalidate to the remote node, while no
need to schedule in kvm/gru as it's an immediate event like invalidating
primary-mmu pte).

At least for KVM without this patch it's impossible to swap guests
reliably.  And having this feature and removing the page pin allows
several other optimizations that simplify life considerably.

Dependencies:

1) mm_take_all_locks() to register the mmu notifier when the whole VM
   isn't doing anything with "mm".  This allows mmu notifier users to keep
   track if the VM is in the middle of the invalidate_range_begin/end
   critical section with an atomic counter incraese in range_begin and
   decreased in range_end.  No secondary MMU page fault is allowed to map
   any spte or secondary tlb reference, while the VM is in the middle of
   range_begin/end as any page returned by get_user_pages in that critical
   section could later immediately be freed without any further
   ->invalidate_page notification (invalidate_range_begin/end works on
   ranges and ->invalidate_page isn't called immediately before freeing
   the page).  To stop all page freeing and pagetable overwrites the
   mmap_sem must be taken in write mode and all other anon_vma/i_mmap
   locks must be taken too.

2) It'd be a waste to add branches in the VM if nobody could possibly
   run KVM/GRU/XPMEM on the kernel, so mmu notifiers will only enabled if
   CONFIG_KVM=m/y.  In the current kernel kvm won't yet take advantage of
   mmu notifiers, but this already allows to compile a KVM external module
   against a kernel with mmu notifiers enabled and from the next pull from
   kvm.git we'll start using them.  And GRU/XPMEM will also be able to
   continue the development by enabling KVM=m in their config, until they
   submit all GRU/XPMEM GPLv2 code to the mainline kernel.  Then they can
   also enable MMU_NOTIFIERS in the same way KVM does it (even if KVM=n).
   This guarantees nobody selects MMU_NOTIFIER=y if KVM and GRU and XPMEM
   are all =n.

The mmu_notifier_register call can fail because mm_take_all_locks may be
interrupted by a signal and return -EINTR.  Because mmu_notifier_reigster
is used when a driver startup, a failure can be gracefully handled.  Here
an example of the change applied to kvm to register the mmu notifiers.
Usually when a driver startups other allocations are required anyway and
-ENOMEM failure paths exists already.

 struct  kvm *kvm_arch_create_vm(void)
 {
        struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+       int err;

        if (!kvm)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);

+       kvm->arch.mmu_notifier.ops = &kvm_mmu_notifier_ops;
+       err = mmu_notifier_register(&kvm->arch.mmu_notifier, current->mm);
+       if (err) {
+               kfree(kvm);
+               return ERR_PTR(err);
+       }
+
        return kvm;
 }

mmu_notifier_unregister returns void and it's reliable.

The patch also adds a few needed but missing includes that would prevent
kernel to compile after these changes on non-x86 archs (x86 didn't need
them by luck).

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix mm/filemap_xip.c build]
[akpm@linux-foundation.org: fix mm/mmu_notifier.c build]
Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Robin Holt <holt@sgi.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Kanoj Sarcar <kanojsarcar@yahoo.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Avi Kivity <avi@qumranet.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Chris Wright <chrisw@redhat.com>
Cc: Marcelo Tosatti <marcelo@kvack.org>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Izik Eidus <izike@qumranet.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kvm/Kconfig         |   1 +
 include/linux/mm_types.h     |   4 +
 include/linux/mmu_notifier.h | 279 +++++++++++++++++++++++++++++++++++++++++++
 kernel/fork.c                |   3 +
 mm/Kconfig                   |   3 +
 mm/Makefile                  |   1 +
 mm/filemap_xip.c             |   3 +-
 mm/fremap.c                  |   3 +
 mm/hugetlb.c                 |   3 +
 mm/memory.c                  |  35 +++++-
 mm/mmap.c                    |   2 +
 mm/mmu_notifier.c            | 277 ++++++++++++++++++++++++++++++++++++++++++
 mm/mprotect.c                |   3 +
 mm/mremap.c                  |   6 +
 mm/rmap.c                    |  13 +-
 15 files changed, 623 insertions(+), 13 deletions(-)
 create mode 100644 include/linux/mmu_notifier.h
 create mode 100644 mm/mmu_notifier.c

(limited to 'include/linux')

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8d45fabc5f3b..ce3251ce5504 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	select PREEMPT_NOTIFIERS
+	select MMU_NOTIFIER
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 746f975b58ef..386edbe2cb4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,6 +10,7 @@
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
+#include <linux/cpumask.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
@@ -253,6 +254,9 @@ struct mm_struct {
 	struct file *exe_file;
 	unsigned long num_exe_file_vmas;
 #endif
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier_mm *mmu_notifier_mm;
+#endif
 };
 
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
new file mode 100644
index 000000000000..b77486d152cd
--- /dev/null
+++ b/include/linux/mmu_notifier.h
@@ -0,0 +1,279 @@
+#ifndef _LINUX_MMU_NOTIFIER_H
+#define _LINUX_MMU_NOTIFIER_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm_types.h>
+
+struct mmu_notifier;
+struct mmu_notifier_ops;
+
+#ifdef CONFIG_MMU_NOTIFIER
+
+/*
+ * The mmu notifier_mm structure is allocated and installed in
+ * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
+ * critical section and it's released only when mm_count reaches zero
+ * in mmdrop().
+ */
+struct mmu_notifier_mm {
+	/* all mmu notifiers registerd in this mm are queued in this list */
+	struct hlist_head list;
+	/* to serialize the list modifications and hlist_unhashed */
+	spinlock_t lock;
+};
+
+struct mmu_notifier_ops {
+	/*
+	 * Called either by mmu_notifier_unregister or when the mm is
+	 * being destroyed by exit_mmap, always before all pages are
+	 * freed. This can run concurrently with other mmu notifier
+	 * methods (the ones invoked outside the mm context) and it
+	 * should tear down all secondary mmu mappings and freeze the
+	 * secondary mmu. If this method isn't implemented you've to
+	 * be sure that nothing could possibly write to the pages
+	 * through the secondary mmu by the time the last thread with
+	 * tsk->mm == mm exits.
+	 *
+	 * As side note: the pages freed after ->release returns could
+	 * be immediately reallocated by the gart at an alias physical
+	 * address with a different cache model, so if ->release isn't
+	 * implemented because all _software_ driven memory accesses
+	 * through the secondary mmu are terminated by the time the
+	 * last thread of this mm quits, you've also to be sure that
+	 * speculative _hardware_ operations can't allocate dirty
+	 * cachelines in the cpu that could not be snooped and made
+	 * coherent with the other read and write operations happening
+	 * through the gart alias address, so leading to memory
+	 * corruption.
+	 */
+	void (*release)(struct mmu_notifier *mn,
+			struct mm_struct *mm);
+
+	/*
+	 * clear_flush_young is called after the VM is
+	 * test-and-clearing the young/accessed bitflag in the
+	 * pte. This way the VM will provide proper aging to the
+	 * accesses to the page through the secondary MMUs and not
+	 * only to the ones through the Linux pte.
+	 */
+	int (*clear_flush_young)(struct mmu_notifier *mn,
+				 struct mm_struct *mm,
+				 unsigned long address);
+
+	/*
+	 * Before this is invoked any secondary MMU is still ok to
+	 * read/write to the page previously pointed to by the Linux
+	 * pte because the page hasn't been freed yet and it won't be
+	 * freed until this returns. If required set_page_dirty has to
+	 * be called internally to this method.
+	 */
+	void (*invalidate_page)(struct mmu_notifier *mn,
+				struct mm_struct *mm,
+				unsigned long address);
+
+	/*
+	 * invalidate_range_start() and invalidate_range_end() must be
+	 * paired and are called only when the mmap_sem and/or the
+	 * locks protecting the reverse maps are held. The subsystem
+	 * must guarantee that no additional references are taken to
+	 * the pages in the range established between the call to
+	 * invalidate_range_start() and the matching call to
+	 * invalidate_range_end().
+	 *
+	 * Invalidation of multiple concurrent ranges may be
+	 * optionally permitted by the driver. Either way the
+	 * establishment of sptes is forbidden in the range passed to
+	 * invalidate_range_begin/end for the whole duration of the
+	 * invalidate_range_begin/end critical section.
+	 *
+	 * invalidate_range_start() is called when all pages in the
+	 * range are still mapped and have at least a refcount of one.
+	 *
+	 * invalidate_range_end() is called when all pages in the
+	 * range have been unmapped and the pages have been freed by
+	 * the VM.
+	 *
+	 * The VM will remove the page table entries and potentially
+	 * the page between invalidate_range_start() and
+	 * invalidate_range_end(). If the page must not be freed
+	 * because of pending I/O or other circumstances then the
+	 * invalidate_range_start() callback (or the initial mapping
+	 * by the driver) must make sure that the refcount is kept
+	 * elevated.
+	 *
+	 * If the driver increases the refcount when the pages are
+	 * initially mapped into an address space then either
+	 * invalidate_range_start() or invalidate_range_end() may
+	 * decrease the refcount. If the refcount is decreased on
+	 * invalidate_range_start() then the VM can free pages as page
+	 * table entries are removed.  If the refcount is only
+	 * droppped on invalidate_range_end() then the driver itself
+	 * will drop the last refcount but it must take care to flush
+	 * any secondary tlb before doing the final free on the
+	 * page. Pages will no longer be referenced by the linux
+	 * address space but may still be referenced by sptes until
+	 * the last refcount is dropped.
+	 */
+	void (*invalidate_range_start)(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end);
+	void (*invalidate_range_end)(struct mmu_notifier *mn,
+				     struct mm_struct *mm,
+				     unsigned long start, unsigned long end);
+};
+
+/*
+ * The notifier chains are protected by mmap_sem and/or the reverse map
+ * semaphores. Notifier chains are only changed when all reverse maps and
+ * the mmap_sem locks are taken.
+ *
+ * Therefore notifier chains can only be traversed when either
+ *
+ * 1. mmap_sem is held.
+ * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
+ * 3. No other concurrent thread can access the list (release)
+ */
+struct mmu_notifier {
+	struct hlist_node hlist;
+	const struct mmu_notifier_ops *ops;
+};
+
+static inline int mm_has_notifiers(struct mm_struct *mm)
+{
+	return unlikely(mm->mmu_notifier_mm);
+}
+
+extern int mmu_notifier_register(struct mmu_notifier *mn,
+				 struct mm_struct *mm);
+extern int __mmu_notifier_register(struct mmu_notifier *mn,
+				   struct mm_struct *mm);
+extern void mmu_notifier_unregister(struct mmu_notifier *mn,
+				    struct mm_struct *mm);
+extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
+extern void __mmu_notifier_release(struct mm_struct *mm);
+extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
+					  unsigned long address);
+extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
+					  unsigned long address);
+extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end);
+extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
+				  unsigned long start, unsigned long end);
+
+static inline void mmu_notifier_release(struct mm_struct *mm)
+{
+	if (mm_has_notifiers(mm))
+		__mmu_notifier_release(mm);
+}
+
+static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
+					  unsigned long address)
+{
+	if (mm_has_notifiers(mm))
+		return __mmu_notifier_clear_flush_young(mm, address);
+	return 0;
+}
+
+static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
+					  unsigned long address)
+{
+	if (mm_has_notifiers(mm))
+		__mmu_notifier_invalidate_page(mm, address);
+}
+
+static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	if (mm_has_notifiers(mm))
+		__mmu_notifier_invalidate_range_start(mm, start, end);
+}
+
+static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	if (mm_has_notifiers(mm))
+		__mmu_notifier_invalidate_range_end(mm, start, end);
+}
+
+static inline void mmu_notifier_mm_init(struct mm_struct *mm)
+{
+	mm->mmu_notifier_mm = NULL;
+}
+
+static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
+{
+	if (mm_has_notifiers(mm))
+		__mmu_notifier_mm_destroy(mm);
+}
+
+/*
+ * These two macros will sometime replace ptep_clear_flush.
+ * ptep_clear_flush is impleemnted as macro itself, so this also is
+ * implemented as a macro until ptep_clear_flush will converted to an
+ * inline function, to diminish the risk of compilation failure. The
+ * invalidate_page method over time can be moved outside the PT lock
+ * and these two macros can be later removed.
+ */
+#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
+({									\
+	pte_t __pte;							\
+	struct vm_area_struct *___vma = __vma;				\
+	unsigned long ___address = __address;				\
+	__pte = ptep_clear_flush(___vma, ___address, __ptep);		\
+	mmu_notifier_invalidate_page(___vma->vm_mm, ___address);	\
+	__pte;								\
+})
+
+#define ptep_clear_flush_young_notify(__vma, __address, __ptep)		\
+({									\
+	int __young;							\
+	struct vm_area_struct *___vma = __vma;				\
+	unsigned long ___address = __address;				\
+	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
+	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
+						  ___address);		\
+	__young;							\
+})
+
+#else /* CONFIG_MMU_NOTIFIER */
+
+static inline void mmu_notifier_release(struct mm_struct *mm)
+{
+}
+
+static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
+					  unsigned long address)
+{
+	return 0;
+}
+
+static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
+					  unsigned long address)
+{
+}
+
+static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+}
+
+static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+}
+
+static inline void mmu_notifier_mm_init(struct mm_struct *mm)
+{
+}
+
+static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
+{
+}
+
+#define ptep_clear_flush_young_notify ptep_clear_flush_young
+#define ptep_clear_flush_notify ptep_clear_flush
+
+#endif /* CONFIG_MMU_NOTIFIER */
+
+#endif /* _LINUX_MMU_NOTIFIER_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8214ba7c8bb1..7ce2ebe84796 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,6 +27,7 @@
 #include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
+#include <linux/mmu_notifier.h>
 #include <linux/fs.h>
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
@@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
+		mmu_notifier_mm_init(mm);
 		return mm;
 	}
 
@@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm)
 	BUG_ON(mm == &init_mm);
 	mm_free_pgd(mm);
 	destroy_context(mm);
+	mmu_notifier_mm_destroy(mm);
 	free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
diff --git a/mm/Kconfig b/mm/Kconfig
index efee5d379df4..446c6588c753 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -208,3 +208,6 @@ config NR_QUICK
 config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
+
+config MMU_NOTIFIER
+	bool
diff --git a/mm/Makefile b/mm/Makefile
index 06ca2381fef1..da4ccf015aea 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_SLOB) += slob.o
+obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 98a3f31ccd6a..380ab402d711 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/uio.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 #include <linux/sched.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
@@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping,
 		if (pte) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
-			pteval = ptep_clear_flush(vma, address, pte);
+			pteval = ptep_clear_flush_notify(vma, address, pte);
 			page_remove_rmap(page, vma);
 			dec_mm_counter(mm, file_rss);
 			BUG_ON(pte_dirty(pteval));
diff --git a/mm/fremap.c b/mm/fremap.c
index 07a9c82ce1a3..7881638e4a12 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -15,6 +15,7 @@
 #include <linux/rmap.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
@@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 		spin_unlock(&mapping->i_mmap_lock);
 	}
 
+	mmu_notifier_invalidate_range_start(mm, start, start + size);
 	err = populate_range(mm, vma, start, size, pgoff);
+	mmu_notifier_invalidate_range_end(mm, start, start + size);
 	if (!err && !(flags & MAP_NONBLOCK)) {
 		if (unlikely(has_write_lock)) {
 			downgrade_write(&mm->mmap_sem);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3be79dc18c5c..80eb0d31d0d3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/sysctl.h>
 #include <linux/highmem.h>
+#include <linux/mmu_notifier.h>
 #include <linux/nodemask.h>
 #include <linux/pagemap.h>
 #include <linux/mempolicy.h>
@@ -1672,6 +1673,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	BUG_ON(start & ~huge_page_mask(h));
 	BUG_ON(end & ~huge_page_mask(h));
 
+	mmu_notifier_invalidate_range_start(mm, start, end);
 	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
@@ -1713,6 +1715,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	}
 	spin_unlock(&mm->page_table_lock);
 	flush_tlb_range(vma, start, end);
+	mmu_notifier_invalidate_range_end(mm, start, end);
 	list_for_each_entry_safe(page, tmp, &page_list, lru) {
 		list_del(&page->lru);
 		put_page(page);
diff --git a/mm/memory.c b/mm/memory.c
index a8ca04faaea6..67f0ab9077d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -51,6 +51,7 @@
 #include <linux/init.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -652,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	unsigned long next;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	int ret;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -667,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
+	/*
+	 * We need to invalidate the secondary MMU mappings only when
+	 * there could be a permission downgrade on the ptes of the
+	 * parent mm. And a permission downgrade will only happen if
+	 * is_cow_mapping() returns true.
+	 */
+	if (is_cow_mapping(vma->vm_flags))
+		mmu_notifier_invalidate_range_start(src_mm, addr, end);
+
+	ret = 0;
 	dst_pgd = pgd_offset(dst_mm, addr);
 	src_pgd = pgd_offset(src_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
-		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-						vma, addr, next))
-			return -ENOMEM;
+		if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+					    vma, addr, next))) {
+			ret = -ENOMEM;
+			break;
+		}
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
-	return 0;
+
+	if (is_cow_mapping(vma->vm_flags))
+		mmu_notifier_invalidate_range_end(src_mm,
+						  vma->vm_start, end);
+	return ret;
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -881,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 	unsigned long start = start_addr;
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	int fullmm = (*tlbp)->fullmm;
+	struct mm_struct *mm = vma->vm_mm;
 
+	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
 	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
 		unsigned long end;
 
@@ -946,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		}
 	}
 out:
+	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
 
@@ -1616,10 +1637,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long end = addr + size;
+	unsigned long start = addr, end = addr + size;
 	int err;
 
 	BUG_ON(addr >= end);
+	mmu_notifier_invalidate_range_start(mm, start, end);
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -1627,6 +1649,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+	mmu_notifier_invalidate_range_end(mm, start, end);
 	return err;
 }
 EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -1839,7 +1862,7 @@ gotten:
 		 * seen in the presence of one thread doing SMC and another
 		 * thread doing COW.
 		 */
-		ptep_clear_flush(vma, address, page_table);
+		ptep_clear_flush_notify(vma, address, page_table);
 		set_pte_at(mm, address, page_table, entry);
 		update_mmu_cache(vma, address, entry);
 		lru_cache_add_active(new_page);
diff --git a/mm/mmap.c b/mm/mmap.c
index e5f9cb83d6d4..245c3d69067b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,6 +26,7 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2061,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
 
 	/* mm's last user has gone, and its about to be pulled down */
 	arch_exit_mmap(mm);
+	mmu_notifier_release(mm);
 
 	lru_add_drain();
 	flush_cache_mm(mm);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
new file mode 100644
index 000000000000..5f4ef0250bee
--- /dev/null
+++ b/mm/mmu_notifier.c
@@ -0,0 +1,277 @@
+/*
+ *  linux/mm/mmu_notifier.c
+ *
+ *  Copyright (C) 2008  Qumranet, Inc.
+ *  Copyright (C) 2008  SGI
+ *             Christoph Lameter <clameter@sgi.com>
+ *
+ *  This work is licensed under the terms of the GNU GPL, version 2. See
+ *  the COPYING file in the top-level directory.
+ */
+
+#include <linux/rculist.h>
+#include <linux/mmu_notifier.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+/*
+ * This function can't run concurrently against mmu_notifier_register
+ * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
+ * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
+ * in parallel despite there being no task using this mm any more,
+ * through the vmas outside of the exit_mmap context, such as with
+ * vmtruncate. This serializes against mmu_notifier_unregister with
+ * the mmu_notifier_mm->lock in addition to RCU and it serializes
+ * against the other mmu notifiers with RCU. struct mmu_notifier_mm
+ * can't go away from under us as exit_mmap holds an mm_count pin
+ * itself.
+ */
+void __mmu_notifier_release(struct mm_struct *mm)
+{
+	struct mmu_notifier *mn;
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
+		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
+				 struct mmu_notifier,
+				 hlist);
+		/*
+		 * We arrived before mmu_notifier_unregister so
+		 * mmu_notifier_unregister will do nothing other than
+		 * to wait ->release to finish and
+		 * mmu_notifier_unregister to return.
+		 */
+		hlist_del_init_rcu(&mn->hlist);
+		/*
+		 * RCU here will block mmu_notifier_unregister until
+		 * ->release returns.
+		 */
+		rcu_read_lock();
+		spin_unlock(&mm->mmu_notifier_mm->lock);
+		/*
+		 * if ->release runs before mmu_notifier_unregister it
+		 * must be handled as it's the only way for the driver
+		 * to flush all existing sptes and stop the driver
+		 * from establishing any more sptes before all the
+		 * pages in the mm are freed.
+		 */
+		if (mn->ops->release)
+			mn->ops->release(mn, mm);
+		rcu_read_unlock();
+		spin_lock(&mm->mmu_notifier_mm->lock);
+	}
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+
+	/*
+	 * synchronize_rcu here prevents mmu_notifier_release to
+	 * return to exit_mmap (which would proceed freeing all pages
+	 * in the mm) until the ->release method returns, if it was
+	 * invoked by mmu_notifier_unregister.
+	 *
+	 * The mmu_notifier_mm can't go away from under us because one
+	 * mm_count is hold by exit_mmap.
+	 */
+	synchronize_rcu();
+}
+
+/*
+ * If no young bitflag is supported by the hardware, ->clear_flush_young can
+ * unmap the address and return 1 or 0 depending if the mapping previously
+ * existed or not.
+ */
+int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
+					unsigned long address)
+{
+	struct mmu_notifier *mn;
+	struct hlist_node *n;
+	int young = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops->clear_flush_young)
+			young |= mn->ops->clear_flush_young(mn, mm, address);
+	}
+	rcu_read_unlock();
+
+	return young;
+}
+
+void __mmu_notifier_invalidate_page(struct mm_struct *mm,
+					  unsigned long address)
+{
+	struct mmu_notifier *mn;
+	struct hlist_node *n;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops->invalidate_page)
+			mn->ops->invalidate_page(mn, mm, address);
+	}
+	rcu_read_unlock();
+}
+
+void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	struct mmu_notifier *mn;
+	struct hlist_node *n;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops->invalidate_range_start)
+			mn->ops->invalidate_range_start(mn, mm, start, end);
+	}
+	rcu_read_unlock();
+}
+
+void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	struct mmu_notifier *mn;
+	struct hlist_node *n;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops->invalidate_range_end)
+			mn->ops->invalidate_range_end(mn, mm, start, end);
+	}
+	rcu_read_unlock();
+}
+
+static int do_mmu_notifier_register(struct mmu_notifier *mn,
+				    struct mm_struct *mm,
+				    int take_mmap_sem)
+{
+	struct mmu_notifier_mm *mmu_notifier_mm;
+	int ret;
+
+	BUG_ON(atomic_read(&mm->mm_users) <= 0);
+
+	ret = -ENOMEM;
+	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
+	if (unlikely(!mmu_notifier_mm))
+		goto out;
+
+	if (take_mmap_sem)
+		down_write(&mm->mmap_sem);
+	ret = mm_take_all_locks(mm);
+	if (unlikely(ret))
+		goto out_cleanup;
+
+	if (!mm_has_notifiers(mm)) {
+		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
+		spin_lock_init(&mmu_notifier_mm->lock);
+		mm->mmu_notifier_mm = mmu_notifier_mm;
+		mmu_notifier_mm = NULL;
+	}
+	atomic_inc(&mm->mm_count);
+
+	/*
+	 * Serialize the update against mmu_notifier_unregister. A
+	 * side note: mmu_notifier_release can't run concurrently with
+	 * us because we hold the mm_users pin (either implicitly as
+	 * current->mm or explicitly with get_task_mm() or similar).
+	 * We can't race against any other mmu notifier method either
+	 * thanks to mm_take_all_locks().
+	 */
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+
+	mm_drop_all_locks(mm);
+out_cleanup:
+	if (take_mmap_sem)
+		up_write(&mm->mmap_sem);
+	/* kfree() does nothing if mmu_notifier_mm is NULL */
+	kfree(mmu_notifier_mm);
+out:
+	BUG_ON(atomic_read(&mm->mm_users) <= 0);
+	return ret;
+}
+
+/*
+ * Must not hold mmap_sem nor any other VM related lock when calling
+ * this registration function. Must also ensure mm_users can't go down
+ * to zero while this runs to avoid races with mmu_notifier_release,
+ * so mm has to be current->mm or the mm should be pinned safely such
+ * as with get_task_mm(). If the mm is not current->mm, the mm_users
+ * pin should be released by calling mmput after mmu_notifier_register
+ * returns. mmu_notifier_unregister must be always called to
+ * unregister the notifier. mm_count is automatically pinned to allow
+ * mmu_notifier_unregister to safely run at any time later, before or
+ * after exit_mmap. ->release will always be called before exit_mmap
+ * frees the pages.
+ */
+int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	return do_mmu_notifier_register(mn, mm, 1);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_register);
+
+/*
+ * Same as mmu_notifier_register but here the caller must hold the
+ * mmap_sem in write mode.
+ */
+int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	return do_mmu_notifier_register(mn, mm, 0);
+}
+EXPORT_SYMBOL_GPL(__mmu_notifier_register);
+
+/* this is called after the last mmu_notifier_unregister() returned */
+void __mmu_notifier_mm_destroy(struct mm_struct *mm)
+{
+	BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
+	kfree(mm->mmu_notifier_mm);
+	mm->mmu_notifier_mm = LIST_POISON1; /* debug */
+}
+
+/*
+ * This releases the mm_count pin automatically and frees the mm
+ * structure if it was the last user of it. It serializes against
+ * running mmu notifiers with RCU and against mmu_notifier_unregister
+ * with the unregister lock + RCU. All sptes must be dropped before
+ * calling mmu_notifier_unregister. ->release or any other notifier
+ * method may be invoked concurrently with mmu_notifier_unregister,
+ * and only after mmu_notifier_unregister returned we're guaranteed
+ * that ->release or any other method can't run anymore.
+ */
+void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	BUG_ON(atomic_read(&mm->mm_count) <= 0);
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	if (!hlist_unhashed(&mn->hlist)) {
+		hlist_del_rcu(&mn->hlist);
+
+		/*
+		 * RCU here will force exit_mmap to wait ->release to finish
+		 * before freeing the pages.
+		 */
+		rcu_read_lock();
+		spin_unlock(&mm->mmu_notifier_mm->lock);
+		/*
+		 * exit_mmap will block in mmu_notifier_release to
+		 * guarantee ->release is called before freeing the
+		 * pages.
+		 */
+		if (mn->ops->release)
+			mn->ops->release(mn, mm);
+		rcu_read_unlock();
+	} else
+		spin_unlock(&mm->mmu_notifier_mm->lock);
+
+	/*
+	 * Wait any running method to finish, of course including
+	 * ->release if it was run by mmu_notifier_relase instead of us.
+	 */
+	synchronize_rcu();
+
+	BUG_ON(atomic_read(&mm->mm_count) <= 0);
+
+	mmdrop(mm);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abd645a3b0a0..fded06f923f4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -21,6 +21,7 @@
 #include <linux/syscalls.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -203,10 +204,12 @@ success:
 		dirty_accountable = 1;
 	}
 
+	mmu_notifier_invalidate_range_start(mm, start, end);
 	if (is_vm_hugetlb_page(vma))
 		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
 	else
 		change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+	mmu_notifier_invalidate_range_end(mm, start, end);
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 08e3c7f2bd15..1a7743923c8c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -18,6 +18,7 @@
 #include <linux/highmem.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -74,7 +75,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
+	unsigned long old_start;
 
+	old_start = old_addr;
+	mmu_notifier_invalidate_range_start(vma->vm_mm,
+					    old_start, old_end);
 	if (vma->vm_file) {
 		/*
 		 * Subtle point from Rajesh Venkatasubramanian: before
@@ -116,6 +121,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
+	mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
 }
 
 #define LATENCY_LIMIT	(64 * PAGE_SIZE)
diff --git a/mm/rmap.c b/mm/rmap.c
index 39ae5a9bf382..99bc3f9cd796 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 
@@ -287,7 +288,7 @@ static int page_referenced_one(struct page *page,
 	if (vma->vm_flags & VM_LOCKED) {
 		referenced++;
 		*mapcount = 1;	/* break early from loop */
-	} else if (ptep_clear_flush_young(vma, address, pte))
+	} else if (ptep_clear_flush_young_notify(vma, address, pte))
 		referenced++;
 
 	/* Pretend the page is referenced if the task has the
@@ -457,7 +458,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 		pte_t entry;
 
 		flush_cache_page(vma, address, pte_pfn(*pte));
-		entry = ptep_clear_flush(vma, address, pte);
+		entry = ptep_clear_flush_notify(vma, address, pte);
 		entry = pte_wrprotect(entry);
 		entry = pte_mkclean(entry);
 		set_pte_at(mm, address, pte, entry);
@@ -705,14 +706,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * skipped over this mm) then we should reactivate it.
 	 */
 	if (!migration && ((vma->vm_flags & VM_LOCKED) ||
-			(ptep_clear_flush_young(vma, address, pte)))) {
+			(ptep_clear_flush_young_notify(vma, address, pte)))) {
 		ret = SWAP_FAIL;
 		goto out_unmap;
 	}
 
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
-	pteval = ptep_clear_flush(vma, address, pte);
+	pteval = ptep_clear_flush_notify(vma, address, pte);
 
 	/* Move the dirty bit to the physical page now the pte is gone. */
 	if (pte_dirty(pteval))
@@ -837,12 +838,12 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		page = vm_normal_page(vma, address, *pte);
 		BUG_ON(!page || PageAnon(page));
 
-		if (ptep_clear_flush_young(vma, address, pte))
+		if (ptep_clear_flush_young_notify(vma, address, pte))
 			continue;
 
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pte));
-		pteval = ptep_clear_flush(vma, address, pte);
+		pteval = ptep_clear_flush_notify(vma, address, pte);
 
 		/* If nonlinear, store the file page offset in the pte. */
 		if (page->index != linear_page_index(vma, address))
-- 
cgit v1.2.3


From 8ab22b9abb5c55413802e4adc9aa6223324547c3 Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Date: Mon, 28 Jul 2008 15:46:36 -0700
Subject: vfs: pagecache usage optimization for pagesize!=blocksize

When we read some part of a file through pagecache, if there is a
pagecache of corresponding index but this page is not uptodate, read IO
is issued and this page will be uptodate.

I think this is good for pagesize == blocksize environment but there is
room for improvement on pagesize != blocksize environment.  Because in
this case a page can have multiple buffers and even if a page is not
uptodate, some buffers can be uptodate.

So I suggest that when all buffers which correspond to a part of a file
that we want to read are uptodate, use this pagecache and copy data from
this pagecache to user buffer even if a page is not uptodate.  This can
reduce read IO and improve system throughput.

I wrote a benchmark program and got result number with this program.

This benchmark do:

  1: mount and open a test file.

  2: create a 512MB file.

  3: close a file and umount.

  4: mount and again open a test file.

  5: pwrite randomly 300000 times on a test file.  offset is aligned
     by IO size(1024bytes).

  6: measure time of preading randomly 100000 times on a test file.

The result was:
	2.6.26
        330 sec

	2.6.26-patched
        226 sec

Arch:i386
Filesystem:ext3
Blocksize:1024 bytes
Memory: 1GB

On ext3/4, a file is written through buffer/block.  So random read/write
mixed workloads or random read after random write workloads are optimized
with this patch under pagesize != blocksize environment.  This test result
showed this.

The benchmark program is as follows:

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>

#define LEN 1024
#define LOOP 1024*512 /* 512MB */

main(void)
{
	unsigned long i, offset, filesize;
	int fd;
	char buf[LEN];
	time_t t1, t2;

	if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) {
		perror("cannot mount\n");
		exit(1);
	}
	memset(buf, 0, LEN);
	fd = open("/root/test1/testfile", O_CREAT|O_RDWR|O_TRUNC);
	if (fd < 0) {
		perror("cannot open file\n");
		exit(1);
	}
	for (i = 0; i < LOOP; i++)
		write(fd, buf, LEN);
	close(fd);
	if (umount("/root/test1/") < 0) {
		perror("cannot umount\n");
		exit(1);
	}
	if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) {
		perror("cannot mount\n");
		exit(1);
	}
	fd = open("/root/test1/testfile", O_RDWR);
	if (fd < 0) {
		perror("cannot open file\n");
		exit(1);
	}

	filesize = LEN * LOOP;
	for (i = 0; i < 300000; i++){
		offset = (random() % filesize) & (~(LEN - 1));
		pwrite(fd, buf, LEN, offset);
	}
	printf("start test\n");
	time(&t1);
	for (i = 0; i < 100000; i++){
		offset = (random() % filesize) & (~(LEN - 1));
		pread(fd, buf, LEN, offset);
	}
	time(&t2);
	printf("%ld sec\n", t2-t1);
	close(fd);
	if (umount("/root/test1/") < 0) {
		perror("cannot umount\n");
		exit(1);
	}
}

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jan Kara <jack@ucw.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                 | 46 +++++++++++++++++++++++
 fs/ext2/inode.c             |  1 +
 fs/ext3/inode.c             | 67 +++++++++++++++++----------------
 fs/ext4/inode.c             | 92 +++++++++++++++++++++++----------------------
 include/linux/buffer_head.h |  2 +
 include/linux/fs.h          | 44 +++++++++++-----------
 mm/filemap.c                | 14 ++++++-
 7 files changed, 167 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index f95805019639..ca12a6bb82b1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2095,6 +2095,52 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 }
 EXPORT_SYMBOL(generic_write_end);
 
+/*
+ * block_is_partially_uptodate checks whether buffers within a page are
+ * uptodate or not.
+ *
+ * Returns true if all buffers which correspond to a file portion
+ * we want to read are uptodate.
+ */
+int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
+					unsigned long from)
+{
+	struct inode *inode = page->mapping->host;
+	unsigned block_start, block_end, blocksize;
+	unsigned to;
+	struct buffer_head *bh, *head;
+	int ret = 1;
+
+	if (!page_has_buffers(page))
+		return 0;
+
+	blocksize = 1 << inode->i_blkbits;
+	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+	to = from + to;
+	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
+		return 0;
+
+	head = page_buffers(page);
+	bh = head;
+	block_start = 0;
+	do {
+		block_end = block_start + blocksize;
+		if (block_end > from && block_start < to) {
+			if (!buffer_uptodate(bh)) {
+				ret = 0;
+				break;
+			}
+			if (block_end >= to)
+				break;
+		}
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
+
+	return ret;
+}
+EXPORT_SYMBOL(block_is_partially_uptodate);
+
 /*
  * Generic "read page" function for block devices that have the normal
  * get_block functionality. This is most of the block device filesystems.
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 384fc0d1dd74..991d6dfeb51f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = {
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
 };
 
 const struct address_space_operations ext2_aops_xip = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3bf07d70b914..507d8689b111 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
 }
 
 static const struct address_space_operations ext3_ordered_aops = {
-	.readpage	= ext3_readpage,
-	.readpages	= ext3_readpages,
-	.writepage	= ext3_ordered_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext3_write_begin,
-	.write_end	= ext3_ordered_write_end,
-	.bmap		= ext3_bmap,
-	.invalidatepage	= ext3_invalidatepage,
-	.releasepage	= ext3_releasepage,
-	.direct_IO	= ext3_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext3_readpage,
+	.readpages		= ext3_readpages,
+	.writepage		= ext3_ordered_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext3_write_begin,
+	.write_end		= ext3_ordered_write_end,
+	.bmap			= ext3_bmap,
+	.invalidatepage		= ext3_invalidatepage,
+	.releasepage		= ext3_releasepage,
+	.direct_IO		= ext3_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext3_writeback_aops = {
-	.readpage	= ext3_readpage,
-	.readpages	= ext3_readpages,
-	.writepage	= ext3_writeback_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext3_write_begin,
-	.write_end	= ext3_writeback_write_end,
-	.bmap		= ext3_bmap,
-	.invalidatepage	= ext3_invalidatepage,
-	.releasepage	= ext3_releasepage,
-	.direct_IO	= ext3_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext3_readpage,
+	.readpages		= ext3_readpages,
+	.writepage		= ext3_writeback_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext3_write_begin,
+	.write_end		= ext3_writeback_write_end,
+	.bmap			= ext3_bmap,
+	.invalidatepage		= ext3_invalidatepage,
+	.releasepage		= ext3_releasepage,
+	.direct_IO		= ext3_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext3_journalled_aops = {
-	.readpage	= ext3_readpage,
-	.readpages	= ext3_readpages,
-	.writepage	= ext3_journalled_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext3_write_begin,
-	.write_end	= ext3_journalled_write_end,
-	.set_page_dirty	= ext3_journalled_set_page_dirty,
-	.bmap		= ext3_bmap,
-	.invalidatepage	= ext3_invalidatepage,
-	.releasepage	= ext3_releasepage,
+	.readpage		= ext3_readpage,
+	.readpages		= ext3_readpages,
+	.writepage		= ext3_journalled_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext3_write_begin,
+	.write_end		= ext3_journalled_write_end,
+	.set_page_dirty		= ext3_journalled_set_page_dirty,
+	.bmap			= ext3_bmap,
+	.invalidatepage		= ext3_invalidatepage,
+	.releasepage		= ext3_releasepage,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 void ext3_set_aops(struct inode *inode)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763df091..9843b046c235 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page)
 }
 
 static const struct address_space_operations ext4_ordered_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_normal_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_write_begin,
-	.write_end	= ext4_ordered_write_end,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_invalidatepage,
-	.releasepage	= ext4_releasepage,
-	.direct_IO	= ext4_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_normal_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_write_begin,
+	.write_end		= ext4_ordered_write_end,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_writeback_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_normal_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_write_begin,
-	.write_end	= ext4_writeback_write_end,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_invalidatepage,
-	.releasepage	= ext4_releasepage,
-	.direct_IO	= ext4_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_normal_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_write_begin,
+	.write_end		= ext4_writeback_write_end,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_journalled_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_journalled_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_write_begin,
-	.write_end	= ext4_journalled_write_end,
-	.set_page_dirty	= ext4_journalled_set_page_dirty,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_invalidatepage,
-	.releasepage	= ext4_releasepage,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_journalled_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_write_begin,
+	.write_end		= ext4_journalled_write_end,
+	.set_page_dirty		= ext4_journalled_set_page_dirty,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_da_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_da_writepage,
-	.writepages	= ext4_da_writepages,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_da_write_begin,
-	.write_end	= ext4_da_write_end,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_da_invalidatepage,
-	.releasepage	= ext4_releasepage,
-	.direct_IO	= ext4_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_da_writepage,
+	.writepages		= ext4_da_writepages,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_da_write_begin,
+	.write_end		= ext4_da_write_end,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_da_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 void ext4_set_aops(struct inode *inode)
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 82aa36c53ea7..50cfe8ceb478 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -205,6 +205,8 @@ void block_invalidatepage(struct page *page, unsigned long offset);
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
 int block_read_full_page(struct page*, get_block_t*);
+int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
+				unsigned long from);
 int block_write_begin(struct file *, struct address_space *,
 				loff_t, unsigned, unsigned,
 				struct page **, void **, get_block_t*);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8252b045e624..580b513668fe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -443,6 +443,27 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 	return i->count;
 }
 
+/*
+ * "descriptor" for what we're up to with a read.
+ * This allows us to use the same read code yet
+ * have multiple different users of the data that
+ * we read from a file.
+ *
+ * The simplest case just copies the data to user
+ * mode.
+ */
+typedef struct {
+	size_t written;
+	size_t count;
+	union {
+		char __user *buf;
+		void *data;
+	} arg;
+	int error;
+} read_descriptor_t;
+
+typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
+		unsigned long, unsigned long);
 
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -484,6 +505,8 @@ struct address_space_operations {
 	int (*migratepage) (struct address_space *,
 			struct page *, struct page *);
 	int (*launder_page) (struct page *);
+	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+					unsigned long);
 };
 
 /*
@@ -1198,27 +1221,6 @@ struct block_device_operations {
 	struct module *owner;
 };
 
-/*
- * "descriptor" for what we're up to with a read.
- * This allows us to use the same read code yet
- * have multiple different users of the data that
- * we read from a file.
- *
- * The simplest case just copies the data to user
- * mode.
- */
-typedef struct {
-	size_t written;
-	size_t count;
-	union {
-		char __user * buf;
-		void *data;
-	} arg;
-	int error;
-} read_descriptor_t;
-
-typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long);
-
 /* These macros are for out of kernel modules to test that
  * the kernel supports the unlocked_ioctl and compat_ioctl
  * fields in struct file_operations. */
diff --git a/mm/filemap.c b/mm/filemap.c
index 5de7633e1dbe..42bbc6909ba4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1023,8 +1023,17 @@ find_page:
 					ra, filp, page,
 					index, last_index - index);
 		}
-		if (!PageUptodate(page))
-			goto page_not_up_to_date;
+		if (!PageUptodate(page)) {
+			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
+					!mapping->a_ops->is_partially_uptodate)
+				goto page_not_up_to_date;
+			if (TestSetPageLocked(page))
+				goto page_not_up_to_date;
+			if (!mapping->a_ops->is_partially_uptodate(page,
+								desc, offset))
+				goto page_not_up_to_date_locked;
+			unlock_page(page);
+		}
 page_ok:
 		/*
 		 * i_size must be checked after we know the page is Uptodate.
@@ -1094,6 +1103,7 @@ page_not_up_to_date:
 		if (lock_page_killable(page))
 			goto readpage_eio;
 
+page_not_up_to_date_locked:
 		/* Did it get truncated before we got the lock? */
 		if (!page->mapping) {
 			unlock_page(page);
-- 
cgit v1.2.3


From 424f525a1241351da947fb48a938128ddd774511 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Tue, 29 Jul 2008 01:30:26 +0200
Subject: mfd: accept pure device as a parent, not only platform_device

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/mfd-core.c   | 14 +++++++-------
 drivers/mfd/tc6393xb.c   |  4 ++--
 include/linux/mfd/core.h |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index ad4e4d16a36a..9c9c126ed334 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 
-static int mfd_add_device(struct platform_device *parent,
+static int mfd_add_device(struct device *parent, int id,
 			  const struct mfd_cell *cell,
 			  struct resource *mem_base,
 			  int irq_base)
@@ -25,11 +25,11 @@ static int mfd_add_device(struct platform_device *parent,
 	int ret = -ENOMEM;
 	int r;
 
-	pdev = platform_device_alloc(cell->name, parent->id);
+	pdev = platform_device_alloc(cell->name, id);
 	if (!pdev)
 		goto fail_alloc;
 
-	pdev->dev.parent = &parent->dev;
+	pdev->dev.parent = parent;
 
 	ret = platform_device_add_data(pdev,
 			cell->platform_data, cell->data_size);
@@ -75,7 +75,7 @@ fail_alloc:
 	return ret;
 }
 
-int mfd_add_devices(struct platform_device *parent,
+int mfd_add_devices(struct device *parent, int id,
 		    const struct mfd_cell *cells, int n_devs,
 		    struct resource *mem_base,
 		    int irq_base)
@@ -84,7 +84,7 @@ int mfd_add_devices(struct platform_device *parent,
 	int ret = 0;
 
 	for (i = 0; i < n_devs; i++) {
-		ret = mfd_add_device(parent, cells + i, mem_base, irq_base);
+		ret = mfd_add_device(parent, id, cells + i, mem_base, irq_base);
 		if (ret)
 			break;
 	}
@@ -102,9 +102,9 @@ static int mfd_remove_devices_fn(struct device *dev, void *unused)
 	return 0;
 }
 
-void mfd_remove_devices(struct platform_device *parent)
+void mfd_remove_devices(struct device *parent)
 {
-	device_for_each_child(&parent->dev, NULL, mfd_remove_devices_fn);
+	device_for_each_child(parent, NULL, mfd_remove_devices_fn);
 }
 EXPORT_SYMBOL(mfd_remove_devices);
 
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 9908aaa4881a..f4fd797c1590 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -471,7 +471,7 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	tc6393xb_cells[TC6393XB_CELL_NAND].data_size =
 		sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]);
 
-	retval = mfd_add_devices(dev,
+	retval = mfd_add_devices(&dev->dev, dev->id,
 			tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
 			iomem, tcpd->irq_base);
 
@@ -505,7 +505,7 @@ static int __devexit tc6393xb_remove(struct platform_device *dev)
 	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
 	int ret;
 
-	mfd_remove_devices(dev);
+	mfd_remove_devices(&dev->dev);
 
 	if (tc6393xb->irq)
 		tc6393xb_detach_irq(dev);
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index ea45d4a5a2ac..49ef857cdb2d 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -45,11 +45,11 @@ struct mfd_cell {
 	const struct resource	*resources;
 };
 
-extern int mfd_add_devices(struct platform_device *parent,
+extern int mfd_add_devices(struct device *parent, int id,
 			   const struct mfd_cell *cells, int n_devs,
 			   struct resource *mem_base,
 			   int irq_base);
 
-extern void mfd_remove_devices(struct platform_device *parent);
+extern void mfd_remove_devices(struct device *parent);
 
 #endif
-- 
cgit v1.2.3


From e930bffe95e1e886a1ede80726ea38df5838d067 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@qumranet.com>
Date: Fri, 25 Jul 2008 16:24:52 +0200
Subject: KVM: Synchronize guest physical memory map to host virtual memory map

Synchronize changes to host virtual addresses which are part of
a KVM memory slot to the KVM shadow mmu.  This allows pte operations
like swapping, page migration, and madvise() to transparently work
with KVM.

Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c         | 100 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/paging_tmpl.h |  12 ++++
 include/asm-x86/kvm_host.h |   6 ++
 include/linux/kvm_host.h   |  24 ++++++++
 virt/kvm/kvm_main.c        | 135 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 277 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2fa231923cf7..0bfe2bd305eb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 	account_shadowed(kvm, gfn);
 }
 
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte;
+	int need_tlb_flush = 0;
+
+	while ((spte = rmap_next(kvm, rmapp, NULL))) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+		rmap_remove(kvm, spte);
+		set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+		need_tlb_flush = 1;
+	}
+	return need_tlb_flush;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+{
+	int i;
+	int retval = 0;
+
+	/*
+	 * If mmap_sem isn't taken, we can look the memslots with only
+	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
+	 */
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+			retval |= handler(kvm,
+					  &memslot->lpage_info[
+						  gfn_offset /
+						  KVM_PAGES_PER_HPAGE].rmap_pde);
+		}
+	}
+
+	return retval;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte;
+	int young = 0;
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		int _young;
+		u64 _spte = *spte;
+		BUG_ON(!(_spte & PT_PRESENT_MASK));
+		_young = _spte & PT_ACCESSED_MASK;
+		if (_young) {
+			young = 1;
+			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+		}
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+	return young;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
@@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	int r;
 	int largepage = 0;
 	pfn_t pfn;
+	unsigned long mmu_seq;
 
 	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 		largepage = 1;
 	}
 
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
@@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
 			 PT32E_ROOT_LEVEL);
@@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 
 	return r;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 
@@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	int r;
 	int largepage = 0;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
+	unsigned long mmu_seq;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 	if (is_error_pfn(pfn)) {
@@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		return 1;
 	}
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
 			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
@@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		vcpu->arch.update_pte.largepage = 1;
 	}
+	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4d918220baeb..f72ac1fa35f0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	pfn = vcpu->arch.update_pte.pfn;
 	if (is_error_pfn(pfn))
 		return;
+	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
+		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	int r;
 	pfn_t pfn;
 	int largepage = 0;
+	unsigned long mmu_seq;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 	kvm_mmu_audit(vcpu, "pre page fault");
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 			largepage = 1;
 		}
 	}
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 	up_read(&current->mm->mmap_sem);
 
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 				  largepage, &write_pt, pfn);
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return write_pt;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index bc34dc21f178..0f3c53114614 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -13,6 +13,7 @@
 
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/mmu_notifier.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
@@ -251,6 +252,7 @@ struct kvm_vcpu_arch {
 		gfn_t gfn;	/* presumed gfn during guest pte update */
 		pfn_t pfn;	/* pfn corresponding to that gfn */
 		int largepage;
+		unsigned long mmu_seq;
 	} update_pte;
 
 	struct i387_fxsave_struct host_fx_image;
@@ -729,4 +731,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 	KVM_EX_ENTRY " 666b, 667b \n\t" \
 	".popsection"
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 07d68a8ae8e9..8525afc53107 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -121,6 +121,12 @@ struct kvm {
 	struct kvm_coalesced_mmio_dev *coalesced_mmio_dev;
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
 #endif
+
+#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+	struct mmu_notifier mmu_notifier;
+	unsigned long mmu_notifier_seq;
+	long mmu_notifier_count;
+#endif
 };
 
 /* The guest did something we don't support. */
@@ -332,4 +338,22 @@ int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
 #define kvm_trace_cleanup() ((void)0)
 #endif
 
+#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
+{
+	if (unlikely(vcpu->kvm->mmu_notifier_count))
+		return 1;
+	/*
+	 * Both reads happen under the mmu_lock and both values are
+	 * modified under mmu_lock, so there's no need of smb_rmb()
+	 * here in between, otherwise mmu_notifier_count should be
+	 * read before mmu_notifier_seq, see
+	 * mmu_notifier_invalidate_range_end write side.
+	 */
+	if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
+		return 1;
+	return 0;
+}
+#endif
+
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3735212cd3f8..7dd9b0b85e4e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -192,6 +192,123 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
+{
+	return container_of(mn, struct kvm, mmu_notifier);
+}
+
+static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+					     struct mm_struct *mm,
+					     unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int need_tlb_flush;
+
+	/*
+	 * When ->invalidate_page runs, the linux pte has been zapped
+	 * already but the page is still allocated until
+	 * ->invalidate_page returns. So if we increase the sequence
+	 * here the kvm page fault will notice if the spte can't be
+	 * established because the page is going to be freed. If
+	 * instead the kvm page fault establishes the spte before
+	 * ->invalidate_page runs, kvm_unmap_hva will release it
+	 * before returning.
+	 *
+	 * The sequence increase only need to be seen at spin_unlock
+	 * time, and not at spin_lock time.
+	 *
+	 * Increasing the sequence after the spin_unlock would be
+	 * unsafe because the kvm page fault could then establish the
+	 * pte after kvm_unmap_hva returned, without noticing the page
+	 * is going to be freed.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	kvm->mmu_notifier_seq++;
+	need_tlb_flush = kvm_unmap_hva(kvm, address);
+	spin_unlock(&kvm->mmu_lock);
+
+	/* we've to flush the tlb before the pages can be freed */
+	if (need_tlb_flush)
+		kvm_flush_remote_tlbs(kvm);
+
+}
+
+static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						    struct mm_struct *mm,
+						    unsigned long start,
+						    unsigned long end)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int need_tlb_flush = 0;
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * The count increase must become visible at unlock time as no
+	 * spte can be established without taking the mmu_lock and
+	 * count is also read inside the mmu_lock critical section.
+	 */
+	kvm->mmu_notifier_count++;
+	for (; start < end; start += PAGE_SIZE)
+		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	spin_unlock(&kvm->mmu_lock);
+
+	/* we've to flush the tlb before the pages can be freed */
+	if (need_tlb_flush)
+		kvm_flush_remote_tlbs(kvm);
+}
+
+static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						  struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long end)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * This sequence increase will notify the kvm page fault that
+	 * the page that is going to be mapped in the spte could have
+	 * been freed.
+	 */
+	kvm->mmu_notifier_seq++;
+	/*
+	 * The above sequence increase must be visible before the
+	 * below count decrease but both values are read by the kvm
+	 * page fault under mmu_lock spinlock so we don't need to add
+	 * a smb_wmb() here in between the two.
+	 */
+	kvm->mmu_notifier_count--;
+	spin_unlock(&kvm->mmu_lock);
+
+	BUG_ON(kvm->mmu_notifier_count < 0);
+}
+
+static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
+					      struct mm_struct *mm,
+					      unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int young;
+
+	spin_lock(&kvm->mmu_lock);
+	young = kvm_age_hva(kvm, address);
+	spin_unlock(&kvm->mmu_lock);
+
+	if (young)
+		kvm_flush_remote_tlbs(kvm);
+
+	return young;
+}
+
+static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
+	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
+	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
+	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
+};
+#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
+
 static struct kvm *kvm_create_vm(void)
 {
 	struct kvm *kvm = kvm_arch_create_vm();
@@ -212,6 +329,21 @@ static struct kvm *kvm_create_vm(void)
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
 #endif
 
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+	{
+		int err;
+		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+		if (err) {
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+			put_page(page);
+#endif
+			kfree(kvm);
+			return ERR_PTR(err);
+		}
+	}
+#endif
+
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
@@ -271,6 +403,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	if (kvm->coalesced_mmio_ring != NULL)
 		free_page((unsigned long)kvm->coalesced_mmio_ring);
+#endif
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
-- 
cgit v1.2.3


From ed8486243379ef3e6c61363df915882945c0eaec Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 29 Jul 2008 11:30:57 +0300
Subject: KVM: Advertise synchronized mmu support to userspace

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c  | 1 +
 include/linux/kvm.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c7b01efe0646..0d682fc6aeb3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_SYNC_MMU:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0ea064cbfbc8..69511f74f912 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -371,6 +371,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_PV_MMU 13
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
+#define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 
 /*
  * ioctls for VM fds
-- 
cgit v1.2.3


From 8978b74253280d59e97cf49a3ec2c0cbccd5b801 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 29 Jul 2008 13:38:53 +0900
Subject: generic, x86: fix add iommu_num_pages helper function

This IOMMU helper function doesn't work for some architectures:

  http://marc.info/?l=linux-kernel&m=121699304403202&w=2

It also breaks POWER and SPARC builds:

  http://marc.info/?l=linux-kernel&m=121730388001890&w=2

Currently, only x86 IOMMUs use this so let's move it to x86 for
now.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-dma.c    | 8 ++++++++
 include/asm-x86/iommu.h      | 2 ++
 include/linux/iommu-helper.h | 1 -
 lib/iommu-helper.c           | 8 --------
 4 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 8dbffb846de9..87d4d6964ec2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
 
 	pci_swiotlb_init();
 }
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+	return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
 #endif
 
 /*
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index ecc8061904a9..5f888cc5be49 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -7,6 +7,8 @@ extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
+extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
+
 #ifdef CONFIG_GART_IOMMU
 extern int gart_iommu_aperture;
 extern int gart_iommu_aperture_allowed;
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index f8598f583944..c975caf75385 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -8,4 +8,3 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 				      unsigned long align_mask);
 extern void iommu_area_free(unsigned long *map, unsigned long start,
 			    unsigned int nr);
-extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 889ddce2021e..a3b8d4c3f77a 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -80,11 +80,3 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
 	}
 }
 EXPORT_SYMBOL(iommu_area_free);
-
-unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
-{
-	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
-
-	return size >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(iommu_num_pages);
-- 
cgit v1.2.3


From 1795cf48b322b4d19230a40dbe7181acedd34a94 Mon Sep 17 00:00:00 2001
From: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Date: Tue, 29 Jul 2008 22:10:56 +0900
Subject: sh/maple: clean maple bus code

This patch cleans up the handling of the maple bus queue to remove
the risk of races when adding packets. It also removes references to the
redundant connect and disconnect functions.

Signed-off-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/maple/maple.c | 265 ++++++++++++++++++++++++++++++++---------------
 include/linux/maple.h    |   6 +-
 2 files changed, 189 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index 617efb1640b1..be97789fa5fd 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -24,13 +24,12 @@
 #include <linux/slab.h>
 #include <linux/maple.h>
 #include <linux/dma-mapping.h>
+#include <linux/delay.h>
 #include <asm/cacheflush.h>
 #include <asm/dma.h>
 #include <asm/io.h>
-#include <asm/mach/dma.h>
-#include <asm/mach/sysasic.h>
-#include <asm/mach/maple.h>
-#include <linux/delay.h>
+#include <mach/dma.h>
+#include <mach/sysasic.h>
 
 MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin");
 MODULE_DESCRIPTION("Maple bus driver for Dreamcast");
@@ -46,14 +45,15 @@ static DECLARE_WORK(maple_vblank_process, maple_vblank_handler);
 static LIST_HEAD(maple_waitq);
 static LIST_HEAD(maple_sentq);
 
-static DEFINE_MUTEX(maple_list_lock);
+/* mutex to protect queue of waiting packets */
+static DEFINE_MUTEX(maple_wlist_lock);
 
 static struct maple_driver maple_dummy_driver;
 static struct device maple_bus;
 static int subdevice_map[MAPLE_PORTS];
 static unsigned long *maple_sendbuf, *maple_sendptr, *maple_lastptr;
 static unsigned long maple_pnp_time;
-static int started, scanning, liststatus, fullscan;
+static int started, scanning, fullscan;
 static struct kmem_cache *maple_queue_cache;
 
 struct maple_device_specify {
@@ -129,35 +129,124 @@ static void maple_release_device(struct device *dev)
 	kfree(mdev);
 }
 
-/**
+/*
  * maple_add_packet - add a single instruction to the queue
- * @mq: instruction to add to waiting queue
+ * @mdev - maple device
+ * @function - function on device being queried
+ * @command - maple command to add
+ * @length - length of command string (in 32 bit words)
+ * @data - remainder of command string
  */
-void maple_add_packet(struct mapleq *mq)
+int maple_add_packet(struct maple_device *mdev, u32 function, u32 command,
+	size_t length, void *data)
 {
-	mutex_lock(&maple_list_lock);
-	list_add(&mq->list, &maple_waitq);
-	mutex_unlock(&maple_list_lock);
+	int locking, ret = 0;
+	void *sendbuf = NULL;
+
+	mutex_lock(&maple_wlist_lock);
+	/* bounce if device already locked */
+	locking = mutex_is_locked(&mdev->mq->mutex);
+	if (locking) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	mutex_lock(&mdev->mq->mutex);
+
+	if (length) {
+		sendbuf = kmalloc(length * 4, GFP_KERNEL);
+		if (!sendbuf) {
+			mutex_unlock(&mdev->mq->mutex);
+			ret = -ENOMEM;
+			goto out;
+		}
+		((__be32 *)sendbuf)[0] = cpu_to_be32(function);
+	}
+
+	mdev->mq->command = command;
+	mdev->mq->length = length;
+	if (length > 1)
+		memcpy(sendbuf + 4, data, (length - 1) * 4);
+	mdev->mq->sendbuf = sendbuf;
+
+	list_add(&mdev->mq->list, &maple_waitq);
+out:
+	mutex_unlock(&maple_wlist_lock);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(maple_add_packet);
 
+/*
+ * maple_add_packet_sleeps - add a single instruction to the queue
+ *  - waits for lock to be free
+ * @mdev - maple device
+ * @function - function on device being queried
+ * @command - maple command to add
+ * @length - length of command string (in 32 bit words)
+ * @data - remainder of command string
+ */
+int maple_add_packet_sleeps(struct maple_device *mdev, u32 function,
+	u32 command, size_t length, void *data)
+{
+	int locking, ret = 0;
+	void *sendbuf = NULL;
+
+	locking = mutex_lock_interruptible(&mdev->mq->mutex);
+	if (locking) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (length) {
+		sendbuf = kmalloc(length * 4, GFP_KERNEL);
+		if (!sendbuf) {
+			mutex_unlock(&mdev->mq->mutex);
+			ret = -ENOMEM;
+			goto out;
+		}
+		((__be32 *)sendbuf)[0] = cpu_to_be32(function);
+	}
+
+	mdev->mq->command = command;
+	mdev->mq->length = length;
+	if (length > 1)
+		memcpy(sendbuf + 4, data, (length - 1) * 4);
+	mdev->mq->sendbuf = sendbuf;
+
+	mutex_lock(&maple_wlist_lock);
+	list_add(&mdev->mq->list, &maple_waitq);
+	mutex_unlock(&maple_wlist_lock);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(maple_add_packet_sleeps);
+
 static struct mapleq *maple_allocq(struct maple_device *mdev)
 {
 	struct mapleq *mq;
 
 	mq = kmalloc(sizeof(*mq), GFP_KERNEL);
 	if (!mq)
-		return NULL;
+		goto failed_nomem;
 
 	mq->dev = mdev;
 	mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL);
 	mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp);
-	if (!mq->recvbuf) {
-		kfree(mq);
-		return NULL;
-	}
+	if (!mq->recvbuf)
+		goto failed_p2;
+	/*
+	 * most devices do not need the mutex - but
+	 * anything that injects block reads or writes
+	 * will rely on it
+	 */
+	mutex_init(&mq->mutex);
 
 	return mq;
+
+failed_p2:
+	kfree(mq);
+failed_nomem:
+	return NULL;
 }
 
 static struct maple_device *maple_alloc_dev(int port, int unit)
@@ -178,7 +267,6 @@ static struct maple_device *maple_alloc_dev(int port, int unit)
 	}
 	mdev->dev.bus = &maple_bus_type;
 	mdev->dev.parent = &maple_bus;
-	mdev->function = 0;
 	return mdev;
 }
 
@@ -216,7 +304,6 @@ static void maple_build_block(struct mapleq *mq)
 	*maple_sendptr++ = PHYSADDR(mq->recvbuf);
 	*maple_sendptr++ =
 	    mq->command | (to << 8) | (from << 16) | (len << 24);
-
 	while (len-- > 0)
 		*maple_sendptr++ = *lsendbuf++;
 }
@@ -224,22 +311,27 @@ static void maple_build_block(struct mapleq *mq)
 /* build up command queue */
 static void maple_send(void)
 {
-	int i;
-	int maple_packets;
+	int i, maple_packets = 0;
 	struct mapleq *mq, *nmq;
 
 	if (!list_empty(&maple_sentq))
 		return;
-	if (list_empty(&maple_waitq) || !maple_dma_done())
+	mutex_lock(&maple_wlist_lock);
+	if (list_empty(&maple_waitq) || !maple_dma_done()) {
+		mutex_unlock(&maple_wlist_lock);
 		return;
-	maple_packets = 0;
-	maple_sendptr = maple_lastptr = maple_sendbuf;
+	}
+	mutex_unlock(&maple_wlist_lock);
+	maple_lastptr = maple_sendbuf;
+	maple_sendptr = maple_sendbuf;
+	mutex_lock(&maple_wlist_lock);
 	list_for_each_entry_safe(mq, nmq, &maple_waitq, list) {
 		maple_build_block(mq);
 		list_move(&mq->list, &maple_sentq);
 		if (maple_packets++ > MAPLE_MAXPACKETS)
 			break;
 	}
+	mutex_unlock(&maple_wlist_lock);
 	if (maple_packets > 0) {
 		for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
 			dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE,
@@ -247,7 +339,8 @@ static void maple_send(void)
 	}
 }
 
-static int attach_matching_maple_driver(struct device_driver *driver,
+/* check if there is a driver registered likely to match this device */
+static int check_matching_maple_driver(struct device_driver *driver,
 					void *devptr)
 {
 	struct maple_driver *maple_drv;
@@ -255,12 +348,8 @@ static int attach_matching_maple_driver(struct device_driver *driver,
 
 	mdev = devptr;
 	maple_drv = to_maple_driver(driver);
-	if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) {
-		if (maple_drv->connect(mdev) == 0) {
-			mdev->driver = maple_drv;
-			return 1;
-		}
-	}
+	if (mdev->devinfo.function & cpu_to_be32(maple_drv->function))
+		return 1;
 	return 0;
 }
 
@@ -268,11 +357,6 @@ static void maple_detach_driver(struct maple_device *mdev)
 {
 	if (!mdev)
 		return;
-	if (mdev->driver) {
-		if (mdev->driver->disconnect)
-			mdev->driver->disconnect(mdev);
-	}
-	mdev->driver = NULL;
 	device_unregister(&mdev->dev);
 	mdev = NULL;
 }
@@ -328,8 +412,8 @@ static void maple_attach_driver(struct maple_device *mdev)
 			mdev->port, mdev->unit, function);
 
 		matched =
-		    bus_for_each_drv(&maple_bus_type, NULL, mdev,
-				     attach_matching_maple_driver);
+			bus_for_each_drv(&maple_bus_type, NULL, mdev,
+				check_matching_maple_driver);
 
 		if (matched == 0) {
 			/* Driver does not exist yet */
@@ -373,45 +457,48 @@ static int detach_maple_device(struct device *device, void *portptr)
 
 static int setup_maple_commands(struct device *device, void *ignored)
 {
+	int add;
 	struct maple_device *maple_dev = to_maple_dev(device);
 
 	if ((maple_dev->interval > 0)
 	    && time_after(jiffies, maple_dev->when)) {
-		maple_dev->when = jiffies + maple_dev->interval;
-		maple_dev->mq->command = MAPLE_COMMAND_GETCOND;
-		maple_dev->mq->sendbuf = &maple_dev->function;
-		maple_dev->mq->length = 1;
-		maple_add_packet(maple_dev->mq);
-		liststatus++;
+		/* bounce if we cannot lock */
+		add = maple_add_packet(maple_dev,
+			be32_to_cpu(maple_dev->devinfo.function),
+			MAPLE_COMMAND_GETCOND, 1, NULL);
+		if (!add)
+			maple_dev->when = jiffies + maple_dev->interval;
 	} else {
-		if (time_after(jiffies, maple_pnp_time)) {
-			maple_dev->mq->command = MAPLE_COMMAND_DEVINFO;
-			maple_dev->mq->length = 0;
-			maple_add_packet(maple_dev->mq);
-			liststatus++;
-		}
+		if (time_after(jiffies, maple_pnp_time))
+			/* This will also bounce */
+			maple_add_packet(maple_dev, 0,
+				MAPLE_COMMAND_DEVINFO, 0, NULL);
 	}
-
 	return 0;
 }
 
 /* VBLANK bottom half - implemented via workqueue */
 static void maple_vblank_handler(struct work_struct *work)
 {
-	if (!maple_dma_done())
-		return;
-	if (!list_empty(&maple_sentq))
+	if (!list_empty(&maple_sentq) || !maple_dma_done())
 		return;
+
 	ctrl_outl(0, MAPLE_ENABLE);
-	liststatus = 0;
+
 	bus_for_each_dev(&maple_bus_type, NULL, NULL,
 			 setup_maple_commands);
+
 	if (time_after(jiffies, maple_pnp_time))
 		maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL;
-	if (liststatus && list_empty(&maple_sentq)) {
-		INIT_LIST_HEAD(&maple_sentq);
+
+	mutex_lock(&maple_wlist_lock);
+	if (!list_empty(&maple_waitq) && list_empty(&maple_sentq)) {
+		mutex_unlock(&maple_wlist_lock);
 		maple_send();
+	} else {
+		mutex_unlock(&maple_wlist_lock);
 	}
+
 	maplebus_dma_reset();
 }
 
@@ -422,8 +509,8 @@ static void maple_map_subunits(struct maple_device *mdev, int submask)
 	struct maple_device *mdev_add;
 	struct maple_device_specify ds;
 
+	ds.port = mdev->port;
 	for (k = 0; k < 5; k++) {
-		ds.port = mdev->port;
 		ds.unit = k + 1;
 		retval =
 		    bus_for_each_dev(&maple_bus_type, NULL, &ds,
@@ -437,9 +524,9 @@ static void maple_map_subunits(struct maple_device *mdev, int submask)
 			mdev_add = maple_alloc_dev(mdev->port, k + 1);
 			if (!mdev_add)
 				return;
-			mdev_add->mq->command = MAPLE_COMMAND_DEVINFO;
-			mdev_add->mq->length = 0;
-			maple_add_packet(mdev_add->mq);
+			maple_add_packet(mdev_add, 0, MAPLE_COMMAND_DEVINFO,
+				0, NULL);
+			/* mark that we are checking sub devices */
 			scanning = 1;
 		}
 		submask = submask >> 1;
@@ -505,6 +592,28 @@ static void maple_response_devinfo(struct maple_device *mdev,
 	}
 }
 
+static void maple_port_rescan(void)
+{
+	int i;
+	struct maple_device *mdev;
+
+	fullscan = 1;
+	for (i = 0; i < MAPLE_PORTS; i++) {
+		if (checked[i] == false) {
+			fullscan = 0;
+			mdev = baseunits[i];
+			/*
+			 *  test lock in case scan has failed
+			 *  but device is still locked
+			 */
+			if (mutex_is_locked(&mdev->mq->mutex))
+				mutex_unlock(&mdev->mq->mutex);
+			maple_add_packet(mdev, 0, MAPLE_COMMAND_DEVINFO,
+				0, NULL);
+		}
+	}
+}
+
 /* maple dma end bottom half - implemented via workqueue */
 static void maple_dma_handler(struct work_struct *work)
 {
@@ -512,7 +621,6 @@ static void maple_dma_handler(struct work_struct *work)
 	struct maple_device *dev;
 	char *recvbuf;
 	enum maple_code code;
-	int i;
 
 	if (!maple_dma_done())
 		return;
@@ -522,6 +630,10 @@ static void maple_dma_handler(struct work_struct *work)
 			recvbuf = mq->recvbuf;
 			code = recvbuf[0];
 			dev = mq->dev;
+			kfree(mq->sendbuf);
+			mutex_unlock(&mq->mutex);
+			list_del_init(&mq->list);
+
 			switch (code) {
 			case MAPLE_RESPONSE_NONE:
 				maple_response_none(dev, mq);
@@ -558,26 +670,16 @@ static void maple_dma_handler(struct work_struct *work)
 				break;
 			}
 		}
-		INIT_LIST_HEAD(&maple_sentq);
+		/* if scanning is 1 then we have subdevices to check */
 		if (scanning == 1) {
 			maple_send();
 			scanning = 2;
 		} else
 			scanning = 0;
-
-		if (!fullscan) {
-			fullscan = 1;
-			for (i = 0; i < MAPLE_PORTS; i++) {
-				if (checked[i] == false) {
-					fullscan = 0;
-					dev = baseunits[i];
-					dev->mq->command =
-						MAPLE_COMMAND_DEVINFO;
-					dev->mq->length = 0;
-					maple_add_packet(dev->mq);
-				}
-			}
-		}
+		/*check if we have actually tested all ports yet */
+		if (!fullscan)
+			maple_port_rescan();
+		/* mark that we have been through the first scan */
 		if (started == 0)
 			started = 1;
 	}
@@ -631,7 +733,7 @@ static int match_maple_bus_driver(struct device *devptr,
 	if (maple_dev->devinfo.function == 0xFFFFFFFF)
 		return 0;
 	else if (maple_dev->devinfo.function &
-		 be32_to_cpu(maple_drv->function))
+		 cpu_to_be32(maple_drv->function))
 		return 1;
 	return 0;
 }
@@ -713,6 +815,9 @@ static int __init maple_bus_init(void)
 	if (!maple_queue_cache)
 		goto cleanup_bothirqs;
 
+	INIT_LIST_HEAD(&maple_waitq);
+	INIT_LIST_HEAD(&maple_sentq);
+
 	/* setup maple ports */
 	for (i = 0; i < MAPLE_PORTS; i++) {
 		checked[i] = false;
@@ -723,9 +828,7 @@ static int __init maple_bus_init(void)
 				maple_free_dev(mdev[i]);
 			goto cleanup_cache;
 		}
-		mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO;
-		mdev[i]->mq->length = 0;
-		maple_add_packet(mdev[i]->mq);
+		maple_add_packet(mdev[i], 0, MAPLE_COMMAND_DEVINFO, 0, NULL);
 		subdevice_map[i] = 0;
 	}
 
diff --git a/include/linux/maple.h b/include/linux/maple.h
index 523a286bb477..c853b1066018 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -2,6 +2,7 @@
 #define __LINUX_MAPLE_H
 
 #include <linux/device.h>
+#include <mach/maple.h>
 
 extern struct bus_type maple_bus_type;
 
@@ -33,6 +34,7 @@ struct mapleq {
 	void *sendbuf, *recvbuf, *recvbufdcsp;
 	unsigned char length;
 	enum maple_code command;
+	struct mutex mutex;
 };
 
 struct maple_devinfo {
@@ -69,7 +71,9 @@ void maple_getcond_callback(struct maple_device *dev,
 			    unsigned long interval,
 			    unsigned long function);
 int maple_driver_register(struct device_driver *drv);
-void maple_add_packet(struct mapleq *mq);
+int maple_add_packet_sleeps(struct maple_device *mdev, u32 function,
+	u32 command, u32 length, void *data);
+void maple_clear_dev(struct maple_device *mdev);
 
 #define to_maple_dev(n) container_of(n, struct maple_device, dev)
 #define to_maple_driver(n) container_of(n, struct maple_driver, drv)
-- 
cgit v1.2.3


From f1b23361a0f15497d4c6795a2935b2e98064ddfb Mon Sep 17 00:00:00 2001
From: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Date: Mon, 21 Jul 2008 21:18:19 -0300
Subject: rfkill: document the rfkill struct locking (v2)

Reorder fields in struct rfkill and add comments to make it clear
which fields are protected by rfkill->mutex.

Signed-off-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index c5f6e54ec6ae..741d1a62cc3f 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -68,7 +68,8 @@ enum rfkill_state {
  * @user_claim_unsupported: Whether the hardware supports exclusive
  *	RF-kill control by userspace. Set this before registering.
  * @user_claim: Set when the switch is controlled exlusively by userspace.
- * @mutex: Guards switch state transitions
+ * @mutex: Guards switch state transitions.  It serializes callbacks
+ *	and also protects the state.
  * @data: Pointer to the RF button drivers private data which will be
  *	passed along when toggling radio state.
  * @toggle_radio(): Mandatory handler to control state of the radio.
@@ -89,12 +90,13 @@ struct rfkill {
 	const char *name;
 	enum rfkill_type type;
 
-	enum rfkill_state state;
 	bool user_claim_unsupported;
 	bool user_claim;
 
+	/* the mutex serializes callbacks and also protects
+	 * the state */
 	struct mutex mutex;
-
+	enum rfkill_state state;
 	void *data;
 	int (*toggle_radio)(void *data, enum rfkill_state state);
 	int (*get_state)(void *data, enum rfkill_state *state);
-- 
cgit v1.2.3


From d0f09804144fd9471a13cf4d80e66842c7fa114f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 29 Jul 2008 11:32:07 +0200
Subject: mac80211: partially fix skb->cb use

This patch fixes mac80211 to not use the skb->cb over the queue step
from virtual interfaces to the master. The patch also, for now,
disables aggregation because that would still require requeuing,
will fix that in a separate patch. There are two other places (software
requeue and powersaving stations) where requeue can happen, but that is
not currently used by any drivers/not possible to use respectively.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath5k/base.c           |  2 +-
 drivers/net/wireless/b43/xmit.c             |  2 +-
 drivers/net/wireless/b43legacy/xmit.c       |  2 +-
 drivers/net/wireless/iwlwifi/iwl-tx.c       |  2 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c |  2 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c     |  2 +-
 include/linux/skbuff.h                      |  5 ++-
 include/net/mac80211.h                      |  6 ----
 net/core/skbuff.c                           |  3 ++
 net/mac80211/main.c                         |  8 +----
 net/mac80211/mlme.c                         |  8 ++---
 net/mac80211/tx.c                           | 47 +++++++++++++----------------
 net/mac80211/wme.c                          |  3 ++
 13 files changed, 40 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c
index 1106d1c06298..ff3fad794b61 100644
--- a/drivers/net/wireless/ath5k/base.c
+++ b/drivers/net/wireless/ath5k/base.c
@@ -1237,7 +1237,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf)
 
 	pktlen = skb->len;
 
-	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) {
+	if (info->control.hw_key) {
 		keyidx = info->control.hw_key->hw_key_idx;
 		pktlen += info->control.icv_len;
 	}
diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c
index 8d54502222a6..9dda8169f7cc 100644
--- a/drivers/net/wireless/b43/xmit.c
+++ b/drivers/net/wireless/b43/xmit.c
@@ -192,7 +192,7 @@ int b43_generate_txhdr(struct b43_wldev *dev,
 	const struct b43_phy *phy = &dev->phy;
 	const struct ieee80211_hdr *wlhdr =
 	    (const struct ieee80211_hdr *)fragment_data;
-	int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
+	int use_encryption = !!info->control.hw_key;
 	__le16 fctl = wlhdr->frame_control;
 	struct ieee80211_rate *fbrate;
 	u8 rate, rate_fb;
diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c
index e969ed8d412d..68e1f8c78727 100644
--- a/drivers/net/wireless/b43legacy/xmit.c
+++ b/drivers/net/wireless/b43legacy/xmit.c
@@ -192,7 +192,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev,
 			       u16 cookie)
 {
 	const struct ieee80211_hdr *wlhdr;
-	int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT));
+	int use_encryption = !!info->control.hw_key;
 	u16 fctl;
 	u8 rate;
 	struct ieee80211_rate *rate_fb;
diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c
index 9b50b1052b09..f72cd0bf6aa3 100644
--- a/drivers/net/wireless/iwlwifi/iwl-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-tx.c
@@ -906,7 +906,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 	 * first entry */
 	iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
 
-	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+	if (info->control.hw_key)
 		iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id);
 
 	/* Set up TFD's 2nd entry to point directly to remainder of skb,
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index 05121f395c4e..7c82ecfa30a4 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -2667,7 +2667,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb)
 	 * first entry */
 	iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len);
 
-	if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+	if (info->control.hw_key)
 		iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0);
 
 	/* Set up TFD's 2nd entry to point directly to remainder of skb,
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 042ab00d8bd2..c3ee4ecba792 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -63,7 +63,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	 */
 	memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb));
 	rts_info = IEEE80211_SKB_CB(skb);
-	rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	rts_info->control.hw_key = NULL;
 	rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS;
 	rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT;
 	rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ea44f6621f2..a640385e0598 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -316,7 +316,10 @@ struct sk_buff {
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
-	/* 14 bit hole */
+#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
+	__u8			do_not_encrypt:1;
+#endif
+	/* 0/13/14 bit hole */
 
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 74487f268237..b52721008be8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -206,8 +206,6 @@ struct ieee80211_bss_conf {
  * These flags are used with the @flags member of &ieee80211_tx_info.
  *
  * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame.
- * @IEEE80211_TX_CTL_DO_NOT_ENCRYPT: send this frame without encryption;
- *	e.g., for EAPOL frame
  * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame
  * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g.,
  *	for combined 802.11g / 802.11b networks)
@@ -220,7 +218,6 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD
  * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the
  *	through set_retry_limit configured long retry value
- * @IEEE80211_TX_CTL_EAPOL_FRAME: internal to mac80211
  * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon
  * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU
  * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number
@@ -253,7 +250,6 @@ struct ieee80211_bss_conf {
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
-	IEEE80211_TX_CTL_DO_NOT_ENCRYPT		= BIT(1),
 	IEEE80211_TX_CTL_USE_RTS_CTS		= BIT(2),
 	IEEE80211_TX_CTL_USE_CTS_PROTECT	= BIT(3),
 	IEEE80211_TX_CTL_NO_ACK			= BIT(4),
@@ -263,7 +259,6 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_FIRST_FRAGMENT		= BIT(8),
 	IEEE80211_TX_CTL_SHORT_PREAMBLE		= BIT(9),
 	IEEE80211_TX_CTL_LONG_RETRY_LIMIT	= BIT(10),
-	IEEE80211_TX_CTL_EAPOL_FRAME		= BIT(11),
 	IEEE80211_TX_CTL_SEND_AFTER_DTIM	= BIT(12),
 	IEEE80211_TX_CTL_AMPDU			= BIT(13),
 	IEEE80211_TX_CTL_OFDM_HT		= BIT(14),
@@ -323,7 +318,6 @@ struct ieee80211_tx_info {
 			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
 			unsigned long jiffies;
-			int ifindex;
 			u16 aid;
 			s8 rts_cts_rate_idx, alt_retry_rate_idx;
 			u8 retry_limit;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e0c92274189..84640172d65d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -485,6 +485,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head);
 	C(data);
 	C(truesize);
+#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
+	C(do_not_encrypt);
+#endif
 	atomic_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b5830f7055cf..a4c5b90de769 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1233,18 +1233,12 @@ static void ieee80211_tasklet_handler(unsigned long data)
 /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to
  * make a prepared TX frame (one that has been given to hw) to look like brand
  * new IEEE 802.11 frame that is ready to go through TX processing again.
- * Also, tx_packet_data in cb is restored from tx_control. */
+ */
 static void ieee80211_remove_tx_extra(struct ieee80211_local *local,
 				      struct ieee80211_key *key,
 				      struct sk_buff *skb)
 {
 	int hdrlen, iv_len, mic_len;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
-	info->flags &=	IEEE80211_TX_CTL_REQ_TX_STATUS |
-			IEEE80211_TX_CTL_DO_NOT_ENCRYPT |
-			IEEE80211_TX_CTL_REQUEUE |
-			IEEE80211_TX_CTL_EAPOL_FRAME;
 
 	hdrlen = ieee80211_get_hdrlen_from_skb(skb);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d7c371e36bf0..35eb767cbcbe 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -606,7 +606,6 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
 		      int encrypt)
 {
 	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_tx_info *info;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	skb->dev = sdata->local->mdev;
@@ -614,11 +613,8 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb,
 	skb_set_network_header(skb, 0);
 	skb_set_transport_header(skb, 0);
 
-	info = IEEE80211_SKB_CB(skb);
-	memset(info, 0, sizeof(struct ieee80211_tx_info));
-	info->control.ifindex = sdata->dev->ifindex;
-	if (!encrypt)
-		info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	skb->iif = sdata->dev->ifindex;
+	skb->do_not_encrypt = !encrypt;
 
 	dev_queue_xmit(skb);
 }
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c5f78059c6ca..69019e943873 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -439,14 +439,14 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	u16 fc = tx->fc;
 
-	if (unlikely(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT))
+	if (unlikely(tx->skb->do_not_encrypt))
 		tx->key = NULL;
 	else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
 		tx->key = key;
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
-		 !(info->flags & IEEE80211_TX_CTL_EAPOL_FRAME) &&
+		 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) &&
 		 !(info->flags & IEEE80211_TX_CTL_INJECTED)) {
 		I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
 		return TX_DROP;
@@ -476,7 +476,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	}
 
 	if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
-		info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+		tx->skb->do_not_encrypt = 1;
 
 	return TX_CONTINUE;
 }
@@ -732,6 +732,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
 		memcpy(skb_put(frag, copylen), pos, copylen);
 		memcpy(frag->cb, first->cb, sizeof(frag->cb));
 		skb_copy_queue_mapping(frag, first);
+		frag->do_not_encrypt = first->do_not_encrypt;
 
 		pos += copylen;
 		left -= copylen;
@@ -852,7 +853,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
-	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+	skb->do_not_encrypt = 1;
 	info->flags |= IEEE80211_TX_CTL_INJECTED;
 	tx->flags &= ~IEEE80211_TX_FRAGMENTED;
 
@@ -925,8 +926,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 				skb_trim(skb, skb->len - FCS_LEN);
 			}
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP)
-				info->flags &=
-					~IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
+				tx->skb->do_not_encrypt = 0;
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG)
 				tx->flags |= IEEE80211_TX_FRAGMENTED;
 			break;
@@ -1042,10 +1042,9 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
 				struct sk_buff *skb,
 				struct net_device *mdev)
 {
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct net_device *dev;
 
-	dev = dev_get_by_index(&init_net, info->control.ifindex);
+	dev = dev_get_by_index(&init_net, skb->iif);
 	if (unlikely(dev && !is_ieee80211_device(dev, mdev))) {
 		dev_put(dev);
 		dev = NULL;
@@ -1306,8 +1305,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 	bool may_encrypt;
 	int ret;
 
-	if (info->control.ifindex)
-		odev = dev_get_by_index(&init_net, info->control.ifindex);
+	if (skb->iif)
+		odev = dev_get_by_index(&init_net, skb->iif);
 	if (unlikely(odev && !is_ieee80211_device(odev, dev))) {
 		dev_put(odev);
 		odev = NULL;
@@ -1321,9 +1320,13 @@ int ieee80211_master_start_xmit(struct sk_buff *skb,
 		return 0;
 	}
 
+	memset(info, 0, sizeof(*info));
+
+	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+
 	osdata = IEEE80211_DEV_TO_SUB_IF(odev);
 
-	may_encrypt = !(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT);
+	may_encrypt = !skb->do_not_encrypt;
 
 	headroom = osdata->local->tx_headroom;
 	if (may_encrypt)
@@ -1348,7 +1351,6 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
 				 struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_radiotap_header *prthdr =
 		(struct ieee80211_radiotap_header *)skb->data;
 	u16 len_rthdr;
@@ -1371,11 +1373,11 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
 	skb->dev = local->mdev;
 
 	/* needed because we set skb device to master */
-	info->control.ifindex = dev->ifindex;
+	skb->iif = dev->ifindex;
 
-	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
-	/* Interfaces should always request a status report */
-	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+	/* sometimes we do encrypt injected frames, will be fixed
+	 * up in radiotap parser if not wanted */
+	skb->do_not_encrypt = 0;
 
 	/*
 	 * fix up the pointers accounting for the radiotap
@@ -1419,7 +1421,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 			       struct net_device *dev)
 {
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_tx_info *info;
 	struct ieee80211_sub_if_data *sdata;
 	int ret = 1, head_need;
 	u16 ethertype, hdrlen,  meshhdrlen = 0;
@@ -1645,14 +1646,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
 	nh_pos += hdrlen;
 	h_pos += hdrlen;
 
-	info = IEEE80211_SKB_CB(skb);
-	memset(info, 0, sizeof(*info));
-	info->control.ifindex = dev->ifindex;
-	if (ethertype == ETH_P_PAE)
-		info->flags |= IEEE80211_TX_CTL_EAPOL_FRAME;
-
-	/* Interfaces should always request a status report */
-	info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+	skb->iif = dev->ifindex;
 
 	skb->dev = local->mdev;
 	dev->stats.tx_packets++;
@@ -1922,6 +1916,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 
 	info = IEEE80211_SKB_CB(skb);
 
+	skb->do_not_encrypt = 1;
+
 	info->band = band;
 	rate_control_get_rate(local->mdev, sband, skb, &rsel);
 
@@ -1940,7 +1936,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	info->tx_rate_idx = rsel.rate_idx;
 
 	info->flags |= IEEE80211_TX_CTL_NO_ACK;
-	info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT;
 	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 	info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
 	if (sdata->bss_conf.use_short_preamble &&
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 07edda0b8a5c..28437f0001db 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -188,6 +188,9 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
 {
 	int i;
 
+	/* XXX: currently broken due to cb/requeue use */
+	return -EPERM;
+
 	/* prepare the filter and save it for the SW queue
 	 * matching the received HW queue */
 
-- 
cgit v1.2.3


From ce0ad7f0952581ba75ab6aee55bb1ed9bb22cf4f Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 30 Jul 2008 15:23:13 +1000
Subject: powerpc/mm: Lockless get_user_pages_fast() for 64-bit (v3)

Implement lockless get_user_pages_fast for 64-bit powerpc.

Page table existence is guaranteed with RCU, and speculative page references
are used to take a reference to the pages without having a prior existence
guarantee on them.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/Kconfig                |   3 +
 arch/powerpc/mm/Makefile            |   3 +-
 arch/powerpc/mm/gup.c               | 280 ++++++++++++++++++++++++++++++++++++
 include/asm-powerpc/pgtable-ppc64.h |   2 +
 include/linux/pagemap.h             |  23 +++
 5 files changed, 310 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/mm/gup.c

(limited to 'include/linux')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 587da5e0990f..63c9cafda9c4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -42,6 +42,9 @@ config GENERIC_HARDIRQS
 	bool
 	default y
 
+config HAVE_GET_USER_PAGES_FAST
+	def_bool PPC64
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool PPC64
 
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 1c00e0196f6c..e7392b45a5ef 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -12,7 +12,8 @@ obj-y				:= fault.o mem.o \
 				   mmu_context_$(CONFIG_WORD_SIZE).o
 hash-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC64)		+= hash_utils_64.o \
-				   slb_low.o slb.o stab.o mmap.o $(hash-y)
+				   slb_low.o slb.o stab.o \
+				   gup.o mmap.o $(hash-y)
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= hash_low_$(CONFIG_WORD_SIZE).o \
 				   tlb_$(CONFIG_WORD_SIZE).o
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
new file mode 100644
index 000000000000..9fdf4d6335e4
--- /dev/null
+++ b/arch/powerpc/mm/gup.c
@@ -0,0 +1,280 @@
+/*
+ * Lockless get_user_pages_fast for powerpc
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask, result;
+	pte_t *ptep;
+
+	result = _PAGE_PRESENT|_PAGE_USER;
+	if (write)
+		result |= _PAGE_RW;
+	mask = result | _PAGE_SPECIAL;
+
+	ptep = pte_offset_kernel(&pmd, addr);
+	do {
+		pte_t pte = *ptep;
+		struct page *page;
+
+		if ((pte_val(pte) & mask) != result)
+			return 0;
+		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+		page = pte_page(pte);
+		if (!page_cache_get_speculative(page))
+			return 0;
+		if (unlikely(pte != *ptep)) {
+			put_page(page);
+			return 0;
+		}
+		pages[*nr] = page;
+		(*nr)++;
+
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+
+	return 1;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate,
+				 unsigned long *addr, unsigned long end,
+				 int write, struct page **pages, int *nr)
+{
+	unsigned long mask;
+	unsigned long pte_end;
+	struct page *head, *page;
+	pte_t pte;
+	int refs;
+
+	pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate);
+	if (pte_end < end)
+		end = pte_end;
+
+	pte = *ptep;
+	mask = _PAGE_PRESENT|_PAGE_USER;
+	if (write)
+		mask |= _PAGE_RW;
+	if ((pte_val(pte) & mask) != mask)
+		return 0;
+	/* hugepages are never "special" */
+	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+	refs = 0;
+	head = pte_page(pte);
+	page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT);
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (*addr += PAGE_SIZE, *addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+	if (unlikely(pte != *ptep)) {
+		/* Could be optimized better */
+		while (*nr) {
+			put_page(page);
+			(*nr)--;
+		}
+	}
+
+	return 1;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pmd_t *pmdp;
+
+	pmdp = pmd_offset(&pud, addr);
+	do {
+		pmd_t pmd = *pmdp;
+
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(pmd))
+			return 0;
+		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+			return 0;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pud_t *pudp;
+
+	pudp = pud_offset(&pgd, addr);
+	do {
+		pud_t pud = *pudp;
+
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	pgd_t *pgdp;
+	int psize, nr = 0;
+	unsigned int shift;
+
+	pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					start, len)))
+		goto slow_irqon;
+
+	pr_debug("  aligned: %lx .. %lx\n", start, end);
+
+#ifdef CONFIG_HUGETLB_PAGE
+	/* We bail out on slice boundary crossing when hugetlb is
+	 * enabled in order to not have to deal with two different
+	 * page table formats
+	 */
+	if (addr < SLICE_LOW_TOP) {
+		if (end > SLICE_LOW_TOP)
+			goto slow_irqon;
+
+		if (unlikely(GET_LOW_SLICE_INDEX(addr) !=
+			     GET_LOW_SLICE_INDEX(end - 1)))
+			goto slow_irqon;
+	} else {
+		if (unlikely(GET_HIGH_SLICE_INDEX(addr) !=
+			     GET_HIGH_SLICE_INDEX(end - 1)))
+			goto slow_irqon;
+	}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables from being freed on powerpc.
+	 *
+	 * So long as we atomically load page table pointers versus teardown,
+	 * we can follow the address down to the the page and take a ref on it.
+	 */
+	local_irq_disable();
+
+	psize = get_slice_psize(mm, addr);
+	shift = mmu_psize_defs[psize].shift;
+
+#ifdef CONFIG_HUGETLB_PAGE
+	if (unlikely(mmu_huge_psizes[psize])) {
+		pte_t *ptep;
+		unsigned long a = addr;
+		unsigned long sz = ((1UL) << shift);
+		struct hstate *hstate = size_to_hstate(sz);
+
+		BUG_ON(!hstate);
+		/*
+		 * XXX: could be optimized to avoid hstate
+		 * lookup entirely (just use shift)
+		 */
+
+		do {
+			VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift);
+			ptep = huge_pte_offset(mm, a);
+			pr_debug(" %016lx: huge ptep %p\n", a, ptep);
+			if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages,
+						   &nr))
+				goto slow;
+		} while (a != end);
+	} else
+#endif /* CONFIG_HUGETLB_PAGE */
+	{
+		pgdp = pgd_offset(mm, addr);
+		do {
+			pgd_t pgd = *pgdp;
+
+			VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
+			pr_debug("  %016lx: normal pgd %p\n", addr, (void *)pgd);
+			next = pgd_addr_end(addr, end);
+			if (pgd_none(pgd))
+				goto slow;
+			if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+				goto slow;
+		} while (pgdp++, addr = next, addr != end);
+	}
+	local_irq_enable();
+
+	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+	return nr;
+
+	{
+		int ret;
+
+slow:
+		local_irq_enable();
+slow_irqon:
+		pr_debug("  slow path ! nr = %d\n", nr);
+
+		/* Try to get the remaining pages with get_user_pages */
+		start += nr << PAGE_SHIFT;
+		pages += nr;
+
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm, start,
+			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+		up_read(&mm->mmap_sem);
+
+		/* Have to be a bit careful with return values */
+		if (nr > 0) {
+			if (ret < 0)
+				ret = nr;
+			else
+				ret += nr;
+		}
+
+		return ret;
+	}
+}
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index 5fc78c0be302..74c6f380b805 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -461,6 +461,8 @@ void pgtable_cache_init(void);
 	return pt;
 }
 
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long address);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a39b38ccdc97..69ed3cb1197a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -143,6 +143,29 @@ static inline int page_cache_get_speculative(struct page *page)
 	return 1;
 }
 
+/*
+ * Same as above, but add instead of inc (could just be merged)
+ */
+static inline int page_cache_add_speculative(struct page *page, int count)
+{
+	VM_BUG_ON(in_interrupt());
+
+#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU)
+# ifdef CONFIG_PREEMPT
+	VM_BUG_ON(!in_atomic());
+# endif
+	VM_BUG_ON(page_count(page) == 0);
+	atomic_add(count, &page->_count);
+
+#else
+	if (unlikely(!atomic_add_unless(&page->_count, count, 0)))
+		return 0;
+#endif
+	VM_BUG_ON(PageCompound(page) && page != compound_head(page));
+
+	return 1;
+}
+
 static inline int page_freeze_refs(struct page *page, int count)
 {
 	return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
-- 
cgit v1.2.3


From e2ce4eaa76214f65a3f328ec5b45c30248115768 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Date: Wed, 30 Apr 2008 15:10:07 +0100
Subject: regulator: consumer device interface

Add support to allow consumer device drivers to control their regulator
power supply.

This uses a similar API to the kernel clock interface in that consumer
drivers can get and put a regulator (like they can with clocks atm) and
get/set voltage, current limit, mode, enable and disable. This should
allow consumers complete control over their supply voltage and current
limit. This also compiles out if not in use so drivers can be reused in
systems with no regulator based power control.

Signed-off-by: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/regulator/consumer.h | 284 +++++++++++++++++++++++++++++++++++++
 1 file changed, 284 insertions(+)
 create mode 100644 include/linux/regulator/consumer.h

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
new file mode 100644
index 000000000000..afdc4558bb94
--- /dev/null
+++ b/include/linux/regulator/consumer.h
@@ -0,0 +1,284 @@
+/*
+ * consumer.h -- SoC Regulator consumer support.
+ *
+ * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Regulator Consumer Interface.
+ *
+ * A Power Management Regulator framework for SoC based devices.
+ * Features:-
+ *   o Voltage and current level control.
+ *   o Operating mode control.
+ *   o Regulator status.
+ *   o sysfs entries for showing client devices and status
+ *
+ * EXPERIMENTAL FEATURES:
+ *   Dynamic Regulator operating Mode Switching (DRMS) - allows regulators
+ *   to use most efficient operating mode depending upon voltage and load and
+ *   is transparent to client drivers.
+ *
+ *   e.g. Devices x,y,z share regulator r. Device x and y draw 20mA each during
+ *   IO and 1mA at idle. Device z draws 100mA when under load and 5mA when
+ *   idling. Regulator r has > 90% efficiency in NORMAL mode at loads > 100mA
+ *   but this drops rapidly to 60% when below 100mA. Regulator r has > 90%
+ *   efficiency in IDLE mode at loads < 10mA. Thus regulator r will operate
+ *   in normal mode for loads > 10mA and in IDLE mode for load <= 10mA.
+ *
+ */
+
+#ifndef __LINUX_REGULATOR_CONSUMER_H_
+#define __LINUX_REGULATOR_CONSUMER_H_
+
+/*
+ * Regulator operating modes.
+ *
+ * Regulators can run in a variety of different operating modes depending on
+ * output load. This allows further system power savings by selecting the
+ * best (and most efficient) regulator mode for a desired load.
+ *
+ * Most drivers will only care about NORMAL. The modes below are generic and
+ * will probably not match the naming convention of your regulator data sheet
+ * but should match the use cases in the datasheet.
+ *
+ * In order of power efficiency (least efficient at top).
+ *
+ *  Mode       Description
+ *  FAST       Regulator can handle fast changes in it's load.
+ *             e.g. useful in CPU voltage & frequency scaling where
+ *             load can quickly increase with CPU frequency increases.
+ *
+ *  NORMAL     Normal regulator power supply mode. Most drivers will
+ *             use this mode.
+ *
+ *  IDLE       Regulator runs in a more efficient mode for light
+ *             loads. Can be used for devices that have a low power
+ *             requirement during periods of inactivity. This mode
+ *             may be more noisy than NORMAL and may not be able
+ *             to handle fast load switching.
+ *
+ *  STANDBY    Regulator runs in the most efficient mode for very
+ *             light loads. Can be used by devices when they are
+ *             in a sleep/standby state. This mode is likely to be
+ *             the most noisy and may not be able to handle fast load
+ *             switching.
+ *
+ * NOTE: Most regulators will only support a subset of these modes. Some
+ * will only just support NORMAL.
+ *
+ * These modes can be OR'ed together to make up a mask of valid register modes.
+ */
+
+#define REGULATOR_MODE_FAST			0x1
+#define REGULATOR_MODE_NORMAL			0x2
+#define REGULATOR_MODE_IDLE			0x4
+#define REGULATOR_MODE_STANDBY			0x8
+
+/*
+ * Regulator notifier events.
+ *
+ * UNDER_VOLTAGE  Regulator output is under voltage.
+ * OVER_CURRENT   Regulator output current is too high.
+ * REGULATION_OUT Regulator output is out of regulation.
+ * FAIL           Regulator output has failed.
+ * OVER_TEMP      Regulator over temp.
+ * FORCE_DISABLE  Regulator shut down by software.
+ *
+ * NOTE: These events can be OR'ed together when passed into handler.
+ */
+
+#define REGULATOR_EVENT_UNDER_VOLTAGE		0x01
+#define REGULATOR_EVENT_OVER_CURRENT		0x02
+#define REGULATOR_EVENT_REGULATION_OUT		0x04
+#define REGULATOR_EVENT_FAIL			0x08
+#define REGULATOR_EVENT_OVER_TEMP		0x10
+#define REGULATOR_EVENT_FORCE_DISABLE		0x20
+
+struct regulator;
+
+/**
+ * struct regulator_bulk_data - Data used for bulk regulator operations.
+ *
+ * @supply   The name of the supply.  Initialised by the user before
+ *           using the bulk regulator APIs.
+ * @consumer The regulator consumer for the supply.  This will be managed
+ *           by the bulk API.
+ *
+ * The regulator APIs provide a series of regulator_bulk_() API calls as
+ * a convenience to consumers which require multiple supplies.  This
+ * structure is used to manage data for these calls.
+ */
+struct regulator_bulk_data {
+	const char *supply;
+	struct regulator *consumer;
+};
+
+#if defined(CONFIG_REGULATOR)
+
+/* regulator get and put */
+struct regulator *__must_check regulator_get(struct device *dev,
+					     const char *id);
+void regulator_put(struct regulator *regulator);
+
+/* regulator output control and status */
+int regulator_enable(struct regulator *regulator);
+int regulator_disable(struct regulator *regulator);
+int regulator_force_disable(struct regulator *regulator);
+int regulator_is_enabled(struct regulator *regulator);
+
+int regulator_bulk_get(struct device *dev, int num_consumers,
+		       struct regulator_bulk_data *consumers);
+int regulator_bulk_enable(int num_consumers,
+			  struct regulator_bulk_data *consumers);
+int regulator_bulk_disable(int num_consumers,
+			   struct regulator_bulk_data *consumers);
+void regulator_bulk_free(int num_consumers,
+			 struct regulator_bulk_data *consumers);
+
+int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV);
+int regulator_get_voltage(struct regulator *regulator);
+int regulator_set_current_limit(struct regulator *regulator,
+			       int min_uA, int max_uA);
+int regulator_get_current_limit(struct regulator *regulator);
+
+int regulator_set_mode(struct regulator *regulator, unsigned int mode);
+unsigned int regulator_get_mode(struct regulator *regulator);
+int regulator_set_optimum_mode(struct regulator *regulator, int load_uA);
+
+/* regulator notifier block */
+int regulator_register_notifier(struct regulator *regulator,
+			      struct notifier_block *nb);
+int regulator_unregister_notifier(struct regulator *regulator,
+				struct notifier_block *nb);
+
+/* driver data - core doesn't touch */
+void *regulator_get_drvdata(struct regulator *regulator);
+void regulator_set_drvdata(struct regulator *regulator, void *data);
+
+#else
+
+/*
+ * Make sure client drivers will still build on systems with no software
+ * controllable voltage or current regulators.
+ */
+static inline struct regulator *__must_check regulator_get(struct device *dev,
+	const char *id)
+{
+	/* Nothing except the stubbed out regulator API should be
+	 * looking at the value except to check if it is an error
+	 * value so the actual return value doesn't matter.
+	 */
+	return (struct regulator *)id;
+}
+static inline void regulator_put(struct regulator *regulator)
+{
+}
+
+static inline int regulator_enable(struct regulator *regulator)
+{
+	return 0;
+}
+
+static inline int regulator_disable(struct regulator *regulator)
+{
+	return 0;
+}
+
+static inline int regulator_is_enabled(struct regulator *regulator)
+{
+	return 1;
+}
+
+static inline int regulator_bulk_get(struct device *dev,
+				     int num_consumers,
+				     struct regulator_bulk_data *consumers)
+{
+	return 0;
+}
+
+static inline int regulator_bulk_enable(int num_consumers,
+					struct regulator_bulk_data *consumers)
+{
+	return 0;
+}
+
+static inline int regulator_bulk_disable(int num_consumers,
+					 struct regulator_bulk_data *consumers)
+{
+	return 0;
+}
+
+static inline void regulator_bulk_free(int num_consumers,
+				       struct regulator_bulk_data *consumers)
+{
+}
+
+static inline int regulator_set_voltage(struct regulator *regulator,
+					int min_uV, int max_uV)
+{
+	return 0;
+}
+
+static inline int regulator_get_voltage(struct regulator *regulator)
+{
+	return 0;
+}
+
+static inline int regulator_set_current_limit(struct regulator *regulator,
+					     int min_uA, int max_uA)
+{
+	return 0;
+}
+
+static inline int regulator_get_current_limit(struct regulator *regulator)
+{
+	return 0;
+}
+
+static inline int regulator_set_mode(struct regulator *regulator,
+	unsigned int mode)
+{
+	return 0;
+}
+
+static inline unsigned int regulator_get_mode(struct regulator *regulator)
+{
+	return REGULATOR_MODE_NORMAL;
+}
+
+static inline int regulator_set_optimum_mode(struct regulator *regulator,
+					int load_uA)
+{
+	return REGULATOR_MODE_NORMAL;
+}
+
+static inline int regulator_register_notifier(struct regulator *regulator,
+			      struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline int regulator_unregister_notifier(struct regulator *regulator,
+				struct notifier_block *nb)
+{
+	return 0;
+}
+
+static inline void *regulator_get_drvdata(struct regulator *regulator)
+{
+	return NULL;
+}
+
+static inline void regulator_set_drvdata(struct regulator *regulator,
+	void *data)
+{
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From 571a354b1542a274d88617e1f6703f3fe7a517f1 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Date: Wed, 30 Apr 2008 15:42:28 +0100
Subject: regulator: regulator driver interface

This allows regulator drivers to register their regulators and provide
operations to the core. It also has a notifier call chain for propagating
regulator events to clients.

Signed-off-by: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/regulator/driver.h | 99 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 include/linux/regulator/driver.h

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
new file mode 100644
index 000000000000..1d712c7172a2
--- /dev/null
+++ b/include/linux/regulator/driver.h
@@ -0,0 +1,99 @@
+/*
+ * driver.h -- SoC Regulator driver support.
+ *
+ * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Regulator Driver Interface.
+ */
+
+#ifndef __LINUX_REGULATOR_DRIVER_H_
+#define __LINUX_REGULATOR_DRIVER_H_
+
+#include <linux/device.h>
+#include <linux/regulator/consumer.h>
+
+struct regulator_constraints;
+struct regulator_dev;
+
+/**
+ * struct regulator_ops - regulator operations.
+ *
+ * This struct describes regulator operations.
+ */
+struct regulator_ops {
+
+	/* get/set regulator voltage */
+	int (*set_voltage) (struct regulator_dev *, int min_uV, int max_uV);
+	int (*get_voltage) (struct regulator_dev *);
+
+	/* get/set regulator current  */
+	int (*set_current_limit) (struct regulator_dev *,
+				 int min_uA, int max_uA);
+	int (*get_current_limit) (struct regulator_dev *);
+
+	/* enable/disable regulator */
+	int (*enable) (struct regulator_dev *);
+	int (*disable) (struct regulator_dev *);
+	int (*is_enabled) (struct regulator_dev *);
+
+	/* get/set regulator operating mode (defined in regulator.h) */
+	int (*set_mode) (struct regulator_dev *, unsigned int mode);
+	unsigned int (*get_mode) (struct regulator_dev *);
+
+	/* get most efficient regulator operating mode for load */
+	unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV,
+					  int output_uV, int load_uA);
+
+	/* the operations below are for configuration of regulator state when
+	 * it's parent PMIC enters a global STANBY/HIBERNATE state */
+
+	/* set regulator suspend voltage */
+	int (*set_suspend_voltage) (struct regulator_dev *, int uV);
+
+	/* enable/disable regulator in suspend state */
+	int (*set_suspend_enable) (struct regulator_dev *);
+	int (*set_suspend_disable) (struct regulator_dev *);
+
+	/* set regulator suspend operating mode (defined in regulator.h) */
+	int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode);
+};
+
+/*
+ * Regulators can either control voltage or current.
+ */
+enum regulator_type {
+	REGULATOR_VOLTAGE,
+	REGULATOR_CURRENT,
+};
+
+/**
+ * struct regulator_desc - Regulator descriptor
+ *
+ */
+struct regulator_desc {
+	const char *name;
+	int id;
+	struct regulator_ops *ops;
+	int irq;
+	enum regulator_type type;
+	struct module *owner;
+};
+
+
+struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
+					  void *reg_data);
+void regulator_unregister(struct regulator_dev *rdev);
+
+int regulator_notifier_call_chain(struct regulator_dev *rdev,
+				  unsigned long event, void *data);
+
+void *rdev_get_drvdata(struct regulator_dev *rdev);
+int rdev_get_id(struct regulator_dev *rdev);
+
+#endif
-- 
cgit v1.2.3


From 4c1184e85cb381121a5273ea20ad31ca3faa0a4f Mon Sep 17 00:00:00 2001
From: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Date: Wed, 30 Apr 2008 15:46:09 +0100
Subject: regulator: machine driver interface

This interface is for machine specific code and allows the creation of
voltage/current domains (with constraints) for each regulator. It can
provide regulator constraints that will prevent device damage through
overvoltage or over current caused by buggy client drivers. It also
allows the creation of a regulator tree whereby some regulators are
supplied by others (similar to a clock tree).

Signed-off-by: Liam Girdwood <lg@opensource.wolfsonmicro.com>
Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/regulator/machine.h | 104 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 include/linux/regulator/machine.h

(limited to 'include/linux')

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
new file mode 100644
index 000000000000..11e737dbfcf2
--- /dev/null
+++ b/include/linux/regulator/machine.h
@@ -0,0 +1,104 @@
+/*
+ * machine.h -- SoC Regulator support, machine/board driver API.
+ *
+ * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Regulator Machine/Board Interface.
+ */
+
+#ifndef __LINUX_REGULATOR_MACHINE_H_
+#define __LINUX_REGULATOR_MACHINE_H_
+
+#include <linux/regulator/consumer.h>
+#include <linux/suspend.h>
+
+struct regulator;
+
+/*
+ * Regulator operation constraint flags. These flags are used to enable
+ * certain regulator operations and can be OR'ed together.
+ *
+ * VOLTAGE:  Regulator output voltage can be changed by software on this
+ *           board/machine.
+ * CURRENT:  Regulator output current can be changed by software on this
+ *           board/machine.
+ * MODE:     Regulator operating mode can be changed by software on this
+ *           board/machine.
+ * STATUS:   Regulator can be enabled and disabled.
+ * DRMS:     Dynamic Regulator Mode Switching is enabled for this regulator.
+ */
+
+#define REGULATOR_CHANGE_VOLTAGE	0x1
+#define REGULATOR_CHANGE_CURRENT	0x2
+#define REGULATOR_CHANGE_MODE		0x4
+#define REGULATOR_CHANGE_STATUS		0x8
+#define REGULATOR_CHANGE_DRMS		0x10
+
+/**
+ * struct regulator_state - regulator state during low power syatem states
+ *
+ * This describes a regulators state during a system wide low power state.
+ */
+struct regulator_state {
+	int uV;	/* suspend voltage */
+	unsigned int mode; /* suspend regulator operating mode */
+	int enabled; /* is regulator enabled in this suspend state */
+};
+
+/**
+ * struct regulation_constraints - regulator operating constraints.
+ *
+ * This struct describes regulator and board/machine specific constraints.
+ */
+struct regulation_constraints {
+
+	char *name;
+
+	/* voltage output range (inclusive) - for voltage control */
+	int min_uV;
+	int max_uV;
+
+	/* current output range (inclusive) - for current control */
+	int min_uA;
+	int max_uA;
+
+	/* valid regulator operating modes for this machine */
+	unsigned int valid_modes_mask;
+
+	/* valid operations for regulator on this machine */
+	unsigned int valid_ops_mask;
+
+	/* regulator input voltage - only if supply is another regulator */
+	int input_uV;
+
+	/* regulator suspend states for global PMIC STANDBY/HIBERNATE */
+	struct regulator_state state_disk;
+	struct regulator_state state_mem;
+	struct regulator_state state_standby;
+	suspend_state_t initial_state; /* suspend state to set at init */
+
+	/* constriant flags */
+	unsigned always_on:1;	/* regulator never off when system is on */
+	unsigned boot_on:1;	/* bootloader/firmware enabled regulator */
+	unsigned apply_uV:1;	/* apply uV constraint iff min == max */
+};
+
+int regulator_set_supply(const char *regulator, const char *regulator_supply);
+
+const char *regulator_get_supply(const char *regulator);
+
+int regulator_set_machine_constraints(const char *regulator,
+	struct regulation_constraints *constraints);
+
+int regulator_set_device_supply(const char *regulator, struct device *dev,
+				const char *supply);
+
+int regulator_suspend_prepare(suspend_state_t state);
+
+#endif
-- 
cgit v1.2.3


From 48d335ba3164ce99cb8847513d0e3b6ee604eb20 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 30 Apr 2008 15:50:21 +0100
Subject: regulator: fixed regulator interface

This patch adds support for fixed regulators. This class of regulator is
not software controllable but can coexist on machines with software
controlable regulators.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lg@opensource.wolfsonmicro.com>
---
 include/linux/regulator/fixed.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 include/linux/regulator/fixed.h

(limited to 'include/linux')

diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h
new file mode 100644
index 000000000000..1387a5d2190e
--- /dev/null
+++ b/include/linux/regulator/fixed.h
@@ -0,0 +1,22 @@
+/*
+ * fixed.h
+ *
+ * Copyright 2008 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ */
+
+#ifndef __REGULATOR_FIXED_H
+#define __REGULATOR_FIXED_H
+
+struct fixed_voltage_config {
+	const char *supply_name;
+	int microvolts;
+};
+
+#endif
-- 
cgit v1.2.3


From 0eb5d5ab3ec99bfd22ff16797d95835369ffb25b Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Fri, 11 Jul 2008 17:28:06 +0200
Subject: regulator: TI bq24022 Li-Ion Charger driver

This adds a regulator driver for the TI bq24022 Single-Chip
Li-Ion Charger with its nCE and ISET2 pins connected to GPIOs.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Liam Girdwood <lg@opensource.wolfsonmicro.com>
---
 drivers/regulator/Kconfig         |  10 +++
 drivers/regulator/Makefile        |   2 +
 drivers/regulator/bq24022.c       | 167 ++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/bq24022.h |  21 +++++
 4 files changed, 200 insertions(+)
 create mode 100644 drivers/regulator/bq24022.c
 create mode 100644 include/linux/regulator/bq24022.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 84f89ecce69e..a656128f1fdd 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -46,4 +46,14 @@ config REGULATOR_VIRTUAL_CONSUMER
 
           If unsure, say no.
 
+config REGULATOR_BQ24022
+	tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC"
+	default n
+	select REGULATOR
+	help
+	  This driver controls a TI bq24022 Charger attached via
+	  GPIOs. The provided current regulator can enable/disable
+	  charging select between 100 mA and 500 mA charging current
+	  limit.
+
 endmenu
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 29528b78c8de..ac2c64efe65c 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -7,4 +7,6 @@ obj-$(CONFIG_REGULATOR) += core.o
 obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
 obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
 
+obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
+
 ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG
diff --git a/drivers/regulator/bq24022.c b/drivers/regulator/bq24022.c
new file mode 100644
index 000000000000..263699d6152d
--- /dev/null
+++ b/drivers/regulator/bq24022.c
@@ -0,0 +1,167 @@
+/*
+ * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adpater)
+ * 1-Cell Li-Ion Charger connected via GPIOs.
+ *
+ * Copyright (c) 2008 Philipp Zabel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/regulator/bq24022.h>
+#include <linux/regulator/driver.h>
+
+static int bq24022_set_current_limit(struct regulator_dev *rdev,
+					int min_uA, int max_uA)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	dev_dbg(&pdev->dev, "setting current limit to %s mA\n",
+		max_uA >= 500000 ? "500" : "100");
+
+	/* REVISIT: maybe return error if min_uA != 0 ? */
+	gpio_set_value(pdata->gpio_iset2, max_uA >= 500000);
+	return 0;
+}
+
+static int bq24022_get_current_limit(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	return gpio_get_value(pdata->gpio_iset2) ? 500000 : 100000;
+}
+
+static int bq24022_enable(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	dev_dbg(&pdev->dev, "enabling charger\n");
+
+	gpio_set_value(pdata->gpio_nce, 0);
+	return 0;
+}
+
+static int bq24022_disable(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	dev_dbg(&pdev->dev, "disabling charger\n");
+
+	gpio_set_value(pdata->gpio_nce, 1);
+	return 0;
+}
+
+static int bq24022_is_enabled(struct regulator_dev *rdev)
+{
+	struct platform_device *pdev = rdev_get_drvdata(rdev);
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+
+	return !gpio_get_value(pdata->gpio_nce);
+}
+
+static struct regulator_ops bq24022_ops = {
+	.set_current_limit = bq24022_set_current_limit,
+	.get_current_limit = bq24022_get_current_limit,
+	.enable            = bq24022_enable,
+	.disable           = bq24022_disable,
+	.is_enabled        = bq24022_is_enabled,
+};
+
+static struct regulator_desc bq24022_desc = {
+	.name  = "bq24022",
+	.ops   = &bq24022_ops,
+	.type  = REGULATOR_CURRENT,
+};
+
+static int __init bq24022_probe(struct platform_device *pdev)
+{
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+	struct regulator_dev *bq24022;
+	int ret;
+
+	if (!pdata || !pdata->gpio_nce || !pdata->gpio_iset2)
+		return -EINVAL;
+
+	ret = gpio_request(pdata->gpio_nce, "ncharge_en");
+	if (ret) {
+		dev_dbg(&pdev->dev, "couldn't request nCE GPIO: %d\n",
+			pdata->gpio_nce);
+		goto err_ce;
+	}
+	ret = gpio_request(pdata->gpio_iset2, "charge_mode");
+	if (ret) {
+		dev_dbg(&pdev->dev, "couldn't request ISET2 GPIO: %d\n",
+			pdata->gpio_iset2);
+		goto err_iset2;
+	}
+	ret = gpio_direction_output(pdata->gpio_iset2, 0);
+	ret = gpio_direction_output(pdata->gpio_nce, 1);
+
+	bq24022 = regulator_register(&bq24022_desc, pdev);
+	if (IS_ERR(bq24022)) {
+		dev_dbg(&pdev->dev, "couldn't register regulator\n");
+		ret = PTR_ERR(bq24022);
+		goto err_reg;
+	}
+	platform_set_drvdata(pdev, bq24022);
+	dev_dbg(&pdev->dev, "registered regulator\n");
+
+	return 0;
+err_reg:
+	gpio_free(pdata->gpio_iset2);
+err_iset2:
+	gpio_free(pdata->gpio_nce);
+err_ce:
+	return ret;
+}
+
+static int __devexit bq24022_remove(struct platform_device *pdev)
+{
+	struct bq24022_mach_info *pdata = pdev->dev.platform_data;
+	struct regulator_dev *bq24022 = platform_get_drvdata(pdev);
+
+	regulator_unregister(bq24022);
+	gpio_free(pdata->gpio_iset2);
+	gpio_free(pdata->gpio_nce);
+
+	return 0;
+}
+
+static struct platform_driver bq24022_driver = {
+	.driver = {
+		.name = "bq24022",
+	},
+	.remove = __devexit_p(bq24022_remove),
+};
+
+static int __init bq24022_init(void)
+{
+	return platform_driver_probe(&bq24022_driver, bq24022_probe);
+}
+
+static void __exit bq24022_exit(void)
+{
+	platform_driver_unregister(&bq24022_driver);
+}
+
+/*
+ * make sure this is probed before gpio_vbus and pda_power,
+ * but after asic3 or other GPIO expander drivers.
+ */
+subsys_initcall(bq24022_init);
+module_exit(bq24022_exit);
+
+MODULE_AUTHOR("Philipp Zabel");
+MODULE_DESCRIPTION("TI bq24022 Li-Ion Charger driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/regulator/bq24022.h b/include/linux/regulator/bq24022.h
new file mode 100644
index 000000000000..e84b0a9feda5
--- /dev/null
+++ b/include/linux/regulator/bq24022.h
@@ -0,0 +1,21 @@
+/*
+ * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adpater)
+ * 1-Cell Li-Ion Charger connected via GPIOs.
+ *
+ * Copyright (c) 2008 Philipp Zabel
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/**
+ * bq24022_mach_info - platform data for bq24022
+ * @gpio_nce: GPIO line connected to the nCE pin, used to enable / disable charging
+ * @gpio_iset2: GPIO line connected to the ISET2 pin, used to limit charging current to 100 mA / 500 mA
+ */
+struct bq24022_mach_info {
+	int gpio_nce;
+	int gpio_iset2;
+};
-- 
cgit v1.2.3


From 785957d3e8c6fb37b18bf671923a76dbd8240025 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 30 Jul 2008 03:03:15 -0700
Subject: tcp: MD5: Use MIB counter instead of warning for MD5 mismatch.

From a report by Matti Aarnio, and preliminary patch by Adam Langley.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h |  2 ++
 net/ipv4/proc.c      |  2 ++
 net/ipv4/tcp_ipv4.c  | 10 ++--------
 net/ipv6/tcp_ipv6.c  | 27 ++++++++-------------------
 4 files changed, 14 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 5df62ef1280c..7a6e6bba4a71 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -214,6 +214,8 @@ enum
 	LINUX_MIB_TCPDSACKIGNOREDOLD,		/* TCPSACKIgnoredOld */
 	LINUX_MIB_TCPDSACKIGNOREDNOUNDO,	/* TCPSACKIgnoredNoUndo */
 	LINUX_MIB_TCPSPURIOUSRTOS,		/* TCPSpuriousRTOs */
+	LINUX_MIB_TCPMD5NOTFOUND,		/* TCPMD5NotFound */
+	LINUX_MIB_TCPMD5UNEXPECTED,		/* TCPMD5Unexpected */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 834356ea99df..8f5a403f6f6b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -232,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD),
 	SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO),
 	SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
+	SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
+	SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a2b06d0cc26b..b3875c0d83c7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1116,18 +1116,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 		return 0;
 
 	if (hash_expected && !hash_location) {
-		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
-			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-			       NIPQUAD(iph->saddr), ntohs(th->source),
-			       NIPQUAD(iph->daddr), ntohs(th->dest));
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
 		return 1;
 	}
 
 	if (!hash_expected && hash_location) {
-		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
-			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-			       NIPQUAD(iph->saddr), ntohs(th->source),
-			       NIPQUAD(iph->daddr), ntohs(th->dest));
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
 		return 1;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cff778b23a7f..1db45216b232 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -849,28 +849,17 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
 	hash_location = tcp_parse_md5sig_option(th);
 
-	/* do we have a hash as expected? */
-	if (!hash_expected) {
-		if (!hash_location)
-			return 0;
-		if (net_ratelimit()) {
-			printk(KERN_INFO "MD5 Hash NOT expected but found "
-			       "(" NIP6_FMT ", %u)->"
-			       "(" NIP6_FMT ", %u)\n",
-			       NIP6(ip6h->saddr), ntohs(th->source),
-			       NIP6(ip6h->daddr), ntohs(th->dest));
-		}
+	/* We've parsed the options - do we have a hash? */
+	if (!hash_expected && !hash_location)
+		return 0;
+
+	if (hash_expected && !hash_location) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
 		return 1;
 	}
 
-	if (!hash_location) {
-		if (net_ratelimit()) {
-			printk(KERN_INFO "MD5 Hash expected but NOT found "
-			       "(" NIP6_FMT ", %u)->"
-			       "(" NIP6_FMT ", %u)\n",
-			       NIP6(ip6h->saddr), ntohs(th->source),
-			       NIP6(ip6h->daddr), ntohs(th->dest));
-		}
+	if (!hash_expected && hash_location) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
 		return 1;
 	}
 
-- 
cgit v1.2.3


From 96d8b647cfff90c8ff07863866aacdcd9d13cead Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@infradead.org>
Date: Tue, 29 Jul 2008 13:54:11 +0100
Subject: [MTD] [NAND] fix subpage read for small page NAND

Current implementation of subpage read feature for NAND has issues with
small page devices. Small page NAND do not support RNDOUT command.
So subpage feature is not applicable for them.

This patch disables support of subpage for small page NAND.
The code is verified on nandsim(SP NAND simulation) and on LP NAND
devices.

Thanks a lot to Artem for finding this issue.

Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 83f678702dff..81774e5facf4 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -177,7 +177,9 @@ typedef enum {
 #define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING))
 #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG))
 #define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK))
-#define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT))
+/* Large page NAND with SOFT_ECC should support subpage reads */
+#define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT) \
+					&& (chip->page_shift > 9))
 
 /* Mask to zero out the chip options, which come from the id table */
 #define NAND_CHIPOPTIONS_MSK	(0x0000ffff & ~NAND_NO_AUTOINCR)
-- 
cgit v1.2.3


From 95b1bc20532c18e3f19cd460c8350350c84ffbb2 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Tue, 29 Jul 2008 22:28:12 -0700
Subject: [MTD] MTD_DEBUG always does compile-time typechecks

The current style for debug messages is to ensure they're always
parsed by the compiler and then subjected to dead code removal.
That way builds won't break only when debug options get enabled,
which is common when they are stripped out early by CPP.

This patch makes CONFIG_MTD_DEBUG adopt that convention.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 4ed40caff4e5..922636548558 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -272,7 +272,11 @@ static inline void mtd_erase_callback(struct erase_info *instr)
 			printk(KERN_INFO args);		\
 	} while(0)
 #else /* CONFIG_MTD_DEBUG */
-#define DEBUG(n, args...) do { } while(0)
+#define DEBUG(n, args...)				\
+	do {						\
+		if (0)					\
+			printk(KERN_INFO args);		\
+	} while(0)
 
 #endif /* CONFIG_MTD_DEBUG */
 
-- 
cgit v1.2.3


From 1a4e564b7db999fbe5d88318c96ac8747699d417 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Tue, 29 Jul 2008 22:32:57 -0700
Subject: resource: add resource_size()

Avoid one-off errors by introducing a resource_size() function.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ioport.h | 4 ++++
 kernel/resource.c      | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 2cd07cc29687..22d2115458c6 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -118,6 +118,10 @@ extern int allocate_resource(struct resource *root, struct resource *new,
 int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
 resource_size_t resource_alignment(struct resource *res);
+static inline resource_size_t resource_size(struct resource *res)
+{
+	return res->end - res->start + 1;
+}
 
 /* Convenience shorthand with allocation */
 #define request_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name))
diff --git a/kernel/resource.c b/kernel/resource.c
index 74af2d7cb5a1..f5b518eabefe 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -490,7 +490,7 @@ resource_size_t resource_alignment(struct resource *res)
 {
 	switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) {
 	case IORESOURCE_SIZEALIGN:
-		return res->end - res->start + 1;
+		return resource_size(res);
 	case IORESOURCE_STARTALIGN:
 		return res->start;
 	default:
-- 
cgit v1.2.3


From a1531acd43310a7e4571d52e8846640667f4c74b Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 29 Jul 2008 22:32:58 -0700
Subject: cpufreq acpi: only call _PPC after cpufreq ACPI init funcs got called
 already

Ingo Molnar provided a fix to not call _PPC at processor driver
initialization time in "[PATCH] ACPI: fix cpufreq regression" (git
commit e4233dec749a3519069d9390561b5636a75c7579)

But it can still happen that _PPC is called at processor driver
initialization time.

This patch should make sure that this is not possible anymore.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c |  6 ++++++
 drivers/acpi/processor_perflib.c              | 15 +++++++++++++--
 drivers/cpufreq/cpufreq.c                     |  3 +++
 include/linux/cpufreq.h                       |  1 +
 4 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
index 69288f653144..3233fe84d158 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
@@ -96,6 +96,12 @@ static int pmi_notifier(struct notifier_block *nb,
 	struct cpufreq_frequency_table *cbe_freqs;
 	u8 node;
 
+	/* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE
+	 * and CPUFREQ_NOTIFY policy events?)
+	 */
+	if (event == CPUFREQ_START)
+		return 0;
+
 	cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
 	node = cbe_cpu_to_node(policy->cpu);
 
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index b4749969c6b4..e98071a64810 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -64,7 +64,13 @@ static DEFINE_MUTEX(performance_mutex);
  * policy is adjusted accordingly.
  */
 
-static unsigned int ignore_ppc = 0;
+/* ignore_ppc:
+ * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet
+ *       ignore _PPC
+ *  0 -> cpufreq low level drivers initialized -> consider _PPC values
+ *  1 -> ignore _PPC totally -> forced by user through boot param
+ */
+static unsigned int ignore_ppc = -1;
 module_param(ignore_ppc, uint, 0644);
 MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \
 		 "limited by BIOS, this should help");
@@ -72,7 +78,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \
 #define PPC_REGISTERED   1
 #define PPC_IN_USE       2
 
-static int acpi_processor_ppc_status = 0;
+static int acpi_processor_ppc_status;
 
 static int acpi_processor_ppc_notifier(struct notifier_block *nb,
 				       unsigned long event, void *data)
@@ -81,6 +87,11 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
 	struct acpi_processor *pr;
 	unsigned int ppc = 0;
 
+	if (event == CPUFREQ_START && ignore_ppc <= 0) {
+		ignore_ppc = 0;
+		return 0;
+	}
+
 	if (ignore_ppc)
 		return 0;
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8d6a3ff02672..8a67f16987db 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -825,6 +825,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	policy->user_policy.min = policy->cpuinfo.min_freq;
 	policy->user_policy.max = policy->cpuinfo.max_freq;
 
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+				     CPUFREQ_START, policy);
+
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 2270ca5ec631..6fd5668aa572 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -106,6 +106,7 @@ struct cpufreq_policy {
 #define CPUFREQ_ADJUST		(0)
 #define CPUFREQ_INCOMPATIBLE	(1)
 #define CPUFREQ_NOTIFY		(2)
+#define CPUFREQ_START		(3)
 
 #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
 #define CPUFREQ_SHARED_TYPE_HW	 (1) /* HW does needed coordination */
-- 
cgit v1.2.3


From 1d1958f05095a7e9ecbba86235122784a3d1b561 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Tue, 29 Jul 2008 22:33:16 -0700
Subject: mm: remove find_max_pfn_with_active_regions

It has no user now

Also print out info about adding/removing active regions.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 -
 mm/page_alloc.c    | 17 -----------------
 2 files changed, 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 866a3dbe5c75..5e2c8af49998 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1041,7 +1041,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 extern void get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn);
 extern unsigned long find_min_pfn_with_active_regions(void);
-extern unsigned long find_max_pfn_with_active_regions(void);
 extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3cf3d05b6bd4..401d104d2bb6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3753,23 +3753,6 @@ unsigned long __init find_min_pfn_with_active_regions(void)
 	return find_min_pfn_for_node(MAX_NUMNODES);
 }
 
-/**
- * find_max_pfn_with_active_regions - Find the maximum PFN registered
- *
- * It returns the maximum PFN based on information provided via
- * add_active_range().
- */
-unsigned long __init find_max_pfn_with_active_regions(void)
-{
-	int i;
-	unsigned long max_pfn = 0;
-
-	for (i = 0; i < nr_nodemap_entries; i++)
-		max_pfn = max(max_pfn, early_node_map[i].end_pfn);
-
-	return max_pfn;
-}
-
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
-- 
cgit v1.2.3


From 3f1712bac586069d6c891a8201457283b27e8abe Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Tue, 29 Jul 2008 22:33:32 -0700
Subject: print_ip_sym(): use %pS

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kallsyms.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 57aefa160a92..b96144887444 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -108,8 +108,7 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr)
 
 static inline void print_ip_sym(unsigned long ip)
 {
-	printk("[<%p>]", (void *) ip);
-	print_symbol(" %s\n", ip);
+	printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
 }
 
 #endif /*_LINUX_KALLSYMS_H*/
-- 
cgit v1.2.3


From 2c203003f64de5fe55ae35712942100d270667fa Mon Sep 17 00:00:00 2001
From: Jerome Arbez-Gindre <jeromearbezgindre@gmail.com>
Date: Tue, 29 Jul 2008 22:33:33 -0700
Subject: connector: add a BlackBoard user to connector

Add a BlackBoard user to connector.  BlackBoard is part of the TSP GPL
sampling framework (http://savannah.nongnu.org/p/tsp)

[akpm@linux-foundation.org: add comment]
Signed-off-by: Jerome Arbez-Gindre <jeromearbezgindre@gmail.com>
Acked-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/connector.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/connector.h b/include/linux/connector.h
index 96a89d3d6727..5c7f9468f753 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -38,8 +38,9 @@
 #define CN_W1_VAL			0x1
 #define CN_IDX_V86D			0x4
 #define CN_VAL_V86D_UVESAFB		0x1
+#define CN_IDX_BB			0x5	/* BlackBoard, from the TSP GPL sampling framework */
 
-#define CN_NETLINK_USERS		5
+#define CN_NETLINK_USERS		6
 
 /*
  * Maximum connector's message size.
-- 
cgit v1.2.3


From 204b885e7322656284626949e51f292fe61313fa Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 29 Jul 2008 22:33:42 -0700
Subject: introduce lower_32_bits() macro

The file kernel.h contains the upper_32_bits macro.  This patch adds the
other part, the lower_32_bits macro.  Its first use will be in the driver
for AMD IOMMU.

Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fdbbf72ca2eb..aaa998f65c7a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -75,6 +75,12 @@ extern const char linux_proc_banner[];
  */
 #define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
 
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((u32)(n))
+
 #define	KERN_EMERG	"<0>"	/* system is unusable			*/
 #define	KERN_ALERT	"<1>"	/* action must be taken immediately	*/
 #define	KERN_CRIT	"<2>"	/* critical conditions			*/
-- 
cgit v1.2.3


From c627f9cc046c7cd93b4525d89377fb409e170a18 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Tue, 29 Jul 2008 22:33:53 -0700
Subject: mm: add zap_vma_ptes(): a library function to unmap driver ptes

zap_vma_ptes() is intended to be used by drivers to unmap ptes assigned to the
driver private vmas.  This interface is similar to zap_page_range() but is
less general & less likely to be abused.

Needed by the GRU driver.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5e2c8af49998..335288bff1b7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -744,6 +744,8 @@ struct zap_details {
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		pte_t pte);
 
+int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+		unsigned long size);
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *);
 unsigned long unmap_vmas(struct mmu_gather **tlb,
diff --git a/mm/memory.c b/mm/memory.c
index 67f0ab9077d9..6793b9c68107 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -994,6 +994,29 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 	return end;
 }
 
+/**
+ * zap_vma_ptes - remove ptes mapping the vma
+ * @vma: vm_area_struct holding ptes to be zapped
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ *
+ * This function only unmaps ptes assigned to VM_PFNMAP vmas.
+ *
+ * The entire address range must be fully contained within the vma.
+ *
+ * Returns 0 if successful.
+ */
+int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+		unsigned long size)
+{
+	if (address < vma->vm_start || address + size > vma->vm_end ||
+	    		!(vma->vm_flags & VM_PFNMAP))
+		return -1;
+	zap_page_range(vma, address, size, NULL);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zap_vma_ptes);
+
 /*
  * Do a quick page-table lookup for a single page.
  */
-- 
cgit v1.2.3


From 3dd730f2b49f101b90d283c3efc4e6cd826dd8f6 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 29 Jul 2008 16:07:37 +1000
Subject: cpumask: statement expressions confuse some versions of gcc

when you take the address of the result.  Noticed on a sparc64 compile
using a version 3.4.5 cross compiler.

 kernel/time/tick-common.c: In function `tick_check_new_device':
 kernel/time/tick-common.c:210: error: invalid lvalue in unary `&'
 ...

Just make it a regular expression.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpumask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 96d0509fb8d8..d3219d73f8e6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -287,7 +287,7 @@ static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
  * gcc optimizes it out (it's a constant) and there's no huge stack
  * variable created:
  */
-#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); })
+#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu))
 
 
 #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
-- 
cgit v1.2.3


From 1f938d060a7bc01b5f82d46db3e38cd501b445a6 Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Mon, 21 Jul 2008 00:06:19 +0400
Subject: libata.h: replace __FUNCTION__ with __func__

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5b247b8a6b3b..d4b8e5fa3e8b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -60,9 +60,9 @@
 
 /* note: prints function name for you */
 #ifdef ATA_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
+#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
 #ifdef ATA_VERBOSE_DEBUG
-#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
+#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
 #else
 #define VPRINTK(fmt, args...)
 #endif	/* ATA_VERBOSE_DEBUG */
@@ -71,7 +71,7 @@
 #define VPRINTK(fmt, args...)
 #endif	/* ATA_DEBUG */
 
-#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
+#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __func__, ## args)
 
 /* NEW: debug levels */
 #define HAVE_LIBATA_MSG 1
-- 
cgit v1.2.3


From 963e4975c6f93c148ca809d986d412201df9af89 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 24 Jul 2008 17:16:06 +0100
Subject: pata_it821x: Driver updates and reworking

- Add support for the RDC 1010 variant
- Rework the core library to have a read_id method. This allows the hacky
  bits of it821x to go and prepares us for pata_hd
- Switch from WARN to BUG in ata_id_string as it will reboot if you get
  it wrong so WARN won't be seen
- Allow the issue of command 0xFC on the 821x. This is needed to query
  rebuild status.
- Tidy up printk formatting
- Do more ident rewriting on RAID volumes to handle firmware provided
  ident data which is rather wonky
- Report the firmware revision and device layout in RAID mode
- Don't try and disable raid on the 8211 or RDC - they don't have the
  relevant bits

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c |  31 +++++-
 drivers/ata/pata_it821x.c | 270 ++++++++++++++++++++++++++++++++++++++++------
 include/linux/libata.h    |   3 +
 3 files changed, 265 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index f69d1548b562..5ba96c5052c8 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1132,6 +1132,8 @@ void ata_id_string(const u16 *id, unsigned char *s,
 {
 	unsigned int c;
 
+	BUG_ON(len & 1);
+
 	while (len > 0) {
 		c = id[ofs] >> 8;
 		*s = c;
@@ -1165,8 +1167,6 @@ void ata_id_c_string(const u16 *id, unsigned char *s,
 {
 	unsigned char *p;
 
-	WARN_ON(!(len & 1));
-
 	ata_id_string(id, s, ofs, len - 1);
 
 	p = s + strnlen(s, len - 1);
@@ -1885,6 +1885,23 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
 	return 3 << ATA_SHIFT_PIO;
 }
 
+/**
+ *	ata_do_dev_read_id		-	default ID read method
+ *	@dev: device
+ *	@tf: proposed taskfile
+ *	@id: data buffer
+ *
+ *	Issue the identify taskfile and hand back the buffer containing
+ *	identify data. For some RAID controllers and for pre ATA devices
+ *	this function is wrapped or replaced by the driver
+ */
+unsigned int ata_do_dev_read_id(struct ata_device *dev,
+					struct ata_taskfile *tf, u16 *id)
+{
+	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
+				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
+}
+
 /**
  *	ata_dev_read_id - Read ID data from the specified device
  *	@dev: target device
@@ -1920,7 +1937,7 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class,
 	if (ata_msg_ctl(ap))
 		ata_dev_printk(dev, KERN_DEBUG, "%s: ENTER\n", __func__);
 
- retry:
+retry:
 	ata_tf_init(dev, &tf);
 
 	switch (class) {
@@ -1948,8 +1965,11 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class,
 	 */
 	tf.flags |= ATA_TFLAG_POLLING;
 
-	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
-				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
+	if (ap->ops->read_id)
+		err_mask = ap->ops->read_id(dev, &tf, id);
+	else
+		err_mask = ata_do_dev_read_id(dev, &tf, id);
+
 	if (err_mask) {
 		if (err_mask & AC_ERR_NODEV_HINT) {
 			ata_dev_printk(dev, KERN_DEBUG,
@@ -6283,6 +6303,7 @@ EXPORT_SYMBOL_GPL(ata_host_resume);
 #endif /* CONFIG_PM */
 EXPORT_SYMBOL_GPL(ata_id_string);
 EXPORT_SYMBOL_GPL(ata_id_c_string);
+EXPORT_SYMBOL_GPL(ata_do_dev_read_id);
 EXPORT_SYMBOL_GPL(ata_scsi_simulate);
 
 EXPORT_SYMBOL_GPL(ata_pio_need_iordy);
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index e10816931b2f..27843c70eb9d 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -80,7 +80,7 @@
 
 
 #define DRV_NAME "pata_it821x"
-#define DRV_VERSION "0.3.8"
+#define DRV_VERSION "0.4.0"
 
 struct it821x_dev
 {
@@ -425,6 +425,8 @@ static unsigned int it821x_smart_qc_issue(struct ata_queued_cmd *qc)
 		case ATA_CMD_WRITE_MULTI:
 		case ATA_CMD_WRITE_MULTI_EXT:
 		case ATA_CMD_ID_ATA:
+		case ATA_CMD_INIT_DEV_PARAMS:
+		case 0xFC:	/* Internal 'report rebuild state' */
 		/* Arguably should just no-op this one */
 		case ATA_CMD_SET_FEATURES:
 			return ata_sff_qc_issue(qc);
@@ -509,7 +511,7 @@ static void it821x_dev_config(struct ata_device *adev)
 
 	if (strstr(model_num, "Integrated Technology Express")) {
 		/* RAID mode */
-		printk(KERN_INFO "IT821x %sRAID%d volume",
+		ata_dev_printk(adev, KERN_INFO, "%sRAID%d volume",
 			adev->id[147]?"Bootable ":"",
 			adev->id[129]);
 		if (adev->id[129] != 1)
@@ -519,37 +521,51 @@ static void it821x_dev_config(struct ata_device *adev)
 	/* This is a controller firmware triggered funny, don't
 	   report the drive faulty! */
 	adev->horkage &= ~ATA_HORKAGE_DIAGNOSTIC;
+	/* No HPA in 'smart' mode */
+	adev->horkage |= ATA_HORKAGE_BROKEN_HPA;
 }
 
 /**
- *	it821x_ident_hack	-	Hack identify data up
- *	@ap: Port
+ *	it821x_read_id	-	Hack identify data up
+ *	@adev: device to read
+ *	@tf: proposed taskfile
+ *	@id: buffer for returned ident data
  *
- *	Walk the devices on this firmware driven port and slightly
+ *	Query the devices on this firmware driven port and slightly
  *	mash the identify data to stop us and common tools trying to
  *	use features not firmware supported. The firmware itself does
  *	some masking (eg SMART) but not enough.
- *
- *	This is a bit of an abuse of the cable method, but it is the
- *	only method called at the right time. We could modify the libata
- *	core specifically for ident hacking but while we have one offender
- *	it seems better to keep the fallout localised.
  */
 
-static int it821x_ident_hack(struct ata_port *ap)
+static unsigned int it821x_read_id(struct ata_device *adev,
+					struct ata_taskfile *tf, u16 *id)
 {
-	struct ata_device *adev;
-	ata_link_for_each_dev(adev, &ap->link) {
-		if (ata_dev_enabled(adev)) {
-			adev->id[84] &= ~(1 << 6);	/* No FUA */
-			adev->id[85] &= ~(1 << 10);	/* No HPA */
-			adev->id[76] = 0;		/* No NCQ/AN etc */
-		}
+	unsigned int err_mask;
+	unsigned char model_num[ATA_ID_PROD_LEN + 1];
+
+	err_mask = ata_do_dev_read_id(adev, tf, id);
+	if (err_mask)
+		return err_mask;
+	ata_id_c_string(id, model_num, ATA_ID_PROD, sizeof(model_num));
+
+	id[83] &= ~(1 << 12);	/* Cache flush is firmware handled */
+	id[83] &= ~(1 << 13);	/* Ditto for LBA48 flushes */
+	id[84] &= ~(1 << 6);	/* No FUA */
+	id[85] &= ~(1 << 10);	/* No HPA */
+	id[76] = 0;		/* No NCQ/AN etc */
+
+	if (strstr(model_num, "Integrated Technology Express")) {
+		/* Set feature bits the firmware neglects */
+		id[49] |= 0x0300;	/* LBA, DMA */
+		id[82] |= 0x0400;	/* LBA48 */
+		id[83] &= 0x7FFF;
+		id[83] |= 0x4000;	/* Word 83 is valid */
+		id[86] |= 0x0400;	/* LBA48 on */
+		id[ATA_ID_MAJOR_VER] |= 0x1F;
 	}
-	return ata_cable_unknown(ap);
+	return err_mask;
 }
 
-
 /**
  *	it821x_check_atapi_dma	-	ATAPI DMA handler
  *	@qc: Command we are about to issue
@@ -577,6 +593,136 @@ static int it821x_check_atapi_dma(struct ata_queued_cmd *qc)
 	return 0;
 }
 
+/**
+ *	it821x_display_disk	-	display disk setup
+ *	@n: Device number
+ *	@buf: Buffer block from firmware
+ *
+ *	Produce a nice informative display of the device setup as provided
+ *	by the firmware.
+ */
+
+static void it821x_display_disk(int n, u8 *buf)
+{
+	unsigned char id[41];
+	int mode = 0;
+	char *mtype;
+	char mbuf[8];
+	char *cbl = "(40 wire cable)";
+
+	static const char *types[5] = {
+		"RAID0", "RAID1" "RAID 0+1", "JBOD", "DISK"
+	};
+
+	if (buf[52] > 4)	/* No Disk */
+		return;
+
+	ata_id_c_string((u16 *)buf, id, 0, 41); 
+
+	if (buf[51]) {
+		mode = ffs(buf[51]);
+		mtype = "UDMA";
+	} else if (buf[49]) {
+		mode = ffs(buf[49]);
+		mtype = "MWDMA";
+	}
+
+	if (buf[76])
+		cbl = "";
+
+	if (mode)
+		snprintf(mbuf, 8, "%5s%d", mtype, mode - 1);
+	else
+		strcpy(mbuf, "PIO");
+	if (buf[52] == 4)
+		printk(KERN_INFO "%d: %-6s %-8s          %s %s\n",
+				n, mbuf, types[buf[52]], id, cbl);
+	else
+		printk(KERN_INFO "%d: %-6s %-8s Volume: %1d %s %s\n",
+				n, mbuf, types[buf[52]], buf[53], id, cbl);
+	if (buf[125] < 100)
+		printk(KERN_INFO "%d: Rebuilding: %d%%\n", n, buf[125]);
+}
+
+/**
+ *	it821x_firmware_command		-	issue firmware command
+ *	@ap: IT821x port to interrogate
+ *	@cmd: command
+ *	@len: length
+ *
+ *	Issue firmware commands expecting data back from the controller. We
+ *	use this to issue commands that do not go via the normal paths. Other
+ *	commands such as 0xFC can be issued normally.
+ */
+
+static u8 *it821x_firmware_command(struct ata_port *ap, u8 cmd, int len)
+{
+	u8 status;
+	int n = 0;
+	u16 *buf = kmalloc(len, GFP_KERNEL);
+	if (buf == NULL) {
+		printk(KERN_ERR "it821x_firmware_command: Out of memory\n");
+		return NULL;
+	}
+	/* This isn't quite a normal ATA command as we are talking to the
+	   firmware not the drives */
+	ap->ctl |= ATA_NIEN;
+	iowrite8(ap->ctl, ap->ioaddr.ctl_addr);
+	ata_wait_idle(ap);
+	iowrite8(ATA_DEVICE_OBS, ap->ioaddr.device_addr);
+	iowrite8(cmd, ap->ioaddr.command_addr);
+	udelay(1);
+	/* This should be almost immediate but a little paranoia goes a long
+	   way. */
+	while(n++ < 10) {
+		status = ioread8(ap->ioaddr.status_addr);
+		if (status & ATA_ERR) {
+			kfree(buf);
+			printk(KERN_ERR "it821x_firmware_command: rejected\n");
+			return NULL;
+		}
+		if (status & ATA_DRQ) {
+			ioread16_rep(ap->ioaddr.data_addr, buf, len/2);
+			return (u8 *)buf;
+		}
+		mdelay(1);
+	}
+	kfree(buf);
+	printk(KERN_ERR "it821x_firmware_command: timeout\n");
+	return NULL;
+}
+
+/**
+ *	it821x_probe_firmware	-	firmware reporting/setup
+ *	@ap: IT821x port being probed
+ *
+ *	Probe the firmware of the controller by issuing firmware command
+ *	0xFA and analysing the returned data.
+ */
+
+static void it821x_probe_firmware(struct ata_port *ap)
+{
+	u8 *buf;
+	int i;
+
+	/* This is a bit ugly as we can't just issue a task file to a device
+	   as this is controller magic */
+
+	buf = it821x_firmware_command(ap, 0xFA, 512);
+
+	if (buf != NULL) {
+		printk(KERN_INFO "pata_it821x: Firmware %02X/%02X/%02X%02X\n",
+				buf[505],
+				buf[506],
+				buf[507],
+				buf[508]);
+		for (i = 0; i < 4; i++)
+ 			it821x_display_disk(i, buf + 128 * i);
+		kfree(buf);
+	}
+}
+
+
 
 /**
  *	it821x_port_start	-	port setup
@@ -610,6 +756,8 @@ static int it821x_port_start(struct ata_port *ap)
 		/* Long I/O's although allowed in LBA48 space cause the
 		   onboard firmware to enter the twighlight zone */
 		/* No ATAPI DMA in this mode either */
+		if (ap->port_no == 0)
+			it821x_probe_firmware(ap);
 	}
 	/* Pull the current clocks from 0x50 */
 	if (conf & (1 << (1 + ap->port_no)))
@@ -631,6 +779,25 @@ static int it821x_port_start(struct ata_port *ap)
 	return 0;
 }
 
+/**
+ *	it821x_rdc_cable	-	Cable detect for RDC1010
+ *	@ap: port we are checking
+ *
+ *	Return the RDC1010 cable type. Unlike the IT821x we know how to do
+ *	this and can do host side cable detect
+ */
+
+static int it821x_rdc_cable(struct ata_port *ap)
+{
+	u16 r40;
+	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
+
+	pci_read_config_word(pdev, 0x40, &r40);
+	if (r40 & (1 << (2 + ap->port_no)))
+		return ATA_CBL_PATA40;
+	return ATA_CBL_PATA80;
+}
+
 static struct scsi_host_template it821x_sht = {
 	ATA_BMDMA_SHT(DRV_NAME),
 };
@@ -641,9 +808,10 @@ static struct ata_port_operations it821x_smart_port_ops = {
 	.check_atapi_dma= it821x_check_atapi_dma,
 	.qc_issue	= it821x_smart_qc_issue,
 
-	.cable_detect	= it821x_ident_hack,
+	.cable_detect	= ata_cable_80wire,
 	.set_mode	= it821x_smart_set_mode,
 	.dev_config	= it821x_dev_config,
+	.read_id	= it821x_read_id,
 
 	.port_start	= it821x_port_start,
 };
@@ -664,8 +832,29 @@ static struct ata_port_operations it821x_passthru_port_ops = {
 	.port_start	= it821x_port_start,
 };
 
+static struct ata_port_operations it821x_rdc_port_ops = {
+	.inherits	= &ata_bmdma_port_ops,
+
+	.check_atapi_dma= it821x_check_atapi_dma,
+	.sff_dev_select	= it821x_passthru_dev_select,
+	.bmdma_start 	= it821x_passthru_bmdma_start,
+	.bmdma_stop	= it821x_passthru_bmdma_stop,
+	.qc_issue	= it821x_passthru_qc_issue,
+
+	.cable_detect	= it821x_rdc_cable,
+	.set_piomode	= it821x_passthru_set_piomode,
+	.set_dmamode	= it821x_passthru_set_dmamode,
+
+	.port_start	= it821x_port_start,
+};
+
 static void it821x_disable_raid(struct pci_dev *pdev)
 {
+	/* Neither the RDC nor the IT8211 */
+	if (pdev->vendor != PCI_VENDOR_ID_ITE ||
+			pdev->device != PCI_DEVICE_ID_ITE_8212)
+			return;
+
 	/* Reset local CPU, and set BIOS not ready */
 	pci_write_config_byte(pdev, 0x5E, 0x01);
 
@@ -690,6 +879,7 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		.flags = ATA_FLAG_SLAVE_POSS,
 		.pio_mask = 0x1f,
 		.mwdma_mask = 0x07,
+		.udma_mask = ATA_UDMA6,
 		.port_ops = &it821x_smart_port_ops
 	};
 	static const struct ata_port_info info_passthru = {
@@ -699,6 +889,13 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		.udma_mask = ATA_UDMA6,
 		.port_ops = &it821x_passthru_port_ops
 	};
+	static const struct ata_port_info info_rdc = {
+		.flags = ATA_FLAG_SLAVE_POSS,
+		.pio_mask = 0x1f,
+		.mwdma_mask = 0x07,
+		/* No UDMA */
+		.port_ops = &it821x_rdc_port_ops
+	};
 
 	const struct ata_port_info *ppi[] = { NULL, NULL };
 	static char *mode[2] = { "pass through", "smart" };
@@ -707,21 +904,25 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	rc = pcim_enable_device(pdev);
 	if (rc)
 		return rc;
+		
+	if (pdev->vendor == PCI_VENDOR_ID_RDC) {
+		ppi[0] = &info_rdc;
+	} else {
+		/* Force the card into bypass mode if so requested */
+		if (it8212_noraid) {
+			printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n");
+			it821x_disable_raid(pdev);
+		}
+		pci_read_config_byte(pdev, 0x50, &conf);
+		conf &= 1;
 
-	/* Force the card into bypass mode if so requested */
-	if (it8212_noraid) {
-		printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n");
-		it821x_disable_raid(pdev);
+		printk(KERN_INFO DRV_NAME": controller in %s mode.\n",
+								mode[conf]);
+		if (conf == 0)
+			ppi[0] = &info_passthru;
+		else
+			ppi[0] = &info_smart;
 	}
-	pci_read_config_byte(pdev, 0x50, &conf);
-	conf &= 1;
-
-	printk(KERN_INFO DRV_NAME ": controller in %s mode.\n", mode[conf]);
-	if (conf == 0)
-		ppi[0] = &info_passthru;
-	else
-		ppi[0] = &info_smart;
-
 	return ata_pci_sff_init_one(pdev, ppi, &it821x_sht, NULL);
 }
 
@@ -745,6 +946,7 @@ static int it821x_reinit_one(struct pci_dev *pdev)
 static const struct pci_device_id it821x[] = {
 	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), },
 	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), },
+	{ PCI_VDEVICE(RDC, 0x1010), },
 
 	{ },
 };
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d4b8e5fa3e8b..06b80337303b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -750,6 +750,7 @@ struct ata_port_operations {
 	void (*set_piomode)(struct ata_port *ap, struct ata_device *dev);
 	void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev);
 	int  (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev);
+	unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id);
 
 	void (*dev_config)(struct ata_device *dev);
 
@@ -951,6 +952,8 @@ extern void ata_id_string(const u16 *id, unsigned char *s,
 			  unsigned int ofs, unsigned int len);
 extern void ata_id_c_string(const u16 *id, unsigned char *s,
 			    unsigned int ofs, unsigned int len);
+extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
+					struct ata_taskfile *tf, u16 *id);
 extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active);
 extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
-- 
cgit v1.2.3


From ae375044d31075a31de5a839e07ded7f67b660aa Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 31 Jul 2008 00:38:01 -0700
Subject: netfilter: nf_conntrack_tcp: decrease timeouts while data in
 unacknowledged

In order to time out dead connections quicker, keep track of outstanding data
and cap the timeout.

Suggested by Herbert Xu.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_tcp.h |  3 +++
 net/netfilter/nf_conntrack_proto_tcp.c     | 29 ++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 22ce29995f13..a049df4f2236 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -30,6 +30,9 @@ enum tcp_conntrack {
 /* Be liberal in window checking */
 #define IP_CT_TCP_FLAG_BE_LIBERAL		0x08
 
+/* Has unacknowledged data */
+#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED	0x10
+
 struct nf_ct_tcp_flags {
 	u_int8_t flags;
 	u_int8_t mask;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 420a10d8eb1e..6f61261888ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -67,7 +67,8 @@ static const char *const tcp_conntrack_names[] = {
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds
    to ~13-30min depending on RTO. */
-static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
+static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly    =   5 MINS;
+static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly =   5 MINS;
 
 static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
 	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
@@ -625,8 +626,10 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		swin = win + (sack - ack);
 		if (sender->td_maxwin < swin)
 			sender->td_maxwin = swin;
-		if (after(end, sender->td_end))
+		if (after(end, sender->td_end)) {
 			sender->td_end = end;
+			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+		}
 		/*
 		 * Update receiver data.
 		 */
@@ -637,6 +640,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			if (win == 0)
 				receiver->td_maxend++;
 		}
+		if (ack == receiver->td_end)
+			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
 
 		/*
 		 * Check retransmissions.
@@ -951,9 +956,16 @@ static int tcp_packet(struct nf_conn *ct,
 	if (old_state != new_state
 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-	timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans
-		  && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
-		  ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state];
+
+	if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
+	    tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
+		timeout = nf_ct_tcp_timeout_max_retrans;
+	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
+		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
+		 tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
+		timeout = nf_ct_tcp_timeout_unacknowledged;
+	else
+		timeout = tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
 	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
@@ -1235,6 +1247,13 @@ static struct ctl_table tcp_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
 	},
+	{
+		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
+		.data		= &nf_ct_tcp_timeout_unacknowledged,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_jiffies,
+	},
 	{
 		.ctl_name	= NET_NF_CONNTRACK_TCP_LOOSE,
 		.procname	= "nf_conntrack_tcp_loose",
-- 
cgit v1.2.3


From dacdd0e04768da1fd2b24a6ee274c582b40d0c5b Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 17 Jul 2008 16:54:19 -0700
Subject: [PATCH] configfs: Include linux/err.h in linux/configfs.h

We now use PTR_ERR() in the ->make_item() and ->make_group() operations.
Folks including configfs.h need err.h.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/configfs/dir.c        | 2 +-
 include/linux/configfs.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063a..2495f23e33f4 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1094,7 +1094,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	kfree(name);
 	if (ret) {
 		/*
-		 * If item == NULL, then link_obj() was never called.
+		 * If ret != 0, then link_obj() was never called.
 		 * There are no extra references to clean up.
 		 */
 		goto out_put;
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index d62c19ff041c..0a5491baf0bc 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/err.h>
 
 #include <asm/atomic.h>
 
-- 
cgit v1.2.3


From ecb3d28c7edd58b54f16838c434b342ba9195bec Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 18 Jun 2008 19:29:05 -0700
Subject: [PATCH] configfs: Convenience macros for attribute definition.

Sysfs has the _ATTR() and _ATTR_RO() macros to make defining extended
form attributes easier.  configfs should have something similiar.

- _CONFIGFS_ATTR() and _CONFIGFS_ATTR_RO() are the counterparts to the
  sysfs macros.
- CONFIGFS_ATTR_STRUCT() creates the extended form attribute structure.
- CONFIGFS_ATTR_OPS() defines the show_attribute()/store_attribute()
  operations that call the show()/store() operations of the extended
  form configfs_attributes.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 Documentation/filesystems/configfs/configfs.txt    |  17 +-
 .../filesystems/configfs/configfs_example.c        | 485 ---------------------
 .../configfs/configfs_example_explicit.c           | 485 +++++++++++++++++++++
 .../filesystems/configfs/configfs_example_macros.c | 448 +++++++++++++++++++
 include/linux/configfs.h                           |  67 ++-
 5 files changed, 1012 insertions(+), 490 deletions(-)
 delete mode 100644 Documentation/filesystems/configfs/configfs_example.c
 create mode 100644 Documentation/filesystems/configfs/configfs_example_explicit.c
 create mode 100644 Documentation/filesystems/configfs/configfs_example_macros.c

(limited to 'include/linux')

diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
index 44c97e6accb2..fabcb0e00f25 100644
--- a/Documentation/filesystems/configfs/configfs.txt
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -311,9 +311,20 @@ the subsystem must be ready for it.
 [An Example]
 
 The best example of these basic concepts is the simple_children
-subsystem/group and the simple_child item in configfs_example.c  It
-shows a trivial object displaying and storing an attribute, and a simple
-group creating and destroying these children.
+subsystem/group and the simple_child item in configfs_example_explicit.c
+and configfs_example_macros.c.  It shows a trivial object displaying and
+storing an attribute, and a simple group creating and destroying these
+children.
+
+The only difference between configfs_example_explicit.c and
+configfs_example_macros.c is how the attributes of the childless item
+are defined.  The childless item has extended attributes, each with
+their own show()/store() operation.  This follows a convention commonly
+used in sysfs.  configfs_example_explicit.c creates these attributes
+by explicitly defining the structures involved.  Conversely
+configfs_example_macros.c uses some convenience macros from configfs.h
+to define the attributes.  These macros are similar to their sysfs
+counterparts.
 
 [Hierarchy Navigation and the Subsystem Mutex]
 
diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c
deleted file mode 100644
index 039648791701..000000000000
--- a/Documentation/filesystems/configfs/configfs_example.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/*
- * vim: noexpandtab ts=8 sts=0 sw=8:
- *
- * configfs_example.c - This file is a demonstration module containing
- *      a number of configfs subsystems.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Based on sysfs:
- * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
- *
- * configfs Copyright (C) 2005 Oracle.  All rights reserved.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <linux/configfs.h>
-
-
-
-/*
- * 01-childless
- *
- * This first example is a childless subsystem.  It cannot create
- * any config_items.  It just has attributes.
- *
- * Note that we are enclosing the configfs_subsystem inside a container.
- * This is not necessary if a subsystem has no attributes directly
- * on the subsystem.  See the next example, 02-simple-children, for
- * such a subsystem.
- */
-
-struct childless {
-	struct configfs_subsystem subsys;
-	int showme;
-	int storeme;
-};
-
-struct childless_attribute {
-	struct configfs_attribute attr;
-	ssize_t (*show)(struct childless *, char *);
-	ssize_t (*store)(struct childless *, const char *, size_t);
-};
-
-static inline struct childless *to_childless(struct config_item *item)
-{
-	return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
-}
-
-static ssize_t childless_showme_read(struct childless *childless,
-				     char *page)
-{
-	ssize_t pos;
-
-	pos = sprintf(page, "%d\n", childless->showme);
-	childless->showme++;
-
-	return pos;
-}
-
-static ssize_t childless_storeme_read(struct childless *childless,
-				      char *page)
-{
-	return sprintf(page, "%d\n", childless->storeme);
-}
-
-static ssize_t childless_storeme_write(struct childless *childless,
-				       const char *page,
-				       size_t count)
-{
-	unsigned long tmp;
-	char *p = (char *) page;
-
-	tmp = simple_strtoul(p, &p, 10);
-	if (!p || (*p && (*p != '\n')))
-		return -EINVAL;
-
-	if (tmp > INT_MAX)
-		return -ERANGE;
-
-	childless->storeme = tmp;
-
-	return count;
-}
-
-static ssize_t childless_description_read(struct childless *childless,
-					  char *page)
-{
-	return sprintf(page,
-"[01-childless]\n"
-"\n"
-"The childless subsystem is the simplest possible subsystem in\n"
-"configfs.  It does not support the creation of child config_items.\n"
-"It only has a few attributes.  In fact, it isn't much different\n"
-"than a directory in /proc.\n");
-}
-
-static struct childless_attribute childless_attr_showme = {
-	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO },
-	.show	= childless_showme_read,
-};
-static struct childless_attribute childless_attr_storeme = {
-	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR },
-	.show	= childless_storeme_read,
-	.store	= childless_storeme_write,
-};
-static struct childless_attribute childless_attr_description = {
-	.attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO },
-	.show = childless_description_read,
-};
-
-static struct configfs_attribute *childless_attrs[] = {
-	&childless_attr_showme.attr,
-	&childless_attr_storeme.attr,
-	&childless_attr_description.attr,
-	NULL,
-};
-
-static ssize_t childless_attr_show(struct config_item *item,
-				   struct configfs_attribute *attr,
-				   char *page)
-{
-	struct childless *childless = to_childless(item);
-	struct childless_attribute *childless_attr =
-		container_of(attr, struct childless_attribute, attr);
-	ssize_t ret = 0;
-
-	if (childless_attr->show)
-		ret = childless_attr->show(childless, page);
-	return ret;
-}
-
-static ssize_t childless_attr_store(struct config_item *item,
-				    struct configfs_attribute *attr,
-				    const char *page, size_t count)
-{
-	struct childless *childless = to_childless(item);
-	struct childless_attribute *childless_attr =
-		container_of(attr, struct childless_attribute, attr);
-	ssize_t ret = -EINVAL;
-
-	if (childless_attr->store)
-		ret = childless_attr->store(childless, page, count);
-	return ret;
-}
-
-static struct configfs_item_operations childless_item_ops = {
-	.show_attribute		= childless_attr_show,
-	.store_attribute	= childless_attr_store,
-};
-
-static struct config_item_type childless_type = {
-	.ct_item_ops	= &childless_item_ops,
-	.ct_attrs	= childless_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct childless childless_subsys = {
-	.subsys = {
-		.su_group = {
-			.cg_item = {
-				.ci_namebuf = "01-childless",
-				.ci_type = &childless_type,
-			},
-		},
-	},
-};
-
-
-/* ----------------------------------------------------------------- */
-
-/*
- * 02-simple-children
- *
- * This example merely has a simple one-attribute child.  Note that
- * there is no extra attribute structure, as the child's attribute is
- * known from the get-go.  Also, there is no container for the
- * subsystem, as it has no attributes of its own.
- */
-
-struct simple_child {
-	struct config_item item;
-	int storeme;
-};
-
-static inline struct simple_child *to_simple_child(struct config_item *item)
-{
-	return item ? container_of(item, struct simple_child, item) : NULL;
-}
-
-static struct configfs_attribute simple_child_attr_storeme = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "storeme",
-	.ca_mode = S_IRUGO | S_IWUSR,
-};
-
-static struct configfs_attribute *simple_child_attrs[] = {
-	&simple_child_attr_storeme,
-	NULL,
-};
-
-static ssize_t simple_child_attr_show(struct config_item *item,
-				      struct configfs_attribute *attr,
-				      char *page)
-{
-	ssize_t count;
-	struct simple_child *simple_child = to_simple_child(item);
-
-	count = sprintf(page, "%d\n", simple_child->storeme);
-
-	return count;
-}
-
-static ssize_t simple_child_attr_store(struct config_item *item,
-				       struct configfs_attribute *attr,
-				       const char *page, size_t count)
-{
-	struct simple_child *simple_child = to_simple_child(item);
-	unsigned long tmp;
-	char *p = (char *) page;
-
-	tmp = simple_strtoul(p, &p, 10);
-	if (!p || (*p && (*p != '\n')))
-		return -EINVAL;
-
-	if (tmp > INT_MAX)
-		return -ERANGE;
-
-	simple_child->storeme = tmp;
-
-	return count;
-}
-
-static void simple_child_release(struct config_item *item)
-{
-	kfree(to_simple_child(item));
-}
-
-static struct configfs_item_operations simple_child_item_ops = {
-	.release		= simple_child_release,
-	.show_attribute		= simple_child_attr_show,
-	.store_attribute	= simple_child_attr_store,
-};
-
-static struct config_item_type simple_child_type = {
-	.ct_item_ops	= &simple_child_item_ops,
-	.ct_attrs	= simple_child_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-
-struct simple_children {
-	struct config_group group;
-};
-
-static inline struct simple_children *to_simple_children(struct config_item *item)
-{
-	return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
-}
-
-static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
-{
-	struct simple_child *simple_child;
-
-	simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
-	if (!simple_child)
-		return ERR_PTR(-ENOMEM);
-
-
-	config_item_init_type_name(&simple_child->item, name,
-				   &simple_child_type);
-
-	simple_child->storeme = 0;
-
-	return &simple_child->item;
-}
-
-static struct configfs_attribute simple_children_attr_description = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "description",
-	.ca_mode = S_IRUGO,
-};
-
-static struct configfs_attribute *simple_children_attrs[] = {
-	&simple_children_attr_description,
-	NULL,
-};
-
-static ssize_t simple_children_attr_show(struct config_item *item,
-			   		 struct configfs_attribute *attr,
-			   		 char *page)
-{
-	return sprintf(page,
-"[02-simple-children]\n"
-"\n"
-"This subsystem allows the creation of child config_items.  These\n"
-"items have only one attribute that is readable and writeable.\n");
-}
-
-static void simple_children_release(struct config_item *item)
-{
-	kfree(to_simple_children(item));
-}
-
-static struct configfs_item_operations simple_children_item_ops = {
-	.release 	= simple_children_release,
-	.show_attribute	= simple_children_attr_show,
-};
-
-/*
- * Note that, since no extra work is required on ->drop_item(),
- * no ->drop_item() is provided.
- */
-static struct configfs_group_operations simple_children_group_ops = {
-	.make_item	= simple_children_make_item,
-};
-
-static struct config_item_type simple_children_type = {
-	.ct_item_ops	= &simple_children_item_ops,
-	.ct_group_ops	= &simple_children_group_ops,
-	.ct_attrs	= simple_children_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct configfs_subsystem simple_children_subsys = {
-	.su_group = {
-		.cg_item = {
-			.ci_namebuf = "02-simple-children",
-			.ci_type = &simple_children_type,
-		},
-	},
-};
-
-
-/* ----------------------------------------------------------------- */
-
-/*
- * 03-group-children
- *
- * This example reuses the simple_children group from above.  However,
- * the simple_children group is not the subsystem itself, it is a
- * child of the subsystem.  Creation of a group in the subsystem creates
- * a new simple_children group.  That group can then have simple_child
- * children of its own.
- */
-
-static struct config_group *group_children_make_group(struct config_group *group, const char *name)
-{
-	struct simple_children *simple_children;
-
-	simple_children = kzalloc(sizeof(struct simple_children),
-				  GFP_KERNEL);
-	if (!simple_children)
-		return ERR_PTR(-ENOMEM);
-
-
-	config_group_init_type_name(&simple_children->group, name,
-				    &simple_children_type);
-
-	return &simple_children->group;
-}
-
-static struct configfs_attribute group_children_attr_description = {
-	.ca_owner = THIS_MODULE,
-	.ca_name = "description",
-	.ca_mode = S_IRUGO,
-};
-
-static struct configfs_attribute *group_children_attrs[] = {
-	&group_children_attr_description,
-	NULL,
-};
-
-static ssize_t group_children_attr_show(struct config_item *item,
-			   		struct configfs_attribute *attr,
-			   		char *page)
-{
-	return sprintf(page,
-"[03-group-children]\n"
-"\n"
-"This subsystem allows the creation of child config_groups.  These\n"
-"groups are like the subsystem simple-children.\n");
-}
-
-static struct configfs_item_operations group_children_item_ops = {
-	.show_attribute	= group_children_attr_show,
-};
-
-/*
- * Note that, since no extra work is required on ->drop_item(),
- * no ->drop_item() is provided.
- */
-static struct configfs_group_operations group_children_group_ops = {
-	.make_group	= group_children_make_group,
-};
-
-static struct config_item_type group_children_type = {
-	.ct_item_ops	= &group_children_item_ops,
-	.ct_group_ops	= &group_children_group_ops,
-	.ct_attrs	= group_children_attrs,
-	.ct_owner	= THIS_MODULE,
-};
-
-static struct configfs_subsystem group_children_subsys = {
-	.su_group = {
-		.cg_item = {
-			.ci_namebuf = "03-group-children",
-			.ci_type = &group_children_type,
-		},
-	},
-};
-
-/* ----------------------------------------------------------------- */
-
-/*
- * We're now done with our subsystem definitions.
- * For convenience in this module, here's a list of them all.  It
- * allows the init function to easily register them.  Most modules
- * will only have one subsystem, and will only call register_subsystem
- * on it directly.
- */
-static struct configfs_subsystem *example_subsys[] = {
-	&childless_subsys.subsys,
-	&simple_children_subsys,
-	&group_children_subsys,
-	NULL,
-};
-
-static int __init configfs_example_init(void)
-{
-	int ret;
-	int i;
-	struct configfs_subsystem *subsys;
-
-	for (i = 0; example_subsys[i]; i++) {
-		subsys = example_subsys[i];
-
-		config_group_init(&subsys->su_group);
-		mutex_init(&subsys->su_mutex);
-		ret = configfs_register_subsystem(subsys);
-		if (ret) {
-			printk(KERN_ERR "Error %d while registering subsystem %s\n",
-			       ret,
-			       subsys->su_group.cg_item.ci_namebuf);
-			goto out_unregister;
-		}
-	}
-
-	return 0;
-
-out_unregister:
-	for (; i >= 0; i--) {
-		configfs_unregister_subsystem(example_subsys[i]);
-	}
-
-	return ret;
-}
-
-static void __exit configfs_example_exit(void)
-{
-	int i;
-
-	for (i = 0; example_subsys[i]; i++) {
-		configfs_unregister_subsystem(example_subsys[i]);
-	}
-}
-
-module_init(configfs_example_init);
-module_exit(configfs_example_exit);
-MODULE_LICENSE("GPL");
diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c
new file mode 100644
index 000000000000..d428cc9f07f3
--- /dev/null
+++ b/Documentation/filesystems/configfs/configfs_example_explicit.c
@@ -0,0 +1,485 @@
+/*
+ * vim: noexpandtab ts=8 sts=0 sw=8:
+ *
+ * configfs_example_explicit.c - This file is a demonstration module
+ *      containing a number of configfs subsystems.  It explicitly defines
+ *      each structure without using the helper macros defined in
+ *      configfs.h.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+
+/*
+ * 01-childless
+ *
+ * This first example is a childless subsystem.  It cannot create
+ * any config_items.  It just has attributes.
+ *
+ * Note that we are enclosing the configfs_subsystem inside a container.
+ * This is not necessary if a subsystem has no attributes directly
+ * on the subsystem.  See the next example, 02-simple-children, for
+ * such a subsystem.
+ */
+
+struct childless {
+	struct configfs_subsystem subsys;
+	int showme;
+	int storeme;
+};
+
+struct childless_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(struct childless *, char *);
+	ssize_t (*store)(struct childless *, const char *, size_t);
+};
+
+static inline struct childless *to_childless(struct config_item *item)
+{
+	return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
+}
+
+static ssize_t childless_showme_read(struct childless *childless,
+				     char *page)
+{
+	ssize_t pos;
+
+	pos = sprintf(page, "%d\n", childless->showme);
+	childless->showme++;
+
+	return pos;
+}
+
+static ssize_t childless_storeme_read(struct childless *childless,
+				      char *page)
+{
+	return sprintf(page, "%d\n", childless->storeme);
+}
+
+static ssize_t childless_storeme_write(struct childless *childless,
+				       const char *page,
+				       size_t count)
+{
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	childless->storeme = tmp;
+
+	return count;
+}
+
+static ssize_t childless_description_read(struct childless *childless,
+					  char *page)
+{
+	return sprintf(page,
+"[01-childless]\n"
+"\n"
+"The childless subsystem is the simplest possible subsystem in\n"
+"configfs.  It does not support the creation of child config_items.\n"
+"It only has a few attributes.  In fact, it isn't much different\n"
+"than a directory in /proc.\n");
+}
+
+static struct childless_attribute childless_attr_showme = {
+	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO },
+	.show	= childless_showme_read,
+};
+static struct childless_attribute childless_attr_storeme = {
+	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= childless_storeme_read,
+	.store	= childless_storeme_write,
+};
+static struct childless_attribute childless_attr_description = {
+	.attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO },
+	.show = childless_description_read,
+};
+
+static struct configfs_attribute *childless_attrs[] = {
+	&childless_attr_showme.attr,
+	&childless_attr_storeme.attr,
+	&childless_attr_description.attr,
+	NULL,
+};
+
+static ssize_t childless_attr_show(struct config_item *item,
+				   struct configfs_attribute *attr,
+				   char *page)
+{
+	struct childless *childless = to_childless(item);
+	struct childless_attribute *childless_attr =
+		container_of(attr, struct childless_attribute, attr);
+	ssize_t ret = 0;
+
+	if (childless_attr->show)
+		ret = childless_attr->show(childless, page);
+	return ret;
+}
+
+static ssize_t childless_attr_store(struct config_item *item,
+				    struct configfs_attribute *attr,
+				    const char *page, size_t count)
+{
+	struct childless *childless = to_childless(item);
+	struct childless_attribute *childless_attr =
+		container_of(attr, struct childless_attribute, attr);
+	ssize_t ret = -EINVAL;
+
+	if (childless_attr->store)
+		ret = childless_attr->store(childless, page, count);
+	return ret;
+}
+
+static struct configfs_item_operations childless_item_ops = {
+	.show_attribute		= childless_attr_show,
+	.store_attribute	= childless_attr_store,
+};
+
+static struct config_item_type childless_type = {
+	.ct_item_ops	= &childless_item_ops,
+	.ct_attrs	= childless_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct childless childless_subsys = {
+	.subsys = {
+		.su_group = {
+			.cg_item = {
+				.ci_namebuf = "01-childless",
+				.ci_type = &childless_type,
+			},
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 02-simple-children
+ *
+ * This example merely has a simple one-attribute child.  Note that
+ * there is no extra attribute structure, as the child's attribute is
+ * known from the get-go.  Also, there is no container for the
+ * subsystem, as it has no attributes of its own.
+ */
+
+struct simple_child {
+	struct config_item item;
+	int storeme;
+};
+
+static inline struct simple_child *to_simple_child(struct config_item *item)
+{
+	return item ? container_of(item, struct simple_child, item) : NULL;
+}
+
+static struct configfs_attribute simple_child_attr_storeme = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "storeme",
+	.ca_mode = S_IRUGO | S_IWUSR,
+};
+
+static struct configfs_attribute *simple_child_attrs[] = {
+	&simple_child_attr_storeme,
+	NULL,
+};
+
+static ssize_t simple_child_attr_show(struct config_item *item,
+				      struct configfs_attribute *attr,
+				      char *page)
+{
+	ssize_t count;
+	struct simple_child *simple_child = to_simple_child(item);
+
+	count = sprintf(page, "%d\n", simple_child->storeme);
+
+	return count;
+}
+
+static ssize_t simple_child_attr_store(struct config_item *item,
+				       struct configfs_attribute *attr,
+				       const char *page, size_t count)
+{
+	struct simple_child *simple_child = to_simple_child(item);
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	simple_child->storeme = tmp;
+
+	return count;
+}
+
+static void simple_child_release(struct config_item *item)
+{
+	kfree(to_simple_child(item));
+}
+
+static struct configfs_item_operations simple_child_item_ops = {
+	.release		= simple_child_release,
+	.show_attribute		= simple_child_attr_show,
+	.store_attribute	= simple_child_attr_store,
+};
+
+static struct config_item_type simple_child_type = {
+	.ct_item_ops	= &simple_child_item_ops,
+	.ct_attrs	= simple_child_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+
+struct simple_children {
+	struct config_group group;
+};
+
+static inline struct simple_children *to_simple_children(struct config_item *item)
+{
+	return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
+}
+
+static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
+{
+	struct simple_child *simple_child;
+
+	simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
+	if (!simple_child)
+		return ERR_PTR(-ENOMEM);
+
+	config_item_init_type_name(&simple_child->item, name,
+				   &simple_child_type);
+
+	simple_child->storeme = 0;
+
+	return &simple_child->item;
+}
+
+static struct configfs_attribute simple_children_attr_description = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "description",
+	.ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *simple_children_attrs[] = {
+	&simple_children_attr_description,
+	NULL,
+};
+
+static ssize_t simple_children_attr_show(struct config_item *item,
+					 struct configfs_attribute *attr,
+					 char *page)
+{
+	return sprintf(page,
+"[02-simple-children]\n"
+"\n"
+"This subsystem allows the creation of child config_items.  These\n"
+"items have only one attribute that is readable and writeable.\n");
+}
+
+static void simple_children_release(struct config_item *item)
+{
+	kfree(to_simple_children(item));
+}
+
+static struct configfs_item_operations simple_children_item_ops = {
+	.release	= simple_children_release,
+	.show_attribute	= simple_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations simple_children_group_ops = {
+	.make_item	= simple_children_make_item,
+};
+
+static struct config_item_type simple_children_type = {
+	.ct_item_ops	= &simple_children_item_ops,
+	.ct_group_ops	= &simple_children_group_ops,
+	.ct_attrs	= simple_children_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem simple_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "02-simple-children",
+			.ci_type = &simple_children_type,
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 03-group-children
+ *
+ * This example reuses the simple_children group from above.  However,
+ * the simple_children group is not the subsystem itself, it is a
+ * child of the subsystem.  Creation of a group in the subsystem creates
+ * a new simple_children group.  That group can then have simple_child
+ * children of its own.
+ */
+
+static struct config_group *group_children_make_group(struct config_group *group, const char *name)
+{
+	struct simple_children *simple_children;
+
+	simple_children = kzalloc(sizeof(struct simple_children),
+				  GFP_KERNEL);
+	if (!simple_children)
+		return ERR_PTR(-ENOMEM);
+
+	config_group_init_type_name(&simple_children->group, name,
+				    &simple_children_type);
+
+	return &simple_children->group;
+}
+
+static struct configfs_attribute group_children_attr_description = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "description",
+	.ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *group_children_attrs[] = {
+	&group_children_attr_description,
+	NULL,
+};
+
+static ssize_t group_children_attr_show(struct config_item *item,
+					struct configfs_attribute *attr,
+					char *page)
+{
+	return sprintf(page,
+"[03-group-children]\n"
+"\n"
+"This subsystem allows the creation of child config_groups.  These\n"
+"groups are like the subsystem simple-children.\n");
+}
+
+static struct configfs_item_operations group_children_item_ops = {
+	.show_attribute	= group_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations group_children_group_ops = {
+	.make_group	= group_children_make_group,
+};
+
+static struct config_item_type group_children_type = {
+	.ct_item_ops	= &group_children_item_ops,
+	.ct_group_ops	= &group_children_group_ops,
+	.ct_attrs	= group_children_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem group_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "03-group-children",
+			.ci_type = &group_children_type,
+		},
+	},
+};
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * We're now done with our subsystem definitions.
+ * For convenience in this module, here's a list of them all.  It
+ * allows the init function to easily register them.  Most modules
+ * will only have one subsystem, and will only call register_subsystem
+ * on it directly.
+ */
+static struct configfs_subsystem *example_subsys[] = {
+	&childless_subsys.subsys,
+	&simple_children_subsys,
+	&group_children_subsys,
+	NULL,
+};
+
+static int __init configfs_example_init(void)
+{
+	int ret;
+	int i;
+	struct configfs_subsystem *subsys;
+
+	for (i = 0; example_subsys[i]; i++) {
+		subsys = example_subsys[i];
+
+		config_group_init(&subsys->su_group);
+		mutex_init(&subsys->su_mutex);
+		ret = configfs_register_subsystem(subsys);
+		if (ret) {
+			printk(KERN_ERR "Error %d while registering subsystem %s\n",
+			       ret,
+			       subsys->su_group.cg_item.ci_namebuf);
+			goto out_unregister;
+		}
+	}
+
+	return 0;
+
+out_unregister:
+	for (; i >= 0; i--) {
+		configfs_unregister_subsystem(example_subsys[i]);
+	}
+
+	return ret;
+}
+
+static void __exit configfs_example_exit(void)
+{
+	int i;
+
+	for (i = 0; example_subsys[i]; i++) {
+		configfs_unregister_subsystem(example_subsys[i]);
+	}
+}
+
+module_init(configfs_example_init);
+module_exit(configfs_example_exit);
+MODULE_LICENSE("GPL");
diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c
new file mode 100644
index 000000000000..d8e30a0378aa
--- /dev/null
+++ b/Documentation/filesystems/configfs/configfs_example_macros.c
@@ -0,0 +1,448 @@
+/*
+ * vim: noexpandtab ts=8 sts=0 sw=8:
+ *
+ * configfs_example_macros.c - This file is a demonstration module
+ *      containing a number of configfs subsystems.  It uses the helper
+ *      macros defined by configfs.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+
+/*
+ * 01-childless
+ *
+ * This first example is a childless subsystem.  It cannot create
+ * any config_items.  It just has attributes.
+ *
+ * Note that we are enclosing the configfs_subsystem inside a container.
+ * This is not necessary if a subsystem has no attributes directly
+ * on the subsystem.  See the next example, 02-simple-children, for
+ * such a subsystem.
+ */
+
+struct childless {
+	struct configfs_subsystem subsys;
+	int showme;
+	int storeme;
+};
+
+static inline struct childless *to_childless(struct config_item *item)
+{
+	return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
+}
+
+CONFIGFS_ATTR_STRUCT(childless);
+#define CHILDLESS_ATTR(_name, _mode, _show, _store)	\
+struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store)
+#define CHILDLESS_ATTR_RO(_name, _show)	\
+struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show);
+
+static ssize_t childless_showme_read(struct childless *childless,
+				     char *page)
+{
+	ssize_t pos;
+
+	pos = sprintf(page, "%d\n", childless->showme);
+	childless->showme++;
+
+	return pos;
+}
+
+static ssize_t childless_storeme_read(struct childless *childless,
+				      char *page)
+{
+	return sprintf(page, "%d\n", childless->storeme);
+}
+
+static ssize_t childless_storeme_write(struct childless *childless,
+				       const char *page,
+				       size_t count)
+{
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	childless->storeme = tmp;
+
+	return count;
+}
+
+static ssize_t childless_description_read(struct childless *childless,
+					  char *page)
+{
+	return sprintf(page,
+"[01-childless]\n"
+"\n"
+"The childless subsystem is the simplest possible subsystem in\n"
+"configfs.  It does not support the creation of child config_items.\n"
+"It only has a few attributes.  In fact, it isn't much different\n"
+"than a directory in /proc.\n");
+}
+
+CHILDLESS_ATTR_RO(showme, childless_showme_read);
+CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read,
+	       childless_storeme_write);
+CHILDLESS_ATTR_RO(description, childless_description_read);
+
+static struct configfs_attribute *childless_attrs[] = {
+	&childless_attr_showme.attr,
+	&childless_attr_storeme.attr,
+	&childless_attr_description.attr,
+	NULL,
+};
+
+CONFIGFS_ATTR_OPS(childless);
+static struct configfs_item_operations childless_item_ops = {
+	.show_attribute		= childless_attr_show,
+	.store_attribute	= childless_attr_store,
+};
+
+static struct config_item_type childless_type = {
+	.ct_item_ops	= &childless_item_ops,
+	.ct_attrs	= childless_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct childless childless_subsys = {
+	.subsys = {
+		.su_group = {
+			.cg_item = {
+				.ci_namebuf = "01-childless",
+				.ci_type = &childless_type,
+			},
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 02-simple-children
+ *
+ * This example merely has a simple one-attribute child.  Note that
+ * there is no extra attribute structure, as the child's attribute is
+ * known from the get-go.  Also, there is no container for the
+ * subsystem, as it has no attributes of its own.
+ */
+
+struct simple_child {
+	struct config_item item;
+	int storeme;
+};
+
+static inline struct simple_child *to_simple_child(struct config_item *item)
+{
+	return item ? container_of(item, struct simple_child, item) : NULL;
+}
+
+static struct configfs_attribute simple_child_attr_storeme = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "storeme",
+	.ca_mode = S_IRUGO | S_IWUSR,
+};
+
+static struct configfs_attribute *simple_child_attrs[] = {
+	&simple_child_attr_storeme,
+	NULL,
+};
+
+static ssize_t simple_child_attr_show(struct config_item *item,
+				      struct configfs_attribute *attr,
+				      char *page)
+{
+	ssize_t count;
+	struct simple_child *simple_child = to_simple_child(item);
+
+	count = sprintf(page, "%d\n", simple_child->storeme);
+
+	return count;
+}
+
+static ssize_t simple_child_attr_store(struct config_item *item,
+				       struct configfs_attribute *attr,
+				       const char *page, size_t count)
+{
+	struct simple_child *simple_child = to_simple_child(item);
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	simple_child->storeme = tmp;
+
+	return count;
+}
+
+static void simple_child_release(struct config_item *item)
+{
+	kfree(to_simple_child(item));
+}
+
+static struct configfs_item_operations simple_child_item_ops = {
+	.release		= simple_child_release,
+	.show_attribute		= simple_child_attr_show,
+	.store_attribute	= simple_child_attr_store,
+};
+
+static struct config_item_type simple_child_type = {
+	.ct_item_ops	= &simple_child_item_ops,
+	.ct_attrs	= simple_child_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+
+struct simple_children {
+	struct config_group group;
+};
+
+static inline struct simple_children *to_simple_children(struct config_item *item)
+{
+	return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
+}
+
+static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
+{
+	struct simple_child *simple_child;
+
+	simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
+	if (!simple_child)
+		return ERR_PTR(-ENOMEM);
+
+	config_item_init_type_name(&simple_child->item, name,
+				   &simple_child_type);
+
+	simple_child->storeme = 0;
+
+	return &simple_child->item;
+}
+
+static struct configfs_attribute simple_children_attr_description = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "description",
+	.ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *simple_children_attrs[] = {
+	&simple_children_attr_description,
+	NULL,
+};
+
+static ssize_t simple_children_attr_show(struct config_item *item,
+					 struct configfs_attribute *attr,
+					 char *page)
+{
+	return sprintf(page,
+"[02-simple-children]\n"
+"\n"
+"This subsystem allows the creation of child config_items.  These\n"
+"items have only one attribute that is readable and writeable.\n");
+}
+
+static void simple_children_release(struct config_item *item)
+{
+	kfree(to_simple_children(item));
+}
+
+static struct configfs_item_operations simple_children_item_ops = {
+	.release	= simple_children_release,
+	.show_attribute	= simple_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations simple_children_group_ops = {
+	.make_item	= simple_children_make_item,
+};
+
+static struct config_item_type simple_children_type = {
+	.ct_item_ops	= &simple_children_item_ops,
+	.ct_group_ops	= &simple_children_group_ops,
+	.ct_attrs	= simple_children_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem simple_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "02-simple-children",
+			.ci_type = &simple_children_type,
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 03-group-children
+ *
+ * This example reuses the simple_children group from above.  However,
+ * the simple_children group is not the subsystem itself, it is a
+ * child of the subsystem.  Creation of a group in the subsystem creates
+ * a new simple_children group.  That group can then have simple_child
+ * children of its own.
+ */
+
+static struct config_group *group_children_make_group(struct config_group *group, const char *name)
+{
+	struct simple_children *simple_children;
+
+	simple_children = kzalloc(sizeof(struct simple_children),
+				  GFP_KERNEL);
+	if (!simple_children)
+		return ERR_PTR(-ENOMEM);
+
+	config_group_init_type_name(&simple_children->group, name,
+				    &simple_children_type);
+
+	return &simple_children->group;
+}
+
+static struct configfs_attribute group_children_attr_description = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "description",
+	.ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *group_children_attrs[] = {
+	&group_children_attr_description,
+	NULL,
+};
+
+static ssize_t group_children_attr_show(struct config_item *item,
+					struct configfs_attribute *attr,
+					char *page)
+{
+	return sprintf(page,
+"[03-group-children]\n"
+"\n"
+"This subsystem allows the creation of child config_groups.  These\n"
+"groups are like the subsystem simple-children.\n");
+}
+
+static struct configfs_item_operations group_children_item_ops = {
+	.show_attribute	= group_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations group_children_group_ops = {
+	.make_group	= group_children_make_group,
+};
+
+static struct config_item_type group_children_type = {
+	.ct_item_ops	= &group_children_item_ops,
+	.ct_group_ops	= &group_children_group_ops,
+	.ct_attrs	= group_children_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct configfs_subsystem group_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "03-group-children",
+			.ci_type = &group_children_type,
+		},
+	},
+};
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * We're now done with our subsystem definitions.
+ * For convenience in this module, here's a list of them all.  It
+ * allows the init function to easily register them.  Most modules
+ * will only have one subsystem, and will only call register_subsystem
+ * on it directly.
+ */
+static struct configfs_subsystem *example_subsys[] = {
+	&childless_subsys.subsys,
+	&simple_children_subsys,
+	&group_children_subsys,
+	NULL,
+};
+
+static int __init configfs_example_init(void)
+{
+	int ret;
+	int i;
+	struct configfs_subsystem *subsys;
+
+	for (i = 0; example_subsys[i]; i++) {
+		subsys = example_subsys[i];
+
+		config_group_init(&subsys->su_group);
+		mutex_init(&subsys->su_mutex);
+		ret = configfs_register_subsystem(subsys);
+		if (ret) {
+			printk(KERN_ERR "Error %d while registering subsystem %s\n",
+			       ret,
+			       subsys->su_group.cg_item.ci_namebuf);
+			goto out_unregister;
+		}
+	}
+
+	return 0;
+
+out_unregister:
+	for (; i >= 0; i--) {
+		configfs_unregister_subsystem(example_subsys[i]);
+	}
+
+	return ret;
+}
+
+static void __exit configfs_example_exit(void)
+{
+	int i;
+
+	for (i = 0; example_subsys[i]; i++) {
+		configfs_unregister_subsystem(example_subsys[i]);
+	}
+}
+
+module_init(configfs_example_init);
+module_exit(configfs_example_exit);
+MODULE_LICENSE("GPL");
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 0a5491baf0bc..7f627775c947 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -130,8 +130,25 @@ struct configfs_attribute {
 /*
  * Users often need to create attribute structures for their configurable
  * attributes, containing a configfs_attribute member and function pointers
- * for the show() and store() operations on that attribute. They can use
- * this macro (similar to sysfs' __ATTR) to make defining attributes easier.
+ * for the show() and store() operations on that attribute. If they don't
+ * need anything else on the extended attribute structure, they can use
+ * this macro to define it  The argument _item is the name of the
+ * config_item structure.
+ */
+#define CONFIGFS_ATTR_STRUCT(_item)					\
+struct _item##_attribute {						\
+	struct configfs_attribute attr;					\
+	ssize_t (*show)(struct _item *, char *);			\
+	ssize_t (*store)(struct _item *, const char *, size_t);		\
+}
+
+/*
+ * With the extended attribute structure, users can use this macro
+ * (similar to sysfs' __ATTR) to make defining attributes easier.
+ * An example:
+ * #define MYITEM_ATTR(_name, _mode, _show, _store)	\
+ * struct myitem_attribute childless_attr_##_name =	\
+ *         __CONFIGFS_ATTR(_name, _mode, _show, _store)
  */
 #define __CONFIGFS_ATTR(_name, _mode, _show, _store)			\
 {									\
@@ -143,6 +160,52 @@ struct configfs_attribute {
 	.show	= _show,						\
 	.store	= _store,						\
 }
+/* Here is a readonly version, only requiring a show() operation */
+#define __CONFIGFS_ATTR_RO(_name, _show)				\
+{									\
+	.attr	= {							\
+			.ca_name = __stringify(_name),			\
+			.ca_mode = 0444,				\
+			.ca_owner = THIS_MODULE,			\
+	},								\
+	.show	= _show,						\
+}
+
+/*
+ * With these extended attributes, the simple show_attribute() and
+ * store_attribute() operations need to call the show() and store() of the
+ * attributes.  This is a common pattern, so we provide a macro to define
+ * them.  The argument _item is the name of the config_item structure.
+ * This macro expects the attributes to be named "struct <name>_attribute"
+ * and the function to_<name>() to exist;
+ */
+#define CONFIGFS_ATTR_OPS(_item)					\
+static ssize_t _item##_attr_show(struct config_item *item,		\
+				 struct configfs_attribute *attr,	\
+				 char *page)				\
+{									\
+	struct _item *_item = to_##_item(item);				\
+	struct _item##_attribute *_item##_attr =			\
+		container_of(attr, struct _item##_attribute, attr);	\
+	ssize_t ret = 0;						\
+									\
+	if (_item##_attr->show)						\
+		ret = _item##_attr->show(_item, page);			\
+	return ret;							\
+}									\
+static ssize_t _item##_attr_store(struct config_item *item,		\
+				  struct configfs_attribute *attr,	\
+				  const char *page, size_t count)	\
+{									\
+	struct _item *_item = to_##_item(item);				\
+	struct _item##_attribute *_item##_attr =			\
+		container_of(attr, struct _item##_attribute, attr);	\
+	ssize_t ret = -EINVAL;						\
+									\
+	if (_item##_attr->store)					\
+		ret = _item##_attr->store(_item, page, count);		\
+	return ret;							\
+}
 
 /*
  * If allow_link() exists, the item can symlink(2) out to other
-- 
cgit v1.2.3


From c3f26a269c2421f97f10cf8ed05d5099b573af4d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 31 Jul 2008 16:58:50 -0700
Subject: netdev: Fix lockdep warnings in multiqueue configurations.

When support for multiple TX queues were added, the
netif_tx_lock() routines we converted to iterate over
all TX queues and grab each queue's spinlock.

This causes heartburn for lockdep and it's not a healthy
thing to do with lots of TX queues anyways.

So modify this to use a top-level lock and a "frozen"
state for the individual TX queues.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c         | 12 ++++---
 include/linux/netdevice.h | 86 ++++++++++++++++++++++++++++++-----------------
 net/core/dev.c            |  1 +
 net/core/netpoll.c        |  1 +
 net/core/pktgen.c         |  7 ++--
 net/sched/sch_generic.c   |  6 ++--
 net/sched/sch_teql.c      |  9 ++---
 7 files changed, 78 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 0960e69b2da4..e4fbefc8c82f 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -69,18 +69,20 @@ static void ri_tasklet(unsigned long dev)
 	struct net_device *_dev = (struct net_device *)dev;
 	struct ifb_private *dp = netdev_priv(_dev);
 	struct net_device_stats *stats = &_dev->stats;
+	struct netdev_queue *txq;
 	struct sk_buff *skb;
 
+	txq = netdev_get_tx_queue(_dev, 0);
 	dp->st_task_enter++;
 	if ((skb = skb_peek(&dp->tq)) == NULL) {
 		dp->st_txq_refl_try++;
-		if (netif_tx_trylock(_dev)) {
+		if (__netif_tx_trylock(txq)) {
 			dp->st_rxq_enter++;
 			while ((skb = skb_dequeue(&dp->rq)) != NULL) {
 				skb_queue_tail(&dp->tq, skb);
 				dp->st_rx2tx_tran++;
 			}
-			netif_tx_unlock(_dev);
+			__netif_tx_unlock(txq);
 		} else {
 			/* reschedule */
 			dp->st_rxq_notenter++;
@@ -115,7 +117,7 @@ static void ri_tasklet(unsigned long dev)
 			BUG();
 	}
 
-	if (netif_tx_trylock(_dev)) {
+	if (__netif_tx_trylock(txq)) {
 		dp->st_rxq_check++;
 		if ((skb = skb_peek(&dp->rq)) == NULL) {
 			dp->tasklet_pending = 0;
@@ -123,10 +125,10 @@ static void ri_tasklet(unsigned long dev)
 				netif_wake_queue(_dev);
 		} else {
 			dp->st_rxq_rsch++;
-			netif_tx_unlock(_dev);
+			__netif_tx_unlock(txq);
 			goto resched;
 		}
-		netif_tx_unlock(_dev);
+		__netif_tx_unlock(txq);
 	} else {
 resched:
 		dp->tasklet_pending = 1;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4d056ceab96..ee583f642a9f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -440,6 +440,7 @@ static inline void napi_synchronize(const struct napi_struct *n)
 enum netdev_queue_state_t
 {
 	__QUEUE_STATE_XOFF,
+	__QUEUE_STATE_FROZEN,
 };
 
 struct netdev_queue {
@@ -636,7 +637,7 @@ struct net_device
 	unsigned int		real_num_tx_queues;
 
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
-
+	spinlock_t		tx_global_lock;
 /*
  * One part is mostly used on xmit path (device)
  */
@@ -1099,6 +1100,11 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
+static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue)
+{
+	return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state);
+}
+
 /**
  *	netif_running - test if up
  *	@dev: network device
@@ -1475,6 +1481,26 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 	txq->xmit_lock_owner = smp_processor_id();
 }
 
+static inline int __netif_tx_trylock(struct netdev_queue *txq)
+{
+	int ok = spin_trylock(&txq->_xmit_lock);
+	if (likely(ok))
+		txq->xmit_lock_owner = smp_processor_id();
+	return ok;
+}
+
+static inline void __netif_tx_unlock(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock(&txq->_xmit_lock);
+}
+
+static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+	spin_unlock_bh(&txq->_xmit_lock);
+}
+
 /**
  *	netif_tx_lock - grab network device transmit lock
  *	@dev: network device
@@ -1484,12 +1510,23 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
  */
 static inline void netif_tx_lock(struct net_device *dev)
 {
-	int cpu = smp_processor_id();
 	unsigned int i;
+	int cpu;
 
+	spin_lock(&dev->tx_global_lock);
+	cpu = smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		/* We are the only thread of execution doing a
+		 * freeze, but we have to grab the _xmit_lock in
+		 * order to synchronize with threads which are in
+		 * the ->hard_start_xmit() handler and already
+		 * checked the frozen bit.
+		 */
 		__netif_tx_lock(txq, cpu);
+		set_bit(__QUEUE_STATE_FROZEN, &txq->state);
+		__netif_tx_unlock(txq);
 	}
 }
 
@@ -1499,40 +1536,22 @@ static inline void netif_tx_lock_bh(struct net_device *dev)
 	netif_tx_lock(dev);
 }
 
-static inline int __netif_tx_trylock(struct netdev_queue *txq)
-{
-	int ok = spin_trylock(&txq->_xmit_lock);
-	if (likely(ok))
-		txq->xmit_lock_owner = smp_processor_id();
-	return ok;
-}
-
-static inline int netif_tx_trylock(struct net_device *dev)
-{
-	return __netif_tx_trylock(netdev_get_tx_queue(dev, 0));
-}
-
-static inline void __netif_tx_unlock(struct netdev_queue *txq)
-{
-	txq->xmit_lock_owner = -1;
-	spin_unlock(&txq->_xmit_lock);
-}
-
-static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
-{
-	txq->xmit_lock_owner = -1;
-	spin_unlock_bh(&txq->_xmit_lock);
-}
-
 static inline void netif_tx_unlock(struct net_device *dev)
 {
 	unsigned int i;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-		__netif_tx_unlock(txq);
-	}
 
+		/* No need to grab the _xmit_lock here.  If the
+		 * queue is not stopped for another reason, we
+		 * force a schedule.
+		 */
+		clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
+		if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
+			__netif_schedule(txq->qdisc);
+	}
+	spin_unlock(&dev->tx_global_lock);
 }
 
 static inline void netif_tx_unlock_bh(struct net_device *dev)
@@ -1556,13 +1575,18 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
 static inline void netif_tx_disable(struct net_device *dev)
 {
 	unsigned int i;
+	int cpu;
 
-	netif_tx_lock_bh(dev);
+	local_bh_disable();
+	cpu = smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		__netif_tx_lock(txq, cpu);
 		netif_tx_stop_queue(txq);
+		__netif_tx_unlock(txq);
 	}
-	netif_tx_unlock_bh(dev);
+	local_bh_enable();
 }
 
 static inline void netif_addr_lock(struct net_device *dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index 63d6bcddbf46..69320a56a084 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4200,6 +4200,7 @@ static void netdev_init_queues(struct net_device *dev)
 {
 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+	spin_lock_init(&dev->tx_global_lock);
 }
 
 /**
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c12720895ecf..6c7af390be0a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -70,6 +70,7 @@ static void queue_process(struct work_struct *work)
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
 		if (netif_tx_queue_stopped(txq) ||
+		    netif_tx_queue_frozen(txq) ||
 		    dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index c7d484f7e1c4..3284605f2ec7 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3305,6 +3305,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	txq = netdev_get_tx_queue(odev, queue_map);
 	if (netif_tx_queue_stopped(txq) ||
+	    netif_tx_queue_frozen(txq) ||
 	    need_resched()) {
 		idle_start = getCurUs();
 
@@ -3320,7 +3321,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 		pkt_dev->idle_acc += getCurUs() - idle_start;
 
-		if (netif_tx_queue_stopped(txq)) {
+		if (netif_tx_queue_stopped(txq) ||
+		    netif_tx_queue_frozen(txq)) {
 			pkt_dev->next_tx_us = getCurUs();	/* TODO */
 			pkt_dev->next_tx_ns = 0;
 			goto out;	/* Try the next interface */
@@ -3352,7 +3354,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	txq = netdev_get_tx_queue(odev, queue_map);
 
 	__netif_tx_lock_bh(txq);
-	if (!netif_tx_queue_stopped(txq)) {
+	if (!netif_tx_queue_stopped(txq) &&
+	    !netif_tx_queue_frozen(txq)) {
 
 		atomic_inc(&(pkt_dev->skb->users));
 	      retry_now:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 345838a2e369..9c9cd4d94890 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -135,7 +135,8 @@ static inline int qdisc_restart(struct Qdisc *q)
 	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_subqueue_stopped(dev, skb))
+	if (!netif_tx_queue_stopped(txq) &&
+	    !netif_tx_queue_frozen(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 	HARD_TX_UNLOCK(dev, txq);
 
@@ -162,7 +163,8 @@ static inline int qdisc_restart(struct Qdisc *q)
 		break;
 	}
 
-	if (ret && netif_tx_queue_stopped(txq))
+	if (ret && (netif_tx_queue_stopped(txq) ||
+		    netif_tx_queue_frozen(txq)))
 		ret = 0;
 
 	return ret;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 537223642b6e..2c35c678563b 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -305,10 +305,11 @@ restart:
 
 		switch (teql_resolve(skb, skb_res, slave)) {
 		case 0:
-			if (netif_tx_trylock(slave)) {
-				if (!__netif_subqueue_stopped(slave, subq) &&
+			if (__netif_tx_trylock(slave_txq)) {
+				if (!netif_tx_queue_stopped(slave_txq) &&
+				    !netif_tx_queue_frozen(slave_txq) &&
 				    slave->hard_start_xmit(skb, slave) == 0) {
-					netif_tx_unlock(slave);
+					__netif_tx_unlock(slave_txq);
 					master->slaves = NEXT_SLAVE(q);
 					netif_wake_queue(dev);
 					master->stats.tx_packets++;
@@ -316,7 +317,7 @@ restart:
 						qdisc_pkt_len(skb);
 					return 0;
 				}
-				netif_tx_unlock(slave);
+				__netif_tx_unlock(slave_txq);
 			}
 			if (netif_queue_stopped(dev))
 				busy = 1;
-- 
cgit v1.2.3


From bc4768eb081a67642c0c44c34ea597c273bdedcb Mon Sep 17 00:00:00 2001
From: Julius Volz <juliusv@google.com>
Date: Thu, 31 Jul 2008 20:45:24 -0700
Subject: ipvs: Move userspace definitions to include/linux/ip_vs.h

Current versions of ipvsadm include "/usr/src/linux/include/net/ip_vs.h"
directly. This file also contains kernel-only definitions. Normally, public
definitions should live in include/linux, so this patch moves the
definitions shared with userspace to a new file, "include/linux/ip_vs.h".

This also removes the unused NFC_IPVS_PROPERTY bitmask, which was once
used to point into skb->nfcache.

To make old ipvsadms still compile with this, the old header file includes
the new one.

Thanks to Dave Miller and Horms for noting/adding the missing Kbuild entry
for the new header file.

Signed-off-by: Julius Volz <juliusv@google.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild  |   1 +
 include/linux/ip_vs.h | 245 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/net/ip_vs.h   | 253 ++------------------------------------------------
 3 files changed, 254 insertions(+), 245 deletions(-)
 create mode 100644 include/linux/ip_vs.h

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 4c4142c5aa6e..a26f565e8189 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -97,6 +97,7 @@ header-y += ioctl.h
 header-y += ip6_tunnel.h
 header-y += ipmi_msgdefs.h
 header-y += ipsec.h
+header-y += ip_vs.h
 header-y += ipx.h
 header-y += irda.h
 header-y += iso_fs.h
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
new file mode 100644
index 000000000000..ec6eb49af2d8
--- /dev/null
+++ b/include/linux/ip_vs.h
@@ -0,0 +1,245 @@
+/*
+ *      IP Virtual Server
+ *      data structure and functionality definitions
+ */
+
+#ifndef _IP_VS_H
+#define _IP_VS_H
+
+#include <linux/types.h>	/* For __beXX types in userland */
+
+#define IP_VS_VERSION_CODE	0x010201
+#define NVERSION(version)			\
+	(version >> 16) & 0xFF,			\
+	(version >> 8) & 0xFF,			\
+	version & 0xFF
+
+/*
+ *      Virtual Service Flags
+ */
+#define IP_VS_SVC_F_PERSISTENT	0x0001		/* persistent port */
+#define IP_VS_SVC_F_HASHED	0x0002		/* hashed entry */
+
+/*
+ *      Destination Server Flags
+ */
+#define IP_VS_DEST_F_AVAILABLE	0x0001		/* server is available */
+#define IP_VS_DEST_F_OVERLOAD	0x0002		/* server is overloaded */
+
+/*
+ *      IPVS sync daemon states
+ */
+#define IP_VS_STATE_NONE	0x0000		/* daemon is stopped */
+#define IP_VS_STATE_MASTER	0x0001		/* started as master */
+#define IP_VS_STATE_BACKUP	0x0002		/* started as backup */
+
+/*
+ *      IPVS socket options
+ */
+#define IP_VS_BASE_CTL		(64+1024+64)		/* base */
+
+#define IP_VS_SO_SET_NONE	IP_VS_BASE_CTL		/* just peek */
+#define IP_VS_SO_SET_INSERT	(IP_VS_BASE_CTL+1)
+#define IP_VS_SO_SET_ADD	(IP_VS_BASE_CTL+2)
+#define IP_VS_SO_SET_EDIT	(IP_VS_BASE_CTL+3)
+#define IP_VS_SO_SET_DEL	(IP_VS_BASE_CTL+4)
+#define IP_VS_SO_SET_FLUSH	(IP_VS_BASE_CTL+5)
+#define IP_VS_SO_SET_LIST	(IP_VS_BASE_CTL+6)
+#define IP_VS_SO_SET_ADDDEST	(IP_VS_BASE_CTL+7)
+#define IP_VS_SO_SET_DELDEST	(IP_VS_BASE_CTL+8)
+#define IP_VS_SO_SET_EDITDEST	(IP_VS_BASE_CTL+9)
+#define IP_VS_SO_SET_TIMEOUT	(IP_VS_BASE_CTL+10)
+#define IP_VS_SO_SET_STARTDAEMON (IP_VS_BASE_CTL+11)
+#define IP_VS_SO_SET_STOPDAEMON (IP_VS_BASE_CTL+12)
+#define IP_VS_SO_SET_RESTORE    (IP_VS_BASE_CTL+13)
+#define IP_VS_SO_SET_SAVE       (IP_VS_BASE_CTL+14)
+#define IP_VS_SO_SET_ZERO	(IP_VS_BASE_CTL+15)
+#define IP_VS_SO_SET_MAX	IP_VS_SO_SET_ZERO
+
+#define IP_VS_SO_GET_VERSION	IP_VS_BASE_CTL
+#define IP_VS_SO_GET_INFO	(IP_VS_BASE_CTL+1)
+#define IP_VS_SO_GET_SERVICES	(IP_VS_BASE_CTL+2)
+#define IP_VS_SO_GET_SERVICE	(IP_VS_BASE_CTL+3)
+#define IP_VS_SO_GET_DESTS	(IP_VS_BASE_CTL+4)
+#define IP_VS_SO_GET_DEST	(IP_VS_BASE_CTL+5)	/* not used now */
+#define IP_VS_SO_GET_TIMEOUT	(IP_VS_BASE_CTL+6)
+#define IP_VS_SO_GET_DAEMON	(IP_VS_BASE_CTL+7)
+#define IP_VS_SO_GET_MAX	IP_VS_SO_GET_DAEMON
+
+
+/*
+ *      IPVS Connection Flags
+ */
+#define IP_VS_CONN_F_FWD_MASK	0x0007		/* mask for the fwd methods */
+#define IP_VS_CONN_F_MASQ	0x0000		/* masquerading/NAT */
+#define IP_VS_CONN_F_LOCALNODE	0x0001		/* local node */
+#define IP_VS_CONN_F_TUNNEL	0x0002		/* tunneling */
+#define IP_VS_CONN_F_DROUTE	0x0003		/* direct routing */
+#define IP_VS_CONN_F_BYPASS	0x0004		/* cache bypass */
+#define IP_VS_CONN_F_SYNC	0x0020		/* entry created by sync */
+#define IP_VS_CONN_F_HASHED	0x0040		/* hashed entry */
+#define IP_VS_CONN_F_NOOUTPUT	0x0080		/* no output packets */
+#define IP_VS_CONN_F_INACTIVE	0x0100		/* not established */
+#define IP_VS_CONN_F_OUT_SEQ	0x0200		/* must do output seq adjust */
+#define IP_VS_CONN_F_IN_SEQ	0x0400		/* must do input seq adjust */
+#define IP_VS_CONN_F_SEQ_MASK	0x0600		/* in/out sequence mask */
+#define IP_VS_CONN_F_NO_CPORT	0x0800		/* no client port set yet */
+#define IP_VS_CONN_F_TEMPLATE	0x1000		/* template, not connection */
+
+#define IP_VS_SCHEDNAME_MAXLEN	16
+#define IP_VS_IFNAME_MAXLEN	16
+
+
+/*
+ *	The struct ip_vs_service_user and struct ip_vs_dest_user are
+ *	used to set IPVS rules through setsockopt.
+ */
+struct ip_vs_service_user {
+	/* virtual service addresses */
+	u_int16_t		protocol;
+	__be32			addr;		/* virtual ip address */
+	__be16			port;
+	u_int32_t		fwmark;		/* firwall mark of service */
+
+	/* virtual service options */
+	char			sched_name[IP_VS_SCHEDNAME_MAXLEN];
+	unsigned		flags;		/* virtual service flags */
+	unsigned		timeout;	/* persistent timeout in sec */
+	__be32			netmask;	/* persistent netmask */
+};
+
+
+struct ip_vs_dest_user {
+	/* destination server address */
+	__be32			addr;
+	__be16			port;
+
+	/* real server options */
+	unsigned		conn_flags;	/* connection flags */
+	int			weight;		/* destination weight */
+
+	/* thresholds for active connections */
+	u_int32_t		u_threshold;	/* upper threshold */
+	u_int32_t		l_threshold;	/* lower threshold */
+};
+
+
+/*
+ *	IPVS statistics object (for user space)
+ */
+struct ip_vs_stats_user
+{
+	__u32                   conns;          /* connections scheduled */
+	__u32                   inpkts;         /* incoming packets */
+	__u32                   outpkts;        /* outgoing packets */
+	__u64                   inbytes;        /* incoming bytes */
+	__u64                   outbytes;       /* outgoing bytes */
+
+	__u32			cps;		/* current connection rate */
+	__u32			inpps;		/* current in packet rate */
+	__u32			outpps;		/* current out packet rate */
+	__u32			inbps;		/* current in byte rate */
+	__u32			outbps;		/* current out byte rate */
+};
+
+
+/* The argument to IP_VS_SO_GET_INFO */
+struct ip_vs_getinfo {
+	/* version number */
+	unsigned int		version;
+
+	/* size of connection hash table */
+	unsigned int		size;
+
+	/* number of virtual services */
+	unsigned int		num_services;
+};
+
+
+/* The argument to IP_VS_SO_GET_SERVICE */
+struct ip_vs_service_entry {
+	/* which service: user fills in these */
+	u_int16_t		protocol;
+	__be32			addr;		/* virtual address */
+	__be16			port;
+	u_int32_t		fwmark;		/* firwall mark of service */
+
+	/* service options */
+	char			sched_name[IP_VS_SCHEDNAME_MAXLEN];
+	unsigned		flags;          /* virtual service flags */
+	unsigned		timeout;	/* persistent timeout */
+	__be32			netmask;	/* persistent netmask */
+
+	/* number of real servers */
+	unsigned int		num_dests;
+
+	/* statistics */
+	struct ip_vs_stats_user stats;
+};
+
+
+struct ip_vs_dest_entry {
+	__be32			addr;		/* destination address */
+	__be16			port;
+	unsigned		conn_flags;	/* connection flags */
+	int			weight;		/* destination weight */
+
+	u_int32_t		u_threshold;	/* upper threshold */
+	u_int32_t		l_threshold;	/* lower threshold */
+
+	u_int32_t		activeconns;	/* active connections */
+	u_int32_t		inactconns;	/* inactive connections */
+	u_int32_t		persistconns;	/* persistent connections */
+
+	/* statistics */
+	struct ip_vs_stats_user stats;
+};
+
+
+/* The argument to IP_VS_SO_GET_DESTS */
+struct ip_vs_get_dests {
+	/* which service: user fills in these */
+	u_int16_t		protocol;
+	__be32			addr;		/* virtual address */
+	__be16			port;
+	u_int32_t		fwmark;		/* firwall mark of service */
+
+	/* number of real servers */
+	unsigned int		num_dests;
+
+	/* the real servers */
+	struct ip_vs_dest_entry	entrytable[0];
+};
+
+
+/* The argument to IP_VS_SO_GET_SERVICES */
+struct ip_vs_get_services {
+	/* number of virtual services */
+	unsigned int		num_services;
+
+	/* service table */
+	struct ip_vs_service_entry entrytable[0];
+};
+
+
+/* The argument to IP_VS_SO_GET_TIMEOUT */
+struct ip_vs_timeout_user {
+	int			tcp_timeout;
+	int			tcp_fin_timeout;
+	int			udp_timeout;
+};
+
+
+/* The argument to IP_VS_SO_GET_DAEMON */
+struct ip_vs_daemon_user {
+	/* sync daemon state (master/backup) */
+	int			state;
+
+	/* multicast interface name */
+	char			mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+	/* SyncID we belong to */
+	int			syncid;
+};
+
+#endif	/* _IP_VS_H */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 9a51ebad3f1f..cbb59ebed4ae 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -3,254 +3,17 @@
  *      data structure and functionality definitions
  */
 
-#ifndef _IP_VS_H
-#define _IP_VS_H
-
-#include <asm/types.h>		/* For __uXX types */
-#include <linux/types.h>	/* For __beXX types in userland */
-
-#include <linux/sysctl.h>	/* For ctl_path */
-
-#define IP_VS_VERSION_CODE	0x010201
-#define NVERSION(version)			\
-	(version >> 16) & 0xFF,			\
-	(version >> 8) & 0xFF,			\
-	version & 0xFF
-
-/*
- *      Virtual Service Flags
- */
-#define IP_VS_SVC_F_PERSISTENT	0x0001		/* persistent port */
-#define IP_VS_SVC_F_HASHED	0x0002		/* hashed entry */
-
-/*
- *      Destination Server Flags
- */
-#define IP_VS_DEST_F_AVAILABLE	0x0001		/* server is available */
-#define IP_VS_DEST_F_OVERLOAD	0x0002		/* server is overloaded */
-
-/*
- *      IPVS sync daemon states
- */
-#define IP_VS_STATE_NONE	0x0000		/* daemon is stopped */
-#define IP_VS_STATE_MASTER	0x0001		/* started as master */
-#define IP_VS_STATE_BACKUP	0x0002		/* started as backup */
-
-/*
- *      IPVS socket options
- */
-#define IP_VS_BASE_CTL		(64+1024+64)		/* base */
-
-#define IP_VS_SO_SET_NONE	IP_VS_BASE_CTL		/* just peek */
-#define IP_VS_SO_SET_INSERT	(IP_VS_BASE_CTL+1)
-#define IP_VS_SO_SET_ADD	(IP_VS_BASE_CTL+2)
-#define IP_VS_SO_SET_EDIT	(IP_VS_BASE_CTL+3)
-#define IP_VS_SO_SET_DEL	(IP_VS_BASE_CTL+4)
-#define IP_VS_SO_SET_FLUSH	(IP_VS_BASE_CTL+5)
-#define IP_VS_SO_SET_LIST	(IP_VS_BASE_CTL+6)
-#define IP_VS_SO_SET_ADDDEST	(IP_VS_BASE_CTL+7)
-#define IP_VS_SO_SET_DELDEST	(IP_VS_BASE_CTL+8)
-#define IP_VS_SO_SET_EDITDEST	(IP_VS_BASE_CTL+9)
-#define IP_VS_SO_SET_TIMEOUT	(IP_VS_BASE_CTL+10)
-#define IP_VS_SO_SET_STARTDAEMON (IP_VS_BASE_CTL+11)
-#define IP_VS_SO_SET_STOPDAEMON (IP_VS_BASE_CTL+12)
-#define IP_VS_SO_SET_RESTORE    (IP_VS_BASE_CTL+13)
-#define IP_VS_SO_SET_SAVE       (IP_VS_BASE_CTL+14)
-#define IP_VS_SO_SET_ZERO	(IP_VS_BASE_CTL+15)
-#define IP_VS_SO_SET_MAX	IP_VS_SO_SET_ZERO
-
-#define IP_VS_SO_GET_VERSION	IP_VS_BASE_CTL
-#define IP_VS_SO_GET_INFO	(IP_VS_BASE_CTL+1)
-#define IP_VS_SO_GET_SERVICES	(IP_VS_BASE_CTL+2)
-#define IP_VS_SO_GET_SERVICE	(IP_VS_BASE_CTL+3)
-#define IP_VS_SO_GET_DESTS	(IP_VS_BASE_CTL+4)
-#define IP_VS_SO_GET_DEST	(IP_VS_BASE_CTL+5)	/* not used now */
-#define IP_VS_SO_GET_TIMEOUT	(IP_VS_BASE_CTL+6)
-#define IP_VS_SO_GET_DAEMON	(IP_VS_BASE_CTL+7)
-#define IP_VS_SO_GET_MAX	IP_VS_SO_GET_DAEMON
-
-
-/*
- *      IPVS Connection Flags
- */
-#define IP_VS_CONN_F_FWD_MASK	0x0007		/* mask for the fwd methods */
-#define IP_VS_CONN_F_MASQ	0x0000		/* masquerading/NAT */
-#define IP_VS_CONN_F_LOCALNODE	0x0001		/* local node */
-#define IP_VS_CONN_F_TUNNEL	0x0002		/* tunneling */
-#define IP_VS_CONN_F_DROUTE	0x0003		/* direct routing */
-#define IP_VS_CONN_F_BYPASS	0x0004		/* cache bypass */
-#define IP_VS_CONN_F_SYNC	0x0020		/* entry created by sync */
-#define IP_VS_CONN_F_HASHED	0x0040		/* hashed entry */
-#define IP_VS_CONN_F_NOOUTPUT	0x0080		/* no output packets */
-#define IP_VS_CONN_F_INACTIVE	0x0100		/* not established */
-#define IP_VS_CONN_F_OUT_SEQ	0x0200		/* must do output seq adjust */
-#define IP_VS_CONN_F_IN_SEQ	0x0400		/* must do input seq adjust */
-#define IP_VS_CONN_F_SEQ_MASK	0x0600		/* in/out sequence mask */
-#define IP_VS_CONN_F_NO_CPORT	0x0800		/* no client port set yet */
-#define IP_VS_CONN_F_TEMPLATE	0x1000		/* template, not connection */
-
-/* Move it to better place one day, for now keep it unique */
-#define NFC_IPVS_PROPERTY	0x10000
-
-#define IP_VS_SCHEDNAME_MAXLEN	16
-#define IP_VS_IFNAME_MAXLEN	16
-
-
-/*
- *	The struct ip_vs_service_user and struct ip_vs_dest_user are
- *	used to set IPVS rules through setsockopt.
- */
-struct ip_vs_service_user {
-	/* virtual service addresses */
-	u_int16_t		protocol;
-	__be32			addr;		/* virtual ip address */
-	__be16			port;
-	u_int32_t		fwmark;		/* firwall mark of service */
-
-	/* virtual service options */
-	char			sched_name[IP_VS_SCHEDNAME_MAXLEN];
-	unsigned		flags;		/* virtual service flags */
-	unsigned		timeout;	/* persistent timeout in sec */
-	__be32			netmask;	/* persistent netmask */
-};
-
-
-struct ip_vs_dest_user {
-	/* destination server address */
-	__be32			addr;
-	__be16			port;
-
-	/* real server options */
-	unsigned		conn_flags;	/* connection flags */
-	int			weight;		/* destination weight */
-
-	/* thresholds for active connections */
-	u_int32_t		u_threshold;	/* upper threshold */
-	u_int32_t		l_threshold;	/* lower threshold */
-};
-
-
-/*
- *	IPVS statistics object (for user space)
- */
-struct ip_vs_stats_user
-{
-	__u32                   conns;          /* connections scheduled */
-	__u32                   inpkts;         /* incoming packets */
-	__u32                   outpkts;        /* outgoing packets */
-	__u64                   inbytes;        /* incoming bytes */
-	__u64                   outbytes;       /* outgoing bytes */
-
-	__u32			cps;		/* current connection rate */
-	__u32			inpps;		/* current in packet rate */
-	__u32			outpps;		/* current out packet rate */
-	__u32			inbps;		/* current in byte rate */
-	__u32			outbps;		/* current out byte rate */
-};
-
-
-/* The argument to IP_VS_SO_GET_INFO */
-struct ip_vs_getinfo {
-	/* version number */
-	unsigned int		version;
-
-	/* size of connection hash table */
-	unsigned int		size;
-
-	/* number of virtual services */
-	unsigned int		num_services;
-};
-
-
-/* The argument to IP_VS_SO_GET_SERVICE */
-struct ip_vs_service_entry {
-	/* which service: user fills in these */
-	u_int16_t		protocol;
-	__be32			addr;		/* virtual address */
-	__be16			port;
-	u_int32_t		fwmark;		/* firwall mark of service */
-
-	/* service options */
-	char			sched_name[IP_VS_SCHEDNAME_MAXLEN];
-	unsigned		flags;          /* virtual service flags */
-	unsigned		timeout;	/* persistent timeout */
-	__be32			netmask;	/* persistent netmask */
-
-	/* number of real servers */
-	unsigned int		num_dests;
-
-	/* statistics */
-	struct ip_vs_stats_user stats;
-};
-
-
-struct ip_vs_dest_entry {
-	__be32			addr;		/* destination address */
-	__be16			port;
-	unsigned		conn_flags;	/* connection flags */
-	int			weight;		/* destination weight */
-
-	u_int32_t		u_threshold;	/* upper threshold */
-	u_int32_t		l_threshold;	/* lower threshold */
-
-	u_int32_t		activeconns;	/* active connections */
-	u_int32_t		inactconns;	/* inactive connections */
-	u_int32_t		persistconns;	/* persistent connections */
-
-	/* statistics */
-	struct ip_vs_stats_user stats;
-};
-
-
-/* The argument to IP_VS_SO_GET_DESTS */
-struct ip_vs_get_dests {
-	/* which service: user fills in these */
-	u_int16_t		protocol;
-	__be32			addr;		/* virtual address */
-	__be16			port;
-	u_int32_t		fwmark;		/* firwall mark of service */
-
-	/* number of real servers */
-	unsigned int		num_dests;
-
-	/* the real servers */
-	struct ip_vs_dest_entry	entrytable[0];
-};
-
-
-/* The argument to IP_VS_SO_GET_SERVICES */
-struct ip_vs_get_services {
-	/* number of virtual services */
-	unsigned int		num_services;
-
-	/* service table */
-	struct ip_vs_service_entry entrytable[0];
-};
-
-
-/* The argument to IP_VS_SO_GET_TIMEOUT */
-struct ip_vs_timeout_user {
-	int			tcp_timeout;
-	int			tcp_fin_timeout;
-	int			udp_timeout;
-};
-
-
-/* The argument to IP_VS_SO_GET_DAEMON */
-struct ip_vs_daemon_user {
-	/* sync daemon state (master/backup) */
-	int			state;
-
-	/* multicast interface name */
-	char			mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-	/* SyncID we belong to */
-	int			syncid;
-};
+#ifndef _NET_IP_VS_H
+#define _NET_IP_VS_H
 
+#include <linux/ip_vs.h>                /* definitions shared with userland */
 
+/* old ipvsadm versions still include this file directly */
 #ifdef __KERNEL__
 
+#include <asm/types.h>                  /* for __uXX types */
+
+#include <linux/sysctl.h>               /* for ctl_path */
 #include <linux/list.h>                 /* for struct list_head */
 #include <linux/spinlock.h>             /* for struct rwlock_t */
 #include <asm/atomic.h>                 /* for struct atomic_t */
@@ -981,4 +744,4 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
 
 #endif /* __KERNEL__ */
 
-#endif	/* _IP_VS_H */
+#endif	/* _NET_IP_VS_H */
-- 
cgit v1.2.3


From 4a7b61d23505854dff7d04cc11944566cffdd0ee Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 31 Jul 2008 20:52:08 -0700
Subject: skbuff: add missing kernel-doc for do_not_encrypt

Add missing kernel-doc notation to sk_buff:

Warning(linux-2.6.27-rc1-git2//include/linux/skbuff.h:345): No description found for parameter 'do_not_encrypt'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a640385e0598..cfcc45b3bef0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -243,6 +243,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
  *	@ndisc_nodetype: router type (from link layer)
+ *	@do_not_encrypt: set to prevent encryption of this frame
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
-- 
cgit v1.2.3


From a4b526b3ba6353cd89a38e41da48ed83b0ead16f Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 1 Aug 2008 16:39:12 +0200
Subject: [S390] Optimize storage key operations for anon pages

For anonymous pages without a swap cache backing the check in
page_remove_rmap for the physical dirty bit in page_remove_rmap is
unnecessary. The instructions that are used to check and reset the dirty
bit are expensive. Removing the check noticably speeds up process exit.
In addition the clearing of the dirty bit in __SetPageUptodate is
pointless as well. With these two changes there is no storage key
operation for an anonymous page anymore if it does not hit the swap
space.

The micro benchmark which repeatedly executes an empty shell script
gets about 5% faster.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/page-flags.h | 3 ---
 mm/rmap.c                  | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 54590a9a103e..25aaccdb2f26 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -239,9 +239,6 @@ static inline void __SetPageUptodate(struct page *page)
 {
 	smp_wmb();
 	__set_bit(PG_uptodate, &(page)->flags);
-#ifdef CONFIG_S390
-	page_clear_dirty(page);
-#endif
 }
 
 static inline void SetPageUptodate(struct page *page)
diff --git a/mm/rmap.c b/mm/rmap.c
index 99bc3f9cd796..94a5246a3f98 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -667,7 +667,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 		 * Leaving it set also helps swapoff to reinstate ptes
 		 * faster for those pages still in swapcache.
 		 */
-		if (page_test_dirty(page)) {
+		if ((!PageAnon(page) || PageSwapCache(page)) &&
+		    page_test_dirty(page)) {
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
-- 
cgit v1.2.3


From 1027abe8827b47f7e9c4ed6514fde3d44f79963c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 Jul 2008 04:13:04 -0400
Subject: [PATCH] merge locate_fd() and get_unused_fd()

	New primitive: alloc_fd(start, flags).  get_unused_fd() and
get_unused_fd_flags() become wrappers on top of it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c           | 87 +++++++++-------------------------------------------
 fs/file.c            | 61 ++++++++++++++++++++++++++++++++++++
 fs/open.c            | 56 ---------------------------------
 include/linux/file.h |  3 +-
 4 files changed, 77 insertions(+), 130 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 61d625136813..2e40799daad6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd)
 	return res;
 }
 
-/*
- * locate_fd finds a free file descriptor in the open_fds fdset,
- * expanding the fd arrays if necessary.  Must be called with the
- * file_lock held for write.
- */
-
-static int locate_fd(unsigned int orig_start, int cloexec)
-{
-	struct files_struct *files = current->files;
-	unsigned int newfd;
-	unsigned int start;
-	int error;
-	struct fdtable *fdt;
-
-	spin_lock(&files->file_lock);
-repeat:
-	fdt = files_fdtable(files);
-	/*
-	 * Someone might have closed fd's in the range
-	 * orig_start..fdt->next_fd
-	 */
-	start = orig_start;
-	if (start < files->next_fd)
-		start = files->next_fd;
-
-	newfd = start;
-	if (start < fdt->max_fds)
-		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
-					   fdt->max_fds, start);
-
-	error = expand_files(files, newfd);
-	if (error < 0)
-		goto out;
-
-	/*
-	 * If we needed to expand the fs array we
-	 * might have blocked - try again.
-	 */
-	if (error)
-		goto repeat;
-
-	if (start <= files->next_fd)
-		files->next_fd = newfd + 1;
-
-	FD_SET(newfd, fdt->open_fds);
-	if (cloexec)
-		FD_SET(newfd, fdt->close_on_exec);
-	else
-		FD_CLR(newfd, fdt->close_on_exec);
-	error = newfd;
-
-out:
-	spin_unlock(&files->file_lock);
-	return error;
-}
-
-static int dupfd(struct file *file, unsigned int start, int cloexec)
-{
-	int fd = locate_fd(start, cloexec);
-	if (fd >= 0)
-		fd_install(fd, file);
-	else
-		fput(file);
-
-	return fd;
-}
-
 asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
 	int err = -EBADF;
@@ -194,10 +127,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 asmlinkage long sys_dup(unsigned int fildes)
 {
 	int ret = -EBADF;
-	struct file * file = fget(fildes);
-
-	if (file)
-		ret = dupfd(file, 0, 0);
+	struct file *file = fget(fildes);
+
+	if (file) {
+		ret = get_unused_fd();
+		if (ret >= 0)
+			fd_install(ret, file);
+		else
+			fput(file);
+	}
 	return ret;
 }
 
@@ -322,8 +260,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_DUPFD_CLOEXEC:
 		if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 			break;
-		get_file(filp);
-		err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
+		err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
+		if (err >= 0) {
+			get_file(filp);
+			fd_install(err, filp);
+		}
 		break;
 	case F_GETFD:
 		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
diff --git a/fs/file.c b/fs/file.c
index d8773b19fe47..f313314f996f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
  *  Manage the dynamic fd arrays in the process files_struct.
  */
 
+#include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/time.h>
@@ -432,3 +433,63 @@ struct files_struct init_files = {
 	},
 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
 };
+
+/*
+ * allocate a file descriptor, mark it busy.
+ */
+int alloc_fd(unsigned start, unsigned flags)
+{
+	struct files_struct *files = current->files;
+	unsigned int fd;
+	int error;
+	struct fdtable *fdt;
+
+	spin_lock(&files->file_lock);
+repeat:
+	fdt = files_fdtable(files);
+	fd = start;
+	if (fd < files->next_fd)
+		fd = files->next_fd;
+
+	if (fd < fdt->max_fds)
+		fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+					   fdt->max_fds, fd);
+
+	error = expand_files(files, fd);
+	if (error < 0)
+		goto out;
+
+	/*
+	 * If we needed to expand the fs array we
+	 * might have blocked - try again.
+	 */
+	if (error)
+		goto repeat;
+
+	if (start <= files->next_fd)
+		files->next_fd = fd + 1;
+
+	FD_SET(fd, fdt->open_fds);
+	if (flags & O_CLOEXEC)
+		FD_SET(fd, fdt->close_on_exec);
+	else
+		FD_CLR(fd, fdt->close_on_exec);
+	error = fd;
+#if 1
+	/* Sanity check */
+	if (rcu_dereference(fdt->fd[fd]) != NULL) {
+		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
+		rcu_assign_pointer(fdt->fd[fd], NULL);
+	}
+#endif
+
+out:
+	spin_unlock(&files->file_lock);
+	return error;
+}
+
+int get_unused_fd(void)
+{
+	return alloc_fd(0, 0);
+}
+EXPORT_SYMBOL(get_unused_fd);
diff --git a/fs/open.c b/fs/open.c
index 52647be277a2..07da9359481c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 }
 EXPORT_SYMBOL(dentry_open);
 
-/*
- * Find an empty file descriptor entry, and mark it busy.
- */
-int get_unused_fd_flags(int flags)
-{
-	struct files_struct * files = current->files;
-	int fd, error;
-	struct fdtable *fdt;
-
-	spin_lock(&files->file_lock);
-
-repeat:
-	fdt = files_fdtable(files);
-	fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
-				files->next_fd);
-
-	/* Do we need to expand the fd array or fd set?  */
-	error = expand_files(files, fd);
-	if (error < 0)
-		goto out;
-
-	if (error) {
-		/*
-	 	 * If we needed to expand the fs array we
-		 * might have blocked - try again.
-		 */
-		goto repeat;
-	}
-
-	FD_SET(fd, fdt->open_fds);
-	if (flags & O_CLOEXEC)
-		FD_SET(fd, fdt->close_on_exec);
-	else
-		FD_CLR(fd, fdt->close_on_exec);
-	files->next_fd = fd + 1;
-#if 1
-	/* Sanity check */
-	if (fdt->fd[fd] != NULL) {
-		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
-		fdt->fd[fd] = NULL;
-	}
-#endif
-	error = fd;
-
-out:
-	spin_unlock(&files->file_lock);
-	return error;
-}
-
-int get_unused_fd(void)
-{
-	return get_unused_fd_flags(0);
-}
-
-EXPORT_SYMBOL(get_unused_fd);
-
 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
 	struct fdtable *fdt = files_fdtable(files);
diff --git a/include/linux/file.h b/include/linux/file.h
index 27c64bdc68c9..a20259e248a5 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -34,8 +34,9 @@ extern struct file *fget(unsigned int fd);
 extern struct file *fget_light(unsigned int fd, int *fput_needed);
 extern void set_close_on_exec(unsigned int fd, int flag);
 extern void put_filp(struct file *);
+extern int alloc_fd(unsigned start, unsigned flags);
 extern int get_unused_fd(void);
-extern int get_unused_fd_flags(int flags);
+#define get_unused_fd_flags(flags) alloc_fd(0, (flags))
 extern void put_unused_fd(unsigned int fd);
 
 extern void fd_install(unsigned int fd, struct file *file);
-- 
cgit v1.2.3


From 77e69dac3cefacee939cb107ae9cd520a62338e0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Aug 2008 04:29:18 -0400
Subject: [PATCH] fix races and leaks in vfs_quota_on() users

* new helper: vfs_quota_on_path(); equivalent of vfs_quota_on() sans the
  pathname resolution.
* callers of vfs_quota_on() that do their own pathname resolution and
  checks based on it are switched to vfs_quota_on_path(); that way we
  avoid the races.
* reiserfs leaked dentry/vfsmount references on several failure exits.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dquot.c               | 33 ++++++++++++++++++++-------------
 fs/ext3/super.c          |  3 ++-
 fs/ext4/super.c          |  3 ++-
 fs/reiserfs/super.c      | 16 +++++++++-------
 include/linux/quotaops.h |  2 ++
 5 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 1346eebe74ce..8ec4d6cc7633 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
 	return ret;
 }
 
+int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+		      struct path *path)
+{
+	int error = security_quota_on(path->dentry);
+	if (error)
+		return error;
+	/* Quota file not on the same filesystem? */
+	if (path->mnt->mnt_sb != sb)
+		error = -EXDEV;
+	else
+		error = vfs_quota_on_inode(path->dentry->d_inode, type,
+					   format_id);
+	return error;
+}
+
 /* Actual function called from quotactl() */
 int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
 		 int remount)
@@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
 		return vfs_quota_on_remount(sb, type);
 
 	error = path_lookup(path, LOOKUP_FOLLOW, &nd);
-	if (error < 0)
-		return error;
-	error = security_quota_on(nd.path.dentry);
-	if (error)
-		goto out_path;
-	/* Quota file not on the same filesystem? */
-	if (nd.path.mnt->mnt_sb != sb)
-		error = -EXDEV;
-	else
-		error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
-					   format_id);
-out_path:
-	path_put(&nd.path);
+	if (!error) {
+		error = vfs_quota_on_path(sb, type, format_id, &nd.path);
+		path_put(&nd.path);
+	}
 	return error;
 }
 
@@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
 EXPORT_SYMBOL(dqstats);
 EXPORT_SYMBOL(dq_data_lock);
 EXPORT_SYMBOL(vfs_quota_on);
+EXPORT_SYMBOL(vfs_quota_on_path);
 EXPORT_SYMBOL(vfs_quota_on_mount);
 EXPORT_SYMBOL(vfs_quota_off);
 EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ddced384674..f38a5afc39a1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 		journal_unlock_updates(EXT3_SB(sb)->s_journal);
 	}
 
+	err = vfs_quota_on_path(sb, type, format_id, &nd.path);
 	path_put(&nd.path);
-	return vfs_quota_on(sb, type, format_id, path, remount);
+	return err;
 }
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff14..1e69f29a8c55 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3352,8 +3352,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	}
 
+	err = vfs_quota_on_path(sb, type, format_id, &nd.path);
 	path_put(&nd.path);
-	return vfs_quota_on(sb, type, format_id, path, remount);
+	return err;
 }
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 879e54d35c2d..282a13596c70 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2076,8 +2076,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		return err;
 	/* Quotafile not on the same filesystem? */
 	if (nd.path.mnt->mnt_sb != sb) {
-		path_put(&nd.path);
-		return -EXDEV;
+		err = -EXDEV;
+		goto out;
 	}
 	inode = nd.path.dentry->d_inode;
 	/* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2087,8 +2087,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 			reiserfs_warning(sb,
 				"reiserfs: Unpacking tail of quota file failed"
 				" (%d). Cannot turn on quotas.", err);
-			path_put(&nd.path);
-			return -EINVAL;
+			err = -EINVAL;
+			goto out;
 		}
 		mark_inode_dirty(inode);
 	}
@@ -2109,13 +2109,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		/* Just start temporary transaction and finish it */
 		err = journal_begin(&th, sb, 1);
 		if (err)
-			return err;
+			goto out;
 		err = journal_end_sync(&th, sb, 1);
 		if (err)
-			return err;
+			goto out;
 	}
+	err = vfs_quota_on_path(sb, type, format_id, &nd.path);
+out:
 	path_put(&nd.path);
-	return vfs_quota_on(sb, type, format_id, path, 0);
+	return err;
 }
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 742187f7a05c..ca6b9b5c8d52 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -43,6 +43,8 @@ int dquot_mark_dquot_dirty(struct dquot *dquot);
 
 int vfs_quota_on(struct super_block *sb, int type, int format_id,
  	char *path, int remount);
+int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+ 	struct path *path);
 int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int vfs_quota_off(struct super_block *sb, int type, int remount);
-- 
cgit v1.2.3


From 8d66bf5481002b0960aa49aed0987c73f5d7816c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Aug 2008 09:05:54 -0400
Subject: [PATCH] pass struct path * to do_add_mount()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/mntpt.c         |  2 +-
 fs/cifs/cifs_dfs_ref.c |  2 +-
 fs/namespace.c         | 16 ++++++++--------
 fs/nfs/namespace.c     |  2 +-
 include/linux/mount.h  |  3 ++-
 5 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c37..78db4953a800 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	}
 
 	mntget(newmnt);
-	err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
+	err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
 	switch (err) {
 	case 0:
 		path_put(&nd->path);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e329..d2c8eef84f3c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
 	int err;
 
 	mntget(newmnt);
-	err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist);
+	err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
 	switch (err) {
 	case 0:
 		path_put(&nd->path);
diff --git a/fs/namespace.c b/fs/namespace.c
index 411728c0c8bb..6e283c93b50d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
-	return do_add_mount(mnt, nd, mnt_flags, NULL);
+	return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
 }
 
 /*
  * add a mount into a namespace's mount tree
  * - provide the option of adding the new mount to an expiration list
  */
-int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+int do_add_mount(struct vfsmount *newmnt, struct path *path,
 		 int mnt_flags, struct list_head *fslist)
 {
 	int err;
 
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
-	while (d_mountpoint(nd->path.dentry) &&
-	       follow_down(&nd->path.mnt, &nd->path.dentry))
+	while (d_mountpoint(path->dentry) &&
+	       follow_down(&path->mnt, &path->dentry))
 		;
 	err = -EINVAL;
-	if (!check_mnt(nd->path.mnt))
+	if (!check_mnt(path->mnt))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
 	err = -EBUSY;
-	if (nd->path.mnt->mnt_sb == newmnt->mnt_sb &&
-	    nd->path.mnt->mnt_root == nd->path.dentry)
+	if (path->mnt->mnt_sb == newmnt->mnt_sb &&
+	    path->mnt->mnt_root == path->dentry)
 		goto unlock;
 
 	err = -EINVAL;
@@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
-	if ((err = graft_tree(newmnt, &nd->path)))
+	if ((err = graft_tree(newmnt, path)))
 		goto unlock;
 
 	if (fslist) /* add to the specified expiration list */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef76399..66df08dd1caf 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 		goto out_err;
 
 	mntget(mnt);
-	err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
+	err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
 			   &nfs_automount_list);
 	if (err < 0) {
 		mntput(mnt);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b5efaa2132ab..30a1d63b6fb5 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -105,7 +105,8 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 
 struct nameidata;
 
-extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+struct path;
+extern int do_add_mount(struct vfsmount *newmnt, struct path *path,
 			int mnt_flags, struct list_head *fslist);
 
 extern void mark_mounts_for_expiry(struct list_head *mounts);
-- 
cgit v1.2.3


From 6c5e0c4d518a37e1d5d794c14433e80284415079 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 1 Aug 2008 20:31:32 +0200
Subject: block: add a blk_plug_device_unlocked() that grabs the queue lock

blk_plug_device() must be called with the queue lock held, so callers
often just grab and release the lock for that purpose. Add a helper
that does just that.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 18 ++++++++++++++++++
 include/linux/blkdev.h |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index fef79ccb2a11..4889eb86a39e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -212,6 +212,24 @@ void blk_plug_device(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_plug_device);
 
+/**
+ * blk_plug_device_unlocked - plug a device without queue lock held
+ * @q:    The &struct request_queue to plug
+ *
+ * Description:
+ *   Like @blk_plug_device(), but grabs the queue lock and disables
+ *   interrupts.
+ **/
+void blk_plug_device_unlocked(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	blk_plug_device(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL(blk_plug_device_unlocked);
+
 /*
  * remove the queue from the plugged list, if present. called with
  * queue lock held and interrupts disabled.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 88d68081a0f1..e61f22be4d0e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -655,6 +655,7 @@ extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_plug_device(struct request_queue *);
+extern void blk_plug_device_unlocked(struct request_queue *);
 extern int blk_remove_plug(struct request_queue *);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_cmd_ioctl(struct file *, struct request_queue *,
-- 
cgit v1.2.3


From 5c7edcd7ee6b77b88252fe4096dce1a46a60c829 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Thu, 31 Jul 2008 02:04:09 -0700
Subject: tracehook: fix exit_signal=0 case

My commit 2b2a1ff64afbadac842bbc58c5166962cf4f7664 introduced a regression
(sorry about that) for the odd case of exit_signal=0 (e.g. clone_flags=0).
This is not a normal use, but it's used by a case in the glibc test suite.

Dying with exit_signal=0 sends no signal, but it's supposed to wake up a
parent's blocked wait*() calls (unlike the delayed_group_leader case).
This fixes tracehook_notify_death() and its caller to distinguish a
"signal 0" wakeup from the delayed_group_leader case (with no wakeup).

Signed-off-by: Roland McGrath <roland@redhat.com>
Tested-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tracehook.h | 21 +++++++++++++--------
 kernel/exit.c             |  6 +++---
 2 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b1875582c1a1..12532839f508 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -493,16 +493,21 @@ static inline int tracehook_notify_jctl(int notify, int why)
  * @death_cookie:	value to pass to tracehook_report_death()
  * @group_dead:		nonzero if this was the last thread in the group to die
  *
- * Return the signal number to send our parent with do_notify_parent(), or
- * zero to send no signal and leave a zombie, or -1 to self-reap right now.
+ * A return value >= 0 means call do_notify_parent() with that signal
+ * number.  Negative return value can be %DEATH_REAP to self-reap right
+ * now, or %DEATH_DELAYED_GROUP_LEADER to a zombie without notifying our
+ * parent.  Note that a return value of 0 means a do_notify_parent() call
+ * that sends no signal, but still wakes up a parent blocked in wait*().
  *
  * Called with write_lock_irq(&tasklist_lock) held.
  */
+#define DEATH_REAP			-1
+#define DEATH_DELAYED_GROUP_LEADER	-2
 static inline int tracehook_notify_death(struct task_struct *task,
 					 void **death_cookie, int group_dead)
 {
 	if (task->exit_signal == -1)
-		return task->ptrace ? SIGCHLD : -1;
+		return task->ptrace ? SIGCHLD : DEATH_REAP;
 
 	/*
 	 * If something other than our normal parent is ptracing us, then
@@ -512,21 +517,21 @@ static inline int tracehook_notify_death(struct task_struct *task,
 	if (thread_group_empty(task) && !ptrace_reparented(task))
 		return task->exit_signal;
 
-	return task->ptrace ? SIGCHLD : 0;
+	return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER;
 }
 
 /**
  * tracehook_report_death - task is dead and ready to be reaped
  * @task:		@current task now exiting
- * @signal:		signal number sent to parent, or 0 or -1
+ * @signal:		return value from tracheook_notify_death()
  * @death_cookie:	value passed back from tracehook_notify_death()
  * @group_dead:		nonzero if this was the last thread in the group to die
  *
  * Thread has just become a zombie or is about to self-reap.  If positive,
  * @signal is the signal number just sent to the parent (usually %SIGCHLD).
- * If @signal is -1, this thread will self-reap.  If @signal is 0, this is
- * a delayed_group_leader() zombie.  The @death_cookie was passed back by
- * tracehook_notify_death().
+ * If @signal is %DEATH_REAP, this thread will self-reap.  If @signal is
+ * %DEATH_DELAYED_GROUP_LEADER, this is a delayed_group_leader() zombie.
+ * The @death_cookie was passed back by tracehook_notify_death().
  *
  * If normal reaping is not inhibited, @task->exit_state might be changing
  * in parallel.
diff --git a/kernel/exit.c b/kernel/exit.c
index eb4d6470d1d0..38ec40630149 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -911,10 +911,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 		tsk->exit_signal = SIGCHLD;
 
 	signal = tracehook_notify_death(tsk, &cookie, group_dead);
-	if (signal > 0)
+	if (signal >= 0)
 		signal = do_notify_parent(tsk, signal);
 
-	tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE;
+	tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
 
 	/* mt-exec, de_thread() is waiting for us */
 	if (thread_group_leader(tsk) &&
@@ -927,7 +927,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	tracehook_report_death(tsk, signal, cookie, group_dead);
 
 	/* If the process is dead, release it - nobody will wait for it */
-	if (signal < 0)
+	if (signal == DEATH_REAP)
 		release_task(tsk);
 }
 
-- 
cgit v1.2.3


From 4744b43431e8613f920c5cba88346756f53c5165 Mon Sep 17 00:00:00 2001
From: Tim Bird <tim.bird@am.sony.com>
Date: Fri, 1 Aug 2008 14:05:50 -0700
Subject: embedded: fix vc_translate operator precedence

This fixes a bug in operator precedence in the newly introduced vc_translate
macro.  Without this fix, the translation of some characters on the
kernel console is garbled.

This patch was copied to the e-mail list previously for testing.  Now,
all reports confirm that it works, so this is an official post for
application.

Signed-off-by: Tim Bird <tim.bird@am.sony.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/vt_kern.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 14c0e91be9b5..8c8119ffee12 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -74,7 +74,7 @@ void con_protect_unimap(struct vc_data *vc, int rdonly);
 int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
 
 #define vc_translate(vc, c) ((vc)->vc_translate[(c) |			\
-					(vc)->vc_toggle_meta ? 0x80 : 0])
+					((vc)->vc_toggle_meta ? 0x80 : 0)])
 #else
 #define con_set_trans_old(arg) (0)
 #define con_get_trans_old(arg) (-EINVAL)
-- 
cgit v1.2.3


From ff4cc1de2401ad44ae084c3f5a9e898af0879520 Mon Sep 17 00:00:00 2001
From: Karsten Keil <kkeil@suse.de>
Date: Wed, 30 Jul 2008 18:26:58 +0200
Subject: mISDN cleanup user interface

The channelmap should have the same size on 32 and 64 bit systems
and should not depend on endianess.
Thanks to David Woodhouse for spotting this.

Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c |  6 +++---
 drivers/isdn/hardware/mISDN/hfcpci.c   |  2 +-
 drivers/isdn/mISDN/l1oip_core.c        |  6 ++----
 drivers/isdn/mISDN/socket.c            |  4 ++--
 include/linux/mISDNif.h                | 32 +++++++++++++++++++++++++++-----
 5 files changed, 35 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 2649ea55a9e8..10144e871c06 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -3971,7 +3971,7 @@ open_bchannel(struct hfc_multi *hc, struct dchannel *dch,
 	struct bchannel	*bch;
 	int		ch;
 
-	if (!test_bit(rq->adr.channel, &dch->dev.channelmap[0]))
+	if (!test_channelmap(rq->adr.channel, dch->dev.channelmap))
 		return -EINVAL;
 	if (rq->protocol == ISDN_P_NONE)
 		return -EINVAL;
@@ -4587,7 +4587,7 @@ init_e1_port(struct hfc_multi *hc, struct hm_map *m)
 		list_add(&bch->ch.list, &dch->dev.bchannels);
 		hc->chan[ch].bch = bch;
 		hc->chan[ch].port = 0;
-		test_and_set_bit(bch->nr, &dch->dev.channelmap[0]);
+		set_channelmap(bch->nr, dch->dev.channelmap);
 	}
 	/* set optical line type */
 	if (port[Port_cnt] & 0x001) {
@@ -4755,7 +4755,7 @@ init_multi_port(struct hfc_multi *hc, int pt)
 		list_add(&bch->ch.list, &dch->dev.bchannels);
 		hc->chan[i + ch].bch = bch;
 		hc->chan[i + ch].port = pt;
-		test_and_set_bit(bch->nr, &dch->dev.channelmap[0]);
+		set_channelmap(bch->nr, dch->dev.channelmap);
 	}
 	/* set master clock */
 	if (port[Port_cnt] & 0x001) {
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index 3231814e7efa..9cf5edbb1a9b 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -2056,7 +2056,7 @@ setup_card(struct hfc_pci *card)
 	card->dch.dev.nrbchan = 2;
 	for (i = 0; i < 2; i++) {
 		card->bch[i].nr = i + 1;
-		test_and_set_bit(i + 1, &card->dch.dev.channelmap[0]);
+		set_channelmap(i + 1, card->dch.dev.channelmap);
 		card->bch[i].debug = debug;
 		mISDN_initbchannel(&card->bch[i], MAX_DATA_MEM);
 		card->bch[i].hw = card;
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index 155b99780c4f..e42150a57780 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -1006,8 +1006,7 @@ open_bchannel(struct l1oip *hc, struct dchannel *dch, struct channel_req *rq)
 	struct bchannel	*bch;
 	int		ch;
 
-	if (!test_bit(rq->adr.channel & 0x1f,
-		&dch->dev.channelmap[rq->adr.channel >> 5]))
+	if (!test_channelmap(rq->adr.channel, dch->dev.channelmap))
 		return -EINVAL;
 	if (rq->protocol == ISDN_P_NONE)
 		return -EINVAL;
@@ -1412,8 +1411,7 @@ init_card(struct l1oip *hc, int pri, int bundle)
 		bch->ch.nr = i + ch;
 		list_add(&bch->ch.list, &dch->dev.bchannels);
 		hc->chan[i + ch].bch = bch;
-		test_and_set_bit(bch->nr & 0x1f,
-			&dch->dev.channelmap[bch->nr >> 5]);
+		set_channelmap(bch->nr, dch->dev.channelmap);
 	}
 	ret = mISDN_register_device(&dch->dev, hc->name);
 	if (ret)
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 4ba4cc364c9e..e5a20f9542d1 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -379,7 +379,7 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			di.Bprotocols = dev->Bprotocols | get_all_Bprotocols();
 			di.protocol = dev->D.protocol;
 			memcpy(di.channelmap, dev->channelmap,
-				MISDN_CHMAP_SIZE * 4);
+				sizeof(di.channelmap));
 			di.nrbchan = dev->nrbchan;
 			strcpy(di.name, dev->name);
 			if (copy_to_user((void __user *)arg, &di, sizeof(di)))
@@ -637,7 +637,7 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			di.Bprotocols = dev->Bprotocols | get_all_Bprotocols();
 			di.protocol = dev->D.protocol;
 			memcpy(di.channelmap, dev->channelmap,
-				MISDN_CHMAP_SIZE * 4);
+				sizeof(di.channelmap));
 			di.nrbchan = dev->nrbchan;
 			strcpy(di.name, dev->name);
 			if (copy_to_user((void __user *)arg, &di, sizeof(di)))
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 5c948f337817..8f2d60da04e7 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -37,7 +37,7 @@
  */
 #define	MISDN_MAJOR_VERSION	1
 #define	MISDN_MINOR_VERSION	0
-#define MISDN_RELEASE		18
+#define MISDN_RELEASE		19
 
 /* primitives for information exchange
  * generell format
@@ -242,7 +242,8 @@ struct mISDNhead {
 #define TEI_SAPI		63
 #define CTRL_SAPI		0
 
-#define MISDN_CHMAP_SIZE	4
+#define MISDN_MAX_CHANNEL	127
+#define MISDN_CHMAP_SIZE	((MISDN_MAX_CHANNEL + 1) >> 3)
 
 #define SOL_MISDN	0
 
@@ -275,11 +276,32 @@ struct mISDN_devinfo {
 	u_int			Dprotocols;
 	u_int			Bprotocols;
 	u_int			protocol;
-	u_long			channelmap[MISDN_CHMAP_SIZE];
+	u_char			channelmap[MISDN_CHMAP_SIZE];
 	u_int			nrbchan;
 	char			name[MISDN_MAX_IDLEN];
 };
 
+static inline int
+test_channelmap(u_int nr, u_char *map)
+{
+	if (nr <= MISDN_MAX_CHANNEL)
+		return map[nr >> 3] & (1 << (nr & 7));
+	else
+		return 0;
+}
+
+static inline void
+set_channelmap(u_int nr, u_char *map)
+{
+	map[nr >> 3] |= (1 << (nr & 7));
+}
+
+static inline void
+clear_channelmap(u_int nr, u_char *map)
+{
+	map[nr >> 3] &= ~(1 << (nr & 7));
+}
+
 /* CONTROL_CHANNEL parameters */
 #define MISDN_CTRL_GETOP		0x0000
 #define MISDN_CTRL_LOOP			0x0001
@@ -405,7 +427,7 @@ struct mISDNdevice {
 	u_int			Dprotocols;
 	u_int			Bprotocols;
 	u_int			nrbchan;
-	u_long			channelmap[MISDN_CHMAP_SIZE];
+	u_char			channelmap[MISDN_CHMAP_SIZE];
 	struct list_head	bchannels;
 	struct mISDNchannel	*teimgr;
 	struct device		dev;
@@ -430,7 +452,7 @@ struct mISDNstack {
 #endif
 };
 
-/* global alloc/queue dunctions */
+/* global alloc/queue functions */
 
 static inline struct sk_buff *
 mI_alloc_skb(unsigned int len, gfp_t gfp_mask)
-- 
cgit v1.2.3


From 85ebd00334099fd5d296bcae74a66c943d46686d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@misterjones.org>
Date: Sat, 2 Aug 2008 19:12:23 +0200
Subject: Fix IHEX firmware generation/loading

Fix both the IHEX firmware generation (len field always null, and EOF
marker a byte too short) and loading (struct ihex_binrec needs to be
packed to reflect the on-disk structure).

Signed-off-by: Marc Zyngier <maz@misterjones.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 firmware/ihex2fw.c   | 6 +++---
 include/linux/ihex.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/firmware/ihex2fw.c b/firmware/ihex2fw.c
index 660b191ed75e..8f7fdaa9e010 100644
--- a/firmware/ihex2fw.c
+++ b/firmware/ihex2fw.c
@@ -250,19 +250,19 @@ static void file_record(struct ihex_binrec *record)
 
 static int output_records(int outfd)
 {
-	unsigned char zeroes[5] = {0, 0, 0, 0, 0};
+	unsigned char zeroes[6] = {0, 0, 0, 0, 0, 0};
 	struct ihex_binrec *p = records;
 
 	while (p) {
 		uint16_t writelen = (p->len + 9) & ~3;
 
 		p->addr = htonl(p->addr);
-		p->len = htonl(p->len);
+		p->len = htons(p->len);
 		write(outfd, &p->addr, writelen);
 		p = p->next;
 	}
 	/* EOF record is zero length, since we don't bother to represent
 	   the type field in the binary version */
-	write(outfd, zeroes, 5);
+	write(outfd, zeroes, 6);
 	return 0;
 }
diff --git a/include/linux/ihex.h b/include/linux/ihex.h
index 2baace2788a7..31d8629e75a1 100644
--- a/include/linux/ihex.h
+++ b/include/linux/ihex.h
@@ -18,7 +18,7 @@ struct ihex_binrec {
 	__be32 addr;
 	__be16 len;
 	uint8_t data[0];
-} __attribute__((aligned(4)));
+} __attribute__((packed));
 
 /* Find the next record, taking into account the 4-byte alignment */
 static inline const struct ihex_binrec *
-- 
cgit v1.2.3


From cf368d2f9aced8adc8bd6b1f04294a71551d5fce Mon Sep 17 00:00:00 2001
From: Alexander Beregalov <a.beregalov@gmail.com>
Date: Sun, 3 Aug 2008 03:03:57 +0400
Subject: drivers/video/console/promcon.c: fix build error

drivers/video/console/promcon.c:158: error: implicit declaration of
function 'con_protect_unimap'

Introduced by commit a29ccf6f823a84d89e1c7aaaf221cf7282022024
("embedded: fix vc_translate operator precedence").

Signed-off-by: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Tim Bird <tim.bird@am.sony.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/vt_kern.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 8c8119ffee12..1c78d56c57e5 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -86,6 +86,7 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
 #define con_copy_unimap(d, s) (0)
 #define con_get_unimap(vc, ct, uct, list) (-EINVAL)
 #define con_free_unimap(vc) do { ; } while (0)
+#define con_protect_unimap(vc, rdonly) do { ; } while (0)
 
 #define vc_translate(vc, c) (c)
 #endif
-- 
cgit v1.2.3


From 63870295de9adb365cd121dab94379b8cfdf986a Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 4 Aug 2008 10:39:46 +0900
Subject: maple: Clean up maple_driver_register/unregister routines.

These were completely inconsistent. Clean these up to take a maple_driver
pointer directly for consistency.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/input/keyboard/maple_keyb.c |  6 +++---
 drivers/sh/maple/maple.c            | 37 ++++++++++++++++++++++++++-----------
 include/linux/maple.h               |  4 +++-
 3 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c
index 42f5d4ec39ab..3f5151a0fd15 100644
--- a/drivers/input/keyboard/maple_keyb.c
+++ b/drivers/input/keyboard/maple_keyb.c
@@ -235,17 +235,17 @@ static struct maple_driver dc_kbd_driver = {
 		.name = "Dreamcast_keyboard",
 		.probe = probe_maple_kbd,
 		.remove = remove_maple_kbd,
-       },
+	},
 };
 
 static int __init dc_kbd_init(void)
 {
-	return maple_driver_register(&dc_kbd_driver.drv);
+	return maple_driver_register(&dc_kbd_driver);
 }
 
 static void __exit dc_kbd_exit(void)
 {
-	driver_unregister(&dc_kbd_driver.drv);
+	maple_driver_unregister(&dc_kbd_driver);
 }
 
 module_init(dc_kbd_init);
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index be97789fa5fd..a6b4dc3cfcba 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -2,6 +2,7 @@
  * Core maple bus functionality
  *
  *  Copyright (C) 2007, 2008 Adrian McMenamin
+ *  Copyright (C) 2001 - 2008 Paul Mundt
  *
  * Based on 2.4 code by:
  *
@@ -31,7 +32,7 @@
 #include <mach/dma.h>
 #include <mach/sysasic.h>
 
-MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin");
+MODULE_AUTHOR("Yaegashi Takeshi, Paul Mundt, M. R. Brown, Adrian McMenamin");
 MODULE_DESCRIPTION("Maple bus driver for Dreamcast");
 MODULE_LICENSE("GPL v2");
 MODULE_SUPPORTED_DEVICE("{{SEGA, Dreamcast/Maple}}");
@@ -65,19 +66,35 @@ static bool checked[4];
 static struct maple_device *baseunits[4];
 
 /**
- *  maple_driver_register - register a device driver
- *  automatically makes the driver bus a maple bus
- *  @drv: the driver to be registered
+ * maple_driver_register - register a maple driver
+ * @drv: maple driver to be registered.
+ *
+ * Registers the passed in @drv, while updating the bus type.
+ * Devices with matching function IDs will be automatically probed.
  */
-int maple_driver_register(struct device_driver *drv)
+int maple_driver_register(struct maple_driver *drv)
 {
 	if (!drv)
 		return -EINVAL;
-	drv->bus = &maple_bus_type;
-	return driver_register(drv);
+
+	drv->drv.bus = &maple_bus_type;
+
+	return driver_register(&drv->drv);
 }
 EXPORT_SYMBOL_GPL(maple_driver_register);
 
+/**
+ * maple_driver_unregister - unregister a maple driver.
+ * @drv: maple driver to unregister.
+ *
+ * Cleans up after maple_driver_register(). To be invoked in the exit
+ * path of any module drivers.
+ */
+void maple_driver_unregister(struct maple_driver *drv)
+{
+	driver_unregister(&drv->drv);
+}
+
 /* set hardware registers to enable next round of dma */
 static void maplebus_dma_reset(void)
 {
@@ -724,11 +741,9 @@ static int maple_get_dma_buffer(void)
 static int match_maple_bus_driver(struct device *devptr,
 				  struct device_driver *drvptr)
 {
-	struct maple_driver *maple_drv;
-	struct maple_device *maple_dev;
+	struct maple_driver *maple_drv = to_maple_driver(drvptr);
+	struct maple_device *maple_dev = to_maple_dev(devptr);
 
-	maple_drv = container_of(drvptr, struct maple_driver, drv);
-	maple_dev = container_of(devptr, struct maple_device, dev);
 	/* Trap empty port case */
 	if (maple_dev->devinfo.function == 0xFFFFFFFF)
 		return 0;
diff --git a/include/linux/maple.h b/include/linux/maple.h
index c853b1066018..b2b7ce0fb1f7 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -70,7 +70,9 @@ void maple_getcond_callback(struct maple_device *dev,
 			    void (*callback) (struct mapleq * mq),
 			    unsigned long interval,
 			    unsigned long function);
-int maple_driver_register(struct device_driver *drv);
+int maple_driver_register(struct maple_driver *);
+void maple_driver_unregister(struct maple_driver *);
+
 int maple_add_packet_sleeps(struct maple_device *mdev, u32 function,
 	u32 command, u32 length, void *data);
 void maple_clear_dev(struct maple_device *mdev);
-- 
cgit v1.2.3


From 617870632de6739fca0893f3e6648e9ae1bd0ddb Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Mon, 4 Aug 2008 10:58:24 +0900
Subject: maple: Kill useless private_data pointer.

We can simply wrap in to the dev_set/get_drvdata(), there's no reason
to track an extra level of private data on top of the struct device.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/input/keyboard/maple_keyb.c | 15 ++++++++-------
 drivers/sh/maple/maple.c            |  1 +
 include/linux/maple.h               |  4 +++-
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c
index 3f5151a0fd15..22f17a593be7 100644
--- a/drivers/input/keyboard/maple_keyb.c
+++ b/drivers/input/keyboard/maple_keyb.c
@@ -139,7 +139,7 @@ static void dc_scan_kbd(struct dc_kbd *kbd)
 static void dc_kbd_callback(struct mapleq *mq)
 {
 	struct maple_device *mapledev = mq->dev;
-	struct dc_kbd *kbd = mapledev->private_data;
+	struct dc_kbd *kbd = maple_get_drvdata(mapledev);
 	unsigned long *buf = mq->recvbuf;
 
 	/*
@@ -175,8 +175,6 @@ static int probe_maple_kbd(struct device *dev)
 		goto fail;
 	}
 
-	mdev->private_data = kbd;
-
 	kbd->dev = idev;
 	memcpy(kbd->keycode, dc_kbd_keycode, sizeof(kbd->keycode));
 
@@ -204,27 +202,30 @@ static int probe_maple_kbd(struct device *dev)
 		MAPLE_FUNC_KEYBOARD);
 
 	mdev->driver = mdrv;
+
+	maple_set_drvdata(mdev, kbd);
+
 	return error;
 
 fail:
 	input_free_device(idev);
 	kfree(kbd);
-	mdev->private_data = NULL;
+	maple_set_drvdata(mdev, NULL);
 	return error;
 }
 
 static int remove_maple_kbd(struct device *dev)
 {
 	struct maple_device *mdev = to_maple_dev(dev);
-	struct dc_kbd *kbd;
+	struct dc_kbd *kbd = maple_get_drvdata(mdev);
 
 	mutex_lock(&maple_keyb_mutex);
 
-	kbd = mdev->private_data;
-	mdev->private_data = NULL;
 	input_unregister_device(kbd->dev);
 	kfree(kbd);
 
+	maple_set_drvdata(mdev, NULL);
+
 	mutex_unlock(&maple_keyb_mutex);
 	return 0;
 }
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index a6b4dc3cfcba..be77a39f224c 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -94,6 +94,7 @@ void maple_driver_unregister(struct maple_driver *drv)
 {
 	driver_unregister(&drv->drv);
 }
+EXPORT_SYMBOL_GPL(maple_driver_unregister);
 
 /* set hardware registers to enable next round of dma */
 static void maplebus_dma_reset(void)
diff --git a/include/linux/maple.h b/include/linux/maple.h
index b2b7ce0fb1f7..c23d3f51ba40 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -51,7 +51,6 @@ struct maple_devinfo {
 struct maple_device {
 	struct maple_driver *driver;
 	struct mapleq *mq;
-	void *private_data;
 	void (*callback) (struct mapleq * mq);
 	unsigned long when, interval, function;
 	struct maple_devinfo devinfo;
@@ -80,4 +79,7 @@ void maple_clear_dev(struct maple_device *mdev);
 #define to_maple_dev(n) container_of(n, struct maple_device, dev)
 #define to_maple_driver(n) container_of(n, struct maple_driver, drv)
 
+#define maple_get_drvdata(d)		dev_get_drvdata(&(d)->dev)
+#define maple_set_drvdata(d,p)		dev_set_drvdata(&(d)->dev, (p))
+
 #endif				/* __LINUX_MAPLE_H */
-- 
cgit v1.2.3


From 98f7dfd86cbbd377e2cbc293529681b914296f68 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 18 Jul 2008 13:52:59 +0800
Subject: mac80211: pass dtim_period to low level driver

This patch adds the dtim_period in ieee80211_bss_conf, this allows the low
level driver to know the dtim_period, and to plan power save accordingly.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 13 +++++++++++++
 include/net/mac80211.h     |  4 +++-
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mlme.c        | 11 +++++++++++
 4 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a1630ba0b87c..7f4df7c7659d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -506,6 +506,19 @@ struct ieee80211_channel_sw_ie {
 	u8 count;
 } __attribute__ ((packed));
 
+/**
+ * struct ieee80211_tim
+ *
+ * This structure refers to "Traffic Indication Map information element"
+ */
+struct ieee80211_tim_ie {
+	u8 dtim_count;
+	u8 dtim_period;
+	u8 bitmap_ctrl;
+	/* variable size: 1 - 251 bytes */
+	u8 virtual_map[0];
+} __attribute__ ((packed));
+
 struct ieee80211_mgmt {
 	__le16 frame_control;
 	__le16 duration;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b52721008be8..9d99f2e0a204 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -177,9 +177,10 @@ enum ieee80211_bss_change {
  * @aid: association ID number, valid only when @assoc is true
  * @use_cts_prot: use CTS protection
  * @use_short_preamble: use 802.11b short preamble
+ * @dtim_period: num of beacons before the next DTIM, for PSM
  * @timestamp: beacon timestamp
  * @beacon_int: beacon interval
- * @assoc_capability: capabbilities taken from assoc resp
+ * @assoc_capability: capabilities taken from assoc resp
  * @assoc_ht: association in HT mode
  * @ht_conf: ht capabilities
  * @ht_bss_conf: ht extended capabilities
@@ -191,6 +192,7 @@ struct ieee80211_bss_conf {
 	/* erp related data */
 	bool use_cts_prot;
 	bool use_short_preamble;
+	u8 dtim_period;
 	u16 beacon_int;
 	u16 assoc_capability;
 	u64 timestamp;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a2e200f9811e..ec59345af65b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -82,6 +82,7 @@ struct ieee80211_sta_bss {
 
 	u8 bssid[ETH_ALEN];
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
+	u8 dtim_period;
 	u16 capability; /* host byte order */
 	enum ieee80211_band band;
 	int freq;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index acb04133a95d..591e6331c427 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -551,6 +551,7 @@ static void ieee80211_set_associated(struct net_device *dev,
 			/* set timing information */
 			sdata->bss_conf.beacon_int = bss->beacon_int;
 			sdata->bss_conf.timestamp = bss->timestamp;
+			sdata->bss_conf.dtim_period = bss->dtim_period;
 
 			changed |= ieee80211_handle_bss_capability(sdata, bss);
 
@@ -2688,6 +2689,16 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
 	bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
 
+	if (elems->tim) {
+		struct ieee80211_tim_ie *tim_ie =
+			(struct ieee80211_tim_ie *)elems->tim;
+		bss->dtim_period = tim_ie->dtim_period;
+	}
+
+	/* set default value for buggy APs */
+	if (!elems->tim || bss->dtim_period == 0)
+		bss->dtim_period = 1;
+
 	bss->supp_rates_len = 0;
 	if (elems->supp_rates) {
 		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
-- 
cgit v1.2.3


From 1a3f7d98e5f50f21ce6fb1406a35531d9596c5c6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 4 Aug 2008 16:50:38 -0700
Subject: Revert "UFS: add const to parser token table"

This reverts commit f9247273cb69ba101877e946d2d83044409cc8c5 (and
fb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 - "fix fs/nfs/nfsroot.c
compilation" - that fixed a missed conversion).

The changes cause problems for at least the sparc build.  Let's re-do
them when the exact issues are resolved.

Requested-by: Andrew Morton <akpm@linux-foundation.org>
Requested-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/nfsroot.c       | 2 +-
 fs/ufs/super.c         | 2 +-
 include/linux/parser.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25daee..46763d1cd397 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t __initconst tokens = {
+static match_table_t __initdata tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rsize, "rsize=%u"},
 	{Opt_wsize, "wsize=%u"},
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3e30e40aa24d..3141969b456d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
 	unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-	const struct match_token *tp = tokens;
+	struct match_token *tp = tokens;
 
 	while (tp->token != Opt_onerror_panic && tp->token != mval)
 		++tp;
diff --git a/include/linux/parser.h b/include/linux/parser.h
index cc554ca8bc78..7dcd05075756 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -14,7 +14,7 @@ struct match_token {
 	const char *pattern;
 };
 
-typedef const struct match_token match_table_t[];
+typedef struct match_token match_table_t[];
 
 /* Maximum number of arguments that match_token will find in a pattern */
 enum {MAX_OPT_ARGS = 3};
-- 
cgit v1.2.3


From 115a326c1e5cab457924356123bbfd7d783ecf9d Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Mon, 4 Aug 2008 13:56:01 -0700
Subject: tracehook: kerneldoc fix

My last change to tracehook.h made it confuse the kerneldoc parser.
Move the #define's before the comment so it's happy again.

Signed-off-by: Roland McGrath <roland@redhat.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tracehook.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 12532839f508..ab3ef7aefa95 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -487,6 +487,9 @@ static inline int tracehook_notify_jctl(int notify, int why)
 	return notify || (current->ptrace & PT_PTRACED);
 }
 
+#define DEATH_REAP			-1
+#define DEATH_DELAYED_GROUP_LEADER	-2
+
 /**
  * tracehook_notify_death - task is dead, ready to notify parent
  * @task:		@current task now exiting
@@ -501,8 +504,6 @@ static inline int tracehook_notify_jctl(int notify, int why)
  *
  * Called with write_lock_irq(&tasklist_lock) held.
  */
-#define DEATH_REAP			-1
-#define DEATH_DELAYED_GROUP_LEADER	-2
 static inline int tracehook_notify_death(struct task_struct *task,
 					 void **death_cookie, int group_dead)
 {
-- 
cgit v1.2.3


From 529ae9aaa08378cfe2a4350bded76f32cc8ff0ce Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 2 Aug 2008 12:01:03 +0200
Subject: mm: rename page trylock

Converting page lock to new locking bitops requires a change of page flag
operation naming, so we might as well convert it to something nicer
(!TestSetPageLocked_Lock => trylock_page, SetPageLocked => set_page_locked).

This also facilitates lockdeping of page lock.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/scsi/sg.c           |  2 +-
 fs/afs/write.c              |  2 +-
 fs/cifs/file.c              |  2 +-
 fs/jbd/commit.c             |  4 +--
 fs/jbd2/commit.c            |  2 +-
 fs/reiserfs/journal.c       |  2 +-
 fs/splice.c                 |  2 +-
 fs/xfs/linux-2.6/xfs_aops.c |  4 +--
 include/linux/page-flags.h  |  2 +-
 include/linux/pagemap.h     | 67 +++++++++++++++++++++++++++------------------
 mm/filemap.c                | 12 ++++----
 mm/memory.c                 |  2 +-
 mm/migrate.c                |  4 +--
 mm/rmap.c                   |  2 +-
 mm/shmem.c                  |  4 +--
 mm/swap.c                   |  2 +-
 mm/swap_state.c             |  8 +++---
 mm/swapfile.c               |  2 +-
 mm/truncate.c               |  4 +--
 mm/vmscan.c                 |  4 +--
 20 files changed, 74 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index d3b8ebb83776..3d36270a8b4d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1747,7 +1747,7 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
                  */
 		flush_dcache_page(pages[i]);
 		/* ?? Is locking needed? I don't think so */
-		/* if (TestSetPageLocked(pages[i]))
+		/* if (!trylock_page(pages[i]))
 		   goto out_unlock; */
         }
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c489..065b4e10681a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
 			page = pages[loop];
 			if (page->index > wb->last)
 				break;
-			if (TestSetPageLocked(page))
+			if (!trylock_page(page))
 				break;
 			if (!PageDirty(page) ||
 			    page_private(page) != (unsigned long) wb) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a5..e692c42f24b5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1280,7 +1280,7 @@ retry:
 
 			if (first < 0)
 				lock_page(page);
-			else if (TestSetPageLocked(page))
+			else if (!trylock_page(page))
 				break;
 
 			if (unlikely(page->mapping != mapping)) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2eccbfaa1d48..81a9ad7177ca 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
 		goto nope;
 
 	/* OK, it's a truncated page */
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto nope;
 
 	page_cache_get(page);
@@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal)
 			spin_lock(&journal->j_list_lock);
 		}
 		if (unlikely(!buffer_uptodate(bh))) {
-			if (TestSetPageLocked(bh->b_page)) {
+			if (!trylock_page(bh->b_page)) {
 				spin_unlock(&journal->j_list_lock);
 				lock_page(bh->b_page);
 				spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index adf0395f318e..f2ad061e95ec 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
 		goto nope;
 
 	/* OK, it's a truncated page */
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto nope;
 
 	page_cache_get(page);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c8f60ee183b5..ce2208b27118 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
 static void release_buffer_page(struct buffer_head *bh)
 {
 	struct page *page = bh->b_page;
-	if (!page->mapping && !TestSetPageLocked(page)) {
+	if (!page->mapping && trylock_page(page)) {
 		page_cache_get(page);
 		put_bh(bh);
 		if (!page->mapping)
diff --git a/fs/splice.c b/fs/splice.c
index b30311ba8af6..1bbc6f4bb09c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 			 * for an in-flight io page
 			 */
 			if (flags & SPLICE_F_NONBLOCK) {
-				if (TestSetPageLocked(page)) {
+				if (!trylock_page(page)) {
 					error = -EAGAIN;
 					break;
 				}
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 0b211cba1909..fa73179233ad 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -675,7 +675,7 @@ xfs_probe_cluster(
 			} else
 				pg_offset = PAGE_CACHE_SIZE;
 
-			if (page->index == tindex && !TestSetPageLocked(page)) {
+			if (page->index == tindex && trylock_page(page)) {
 				pg_len = xfs_probe_page(page, pg_offset, mapped);
 				unlock_page(page);
 			}
@@ -759,7 +759,7 @@ xfs_convert_page(
 
 	if (page->index != tindex)
 		goto fail;
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto fail;
 	if (PageWriteback(page))
 		goto fail_unlock_page;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 25aaccdb2f26..c74d3e875314 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -163,7 +163,7 @@ static inline int Page##uname(struct page *page) 			\
 
 struct page;	/* forward declaration */
 
-PAGEFLAG(Locked, locked) TESTSCFLAG(Locked, locked)
+TESTPAGEFLAG(Locked, locked)
 PAGEFLAG(Error, error)
 PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 69ed3cb1197a..5da31c12101c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -250,29 +250,6 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
 	return read_cache_page(mapping, index, filler, data);
 }
 
-int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
-				pgoff_t index, gfp_t gfp_mask);
-int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
-				pgoff_t index, gfp_t gfp_mask);
-extern void remove_from_page_cache(struct page *page);
-extern void __remove_from_page_cache(struct page *page);
-
-/*
- * Like add_to_page_cache_locked, but used to add newly allocated pages:
- * the page is new, so we can just run SetPageLocked() against it.
- */
-static inline int add_to_page_cache(struct page *page,
-		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
-{
-	int error;
-
-	SetPageLocked(page);
-	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
-	if (unlikely(error))
-		ClearPageLocked(page);
-	return error;
-}
-
 /*
  * Return byte-offset into filesystem object for page.
  */
@@ -294,13 +271,28 @@ extern int __lock_page_killable(struct page *page);
 extern void __lock_page_nosync(struct page *page);
 extern void unlock_page(struct page *page);
 
+static inline void set_page_locked(struct page *page)
+{
+	set_bit(PG_locked, &page->flags);
+}
+
+static inline void clear_page_locked(struct page *page)
+{
+	clear_bit(PG_locked, &page->flags);
+}
+
+static inline int trylock_page(struct page *page)
+{
+	return !test_and_set_bit(PG_locked, &page->flags);
+}
+
 /*
  * lock_page may only be called if we have the page's inode pinned.
  */
 static inline void lock_page(struct page *page)
 {
 	might_sleep();
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		__lock_page(page);
 }
 
@@ -312,7 +304,7 @@ static inline void lock_page(struct page *page)
 static inline int lock_page_killable(struct page *page)
 {
 	might_sleep();
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		return __lock_page_killable(page);
 	return 0;
 }
@@ -324,7 +316,7 @@ static inline int lock_page_killable(struct page *page)
 static inline void lock_page_nosync(struct page *page)
 {
 	might_sleep();
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		__lock_page_nosync(page);
 }
 	
@@ -409,4 +401,27 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
 	return ret;
 }
 
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
+int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
+extern void remove_from_page_cache(struct page *page);
+extern void __remove_from_page_cache(struct page *page);
+
+/*
+ * Like add_to_page_cache_locked, but used to add newly allocated pages:
+ * the page is new, so we can just run set_page_locked() against it.
+ */
+static inline int add_to_page_cache(struct page *page,
+		struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
+{
+	int error;
+
+	set_page_locked(page);
+	error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
+	if (unlikely(error))
+		clear_page_locked(page);
+	return error;
+}
+
 #endif /* _LINUX_PAGEMAP_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index d97d1ad55473..54e968650855 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -558,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
  * The first mb is necessary to safely close the critical section opened by the
- * TestSetPageLocked(), the second mb is necessary to enforce ordering between
- * the clear_bit and the read of the waitqueue (to avoid SMP races with a
- * parallel wait_on_page_locked()).
+ * test_and_set_bit() to lock the page; the second mb is necessary to enforce
+ * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
+ * races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
 	smp_mb__before_clear_bit();
-	if (!TestClearPageLocked(page))
+	if (!test_and_clear_bit(PG_locked, &page->flags))
 		BUG();
 	smp_mb__after_clear_bit(); 
 	wake_up_page(page, PG_locked);
@@ -931,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
 	struct page *page = find_get_page(mapping, index);
 
 	if (page) {
-		if (!TestSetPageLocked(page))
+		if (trylock_page(page))
 			return page;
 		page_cache_release(page);
 		return NULL;
@@ -1027,7 +1027,7 @@ find_page:
 			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
-			if (TestSetPageLocked(page))
+			if (!trylock_page(page))
 				goto page_not_up_to_date;
 			if (!mapping->a_ops->is_partially_uptodate(page,
 								desc, offset))
diff --git a/mm/memory.c b/mm/memory.c
index a472bcd4b061..1002f473f497 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1789,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * not dirty accountable.
 	 */
 	if (PageAnon(old_page)) {
-		if (!TestSetPageLocked(old_page)) {
+		if (trylock_page(old_page)) {
 			reuse = can_share_swap_page(old_page);
 			unlock_page(old_page);
 		}
diff --git a/mm/migrate.c b/mm/migrate.c
index 153572fb60b8..2a80136b23bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -605,7 +605,7 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	 * establishing additional references. We are the only one
 	 * holding a reference to the new page at this point.
 	 */
-	if (TestSetPageLocked(newpage))
+	if (!trylock_page(newpage))
 		BUG();
 
 	/* Prepare mapping for the new page.*/
@@ -667,7 +667,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	BUG_ON(charge);
 
 	rc = -EAGAIN;
-	if (TestSetPageLocked(page)) {
+	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
diff --git a/mm/rmap.c b/mm/rmap.c
index 94a5246a3f98..1ea4e6fcee77 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -422,7 +422,7 @@ int page_referenced(struct page *page, int is_locked,
 			referenced += page_referenced_anon(page, mem_cont);
 		else if (is_locked)
 			referenced += page_referenced_file(page, mem_cont);
-		else if (TestSetPageLocked(page))
+		else if (!trylock_page(page))
 			referenced++;
 		else {
 			if (page->mapping)
diff --git a/mm/shmem.c b/mm/shmem.c
index c1e5a3b4f758..04fb4f1ab88e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1265,7 +1265,7 @@ repeat:
 		}
 
 		/* We have to do this with page locked to prevent races */
-		if (TestSetPageLocked(swappage)) {
+		if (!trylock_page(swappage)) {
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			wait_on_page_locked(swappage);
@@ -1329,7 +1329,7 @@ repeat:
 		shmem_swp_unmap(entry);
 		filepage = find_get_page(mapping, idx);
 		if (filepage &&
-		    (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
+		    (!PageUptodate(filepage) || !trylock_page(filepage))) {
 			spin_unlock(&info->lock);
 			wait_on_page_locked(filepage);
 			page_cache_release(filepage);
diff --git a/mm/swap.c b/mm/swap.c
index 7417a2adbe50..9e0cb3118079 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -444,7 +444,7 @@ void pagevec_strip(struct pagevec *pvec)
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
 
-		if (PagePrivate(page) && !TestSetPageLocked(page)) {
+		if (PagePrivate(page) && trylock_page(page)) {
 			if (PagePrivate(page))
 				try_to_release_page(page, 0);
 			unlock_page(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b8035b055129..167cf2dc8a03 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -201,7 +201,7 @@ void delete_from_swap_cache(struct page *page)
  */
 static inline void free_swap_cache(struct page *page)
 {
-	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
+	if (PageSwapCache(page) && trylock_page(page)) {
 		remove_exclusive_swap_page(page);
 		unlock_page(page);
 	}
@@ -302,9 +302,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * re-using the just freed swap entry for an existing page.
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
-		SetPageLocked(new_page);
+		set_page_locked(new_page);
 		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
-		if (!err) {
+		if (likely(!err)) {
 			/*
 			 * Initiate read into locked page and return.
 			 */
@@ -312,7 +312,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
-		ClearPageLocked(new_page);
+		clear_page_locked(new_page);
 		swap_free(entry);
 	} while (err != -ENOMEM);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bb7f79641f9e..1e330f2998fa 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -403,7 +403,7 @@ void free_swap_and_cache(swp_entry_t entry)
 	if (p) {
 		if (swap_entry_free(p, swp_offset(entry)) == 1) {
 			page = find_get_page(&swapper_space, entry.val);
-			if (page && unlikely(TestSetPageLocked(page))) {
+			if (page && unlikely(!trylock_page(page))) {
 				page_cache_release(page);
 				page = NULL;
 			}
diff --git a/mm/truncate.c b/mm/truncate.c
index 894e9a70699f..250505091d37 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -187,7 +187,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			if (page_index > next)
 				next = page_index;
 			next++;
-			if (TestSetPageLocked(page))
+			if (!trylock_page(page))
 				continue;
 			if (PageWriteback(page)) {
 				unlock_page(page);
@@ -280,7 +280,7 @@ unsigned long __invalidate_mapping_pages(struct address_space *mapping,
 			pgoff_t index;
 			int lock_failed;
 
-			lock_failed = TestSetPageLocked(page);
+			lock_failed = !trylock_page(page);
 
 			/*
 			 * We really shouldn't be looking at the ->index of an
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 75be453628bf..1ff1a58e7c10 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -496,7 +496,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		page = lru_to_page(page_list);
 		list_del(&page->lru);
 
-		if (TestSetPageLocked(page))
+		if (!trylock_page(page))
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
@@ -582,7 +582,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				 * A synchronous write - probably a ramdisk.  Go
 				 * ahead and try to reclaim the page.
 				 */
-				if (TestSetPageLocked(page))
+				if (!trylock_page(page))
 					goto keep;
 				if (PageDirty(page) || PageWriteback(page))
 					goto keep_locked;
-- 
cgit v1.2.3


From ca5de404ff036a29b25e9a83f6919c9f606c5841 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sat, 2 Aug 2008 12:02:13 +0200
Subject: fs: rename buffer trylock

Like the page lock change, this also requires name change, so convert the
raw test_and_set bitop to a trylock.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                 | 4 ++--
 fs/jbd/commit.c             | 2 +-
 fs/ntfs/aops.c              | 2 +-
 fs/ntfs/compress.c          | 2 +-
 fs/ntfs/mft.c               | 4 ++--
 fs/reiserfs/inode.c         | 2 +-
 fs/reiserfs/journal.c       | 4 ++--
 fs/xfs/linux-2.6/xfs_aops.c | 2 +-
 include/linux/buffer_head.h | 8 ++++++--
 9 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 4dbe52948e8f..38653e36e225 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		 */
 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
 			lock_buffer(bh);
-		} else if (test_set_buffer_locked(bh)) {
+		} else if (!trylock_buffer(bh)) {
 			redirty_page_for_writepage(wbc, page);
 			continue;
 		}
@@ -3000,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 
 		if (rw == SWRITE || rw == SWRITE_SYNC)
 			lock_buffer(bh);
-		else if (test_set_buffer_locked(bh))
+		else if (!trylock_buffer(bh))
 			continue;
 
 		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 81a9ad7177ca..ae08c057e751 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -221,7 +221,7 @@ write_out_data:
 		 * blocking lock_buffer().
 		 */
 		if (buffer_dirty(bh)) {
-			if (test_set_buffer_locked(bh)) {
+			if (!trylock_buffer(bh)) {
 				BUFFER_TRACE(bh, "needs blocking lock");
 				spin_unlock(&journal->j_list_lock);
 				/* Write out all data to prevent deadlocks */
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e42..b38f944f0667 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
 		tbh = bhs[i];
 		if (!tbh)
 			continue;
-		if (unlikely(test_set_buffer_locked(tbh)))
+		if (!trylock_buffer(tbh))
 			BUG();
 		/* The buffer dirty state is now irrelevant, just clean it. */
 		clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc507..9669541d0119 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
 	for (i = 0; i < nr_bhs; i++) {
 		struct buffer_head *tbh = bhs[i];
 
-		if (unlikely(test_set_buffer_locked(tbh)))
+		if (!trylock_buffer(tbh))
 			continue;
 		if (unlikely(buffer_uptodate(tbh))) {
 			unlock_buffer(tbh);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e7..17d32ca6bc35 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
 		for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
 			struct buffer_head *tbh = bhs[i_bhs];
 
-			if (unlikely(test_set_buffer_locked(tbh)))
+			if (!trylock_buffer(tbh))
 				BUG();
 			BUG_ON(!buffer_uptodate(tbh));
 			clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
 	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
 		struct buffer_head *tbh = bhs[i_bhs];
 
-		if (unlikely(test_set_buffer_locked(tbh)))
+		if (!trylock_buffer(tbh))
 			BUG();
 		BUG_ON(!buffer_uptodate(tbh));
 		clear_buffer_dirty(tbh);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8a..5699171212ae 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
 			lock_buffer(bh);
 		} else {
-			if (test_set_buffer_locked(bh)) {
+			if (!trylock_buffer(bh)) {
 				redirty_page_for_writepage(wbc, page);
 				continue;
 			}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index ce2208b27118..c21df71943a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
 		jh = JH_ENTRY(list->next);
 		bh = jh->bh;
 		get_bh(bh);
-		if (test_set_buffer_locked(bh)) {
+		if (!trylock_buffer(bh)) {
 			if (!buffer_dirty(bh)) {
 				list_move(&jh->list, &tmp);
 				goto loop_next;
@@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
 {
 	PROC_INFO_INC(p_s_sb, journal.prepare);
 
-	if (test_set_buffer_locked(bh)) {
+	if (!trylock_buffer(bh)) {
 		if (!wait)
 			return 0;
 		lock_buffer(bh);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index fa73179233ad..fa47e43b8b41 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1104,7 +1104,7 @@ xfs_page_state_convert(
 			 * that we are writing into for the first time.
 			 */
 			type = IOMAP_NEW;
-			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+			if (trylock_buffer(bh)) {
 				ASSERT(buffer_mapped(bh));
 				if (iomap_valid)
 					all_bh = 1;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 50cfe8ceb478..eadaab44015f 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -115,7 +115,6 @@ BUFFER_FNS(Uptodate, uptodate)
 BUFFER_FNS(Dirty, dirty)
 TAS_BUFFER_FNS(Dirty, dirty)
 BUFFER_FNS(Lock, locked)
-TAS_BUFFER_FNS(Lock, locked)
 BUFFER_FNS(Req, req)
 TAS_BUFFER_FNS(Req, req)
 BUFFER_FNS(Mapped, mapped)
@@ -321,10 +320,15 @@ static inline void wait_on_buffer(struct buffer_head *bh)
 		__wait_on_buffer(bh);
 }
 
+static inline int trylock_buffer(struct buffer_head *bh)
+{
+	return likely(!test_and_set_bit(BH_Lock, &bh->b_state));
+}
+
 static inline void lock_buffer(struct buffer_head *bh)
 {
 	might_sleep();
-	if (test_set_buffer_locked(bh))
+	if (!trylock_buffer(bh))
 		__lock_buffer(bh);
 }
 
-- 
cgit v1.2.3


From 378a2f090f7a478704a372a4869b8a9ac206234e Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 4 Aug 2008 22:31:03 -0700
Subject: net_sched: Add qdisc __NET_XMIT_STOLEN flag

Patrick McHardy <kaber@trash.net> noticed:
"The other problem that affects all qdiscs supporting actions is
TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
even though the packet is not queued, corrupting upper qdiscs'
qlen counters."

and later explained:
"The reason why it translates it at all seems to be to not increase
the drops counter. Within a single qdisc this could be avoided by
other means easily, upper qdiscs would still increase the counter
when we return anything besides NET_XMIT_SUCCESS though.

This means we need a new NET_XMIT return value to indicate this to
the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
in dev_queue_xmit, similar to NET_XMIT_BYPASS."

David Miller <davem@davemloft.net> noticed:
"Maybe these NET_XMIT_* values being passed around should be a set of
bits. They could be composed of base meanings, combined with specific
attributes.

So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"

The attributes get masked out by the top-level ->enqueue() caller,
such that the base meanings are the only thing that make their
way up into the stack. If it's only about communication within the
qdisc tree, let's simply code it that way."

This patch is trying to realize these ideas.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/net/sch_generic.h | 14 +++++++++++++-
 net/sched/sch_atm.c       | 12 +++++++-----
 net/sched/sch_cbq.c       | 23 +++++++++++++++--------
 net/sched/sch_dsmark.c    |  8 +++++---
 net/sched/sch_hfsc.c      |  8 +++++---
 net/sched/sch_htb.c       | 18 +++++++++++-------
 net/sched/sch_netem.c     |  3 ++-
 net/sched/sch_prio.c      |  8 +++++---
 net/sched/sch_red.c       |  2 +-
 net/sched/sch_sfq.c       |  2 +-
 net/sched/sch_tbf.c       |  3 ++-
 12 files changed, 68 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ee583f642a9f..abbf5d52ec86 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -64,6 +64,7 @@ struct wireless_dev;
 #define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
 					   (TC use only - dev_queue_xmit
 					   returns this as NET_XMIT_SUCCESS) */
+#define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0   /* keep 'em coming, baby */
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c5bb13065051..f15b045a85e9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -343,6 +343,18 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->pkt_len;
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+/* additional qdisc xmit flags */
+enum net_xmit_qdisc_t {
+	__NET_XMIT_STOLEN = 0x00010000,
+};
+
+#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
+
+#else
+#define net_xmit_drop_count(e)	(1)
+#endif
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 #ifdef CONFIG_NET_SCHED
@@ -355,7 +367,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
 	qdisc_skb_cb(skb)->pkt_len = skb->len;
-	return qdisc_enqueue(skb, sch);
+	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6b517b9dac5b..27dd773481bc 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -415,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			kfree_skb(skb);
 			goto drop;
@@ -432,9 +432,11 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	ret = qdisc_enqueue(skb, flow->q);
 	if (ret != 0) {
 drop: __maybe_unused
-		sch->qstats.drops++;
-		if (flow)
-			flow->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			if (flow)
+				flow->qstats.drops++;
+		}
 		return ret;
 	}
 	sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -530,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	if (!ret) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 		p->link.qstats.drops++;
 	}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 14954bf4a683..765ae5659000 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -256,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		case TC_ACT_RECLASSIFY:
@@ -397,9 +397,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return ret;
 	}
 
-	sch->qstats.drops++;
-	cbq_mark_toplevel(q, cl);
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cbq_mark_toplevel(q, cl);
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
@@ -430,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 			cbq_activate_class(cl);
 		return 0;
 	}
-	sch->qstats.drops++;
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
@@ -664,13 +668,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 	q->rx_class = NULL;
 
 	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+		int ret;
 
 		cbq_mark_toplevel(q, cl);
 
 		q->rx_class = cl;
 		cl->q->__parent = sch;
 
-		if (qdisc_enqueue(skb, cl->q) == 0) {
+		ret = qdisc_enqueue(skb, cl->q);
+		if (ret == NET_XMIT_SUCCESS) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
 			sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -678,7 +684,8 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 				cbq_activate_class(cl);
 			return 0;
 		}
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return 0;
 	}
 
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index a935676987e2..7170275d9f99 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -236,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 
 		case TC_ACT_SHOT:
 			goto drop;
@@ -254,7 +254,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
@@ -321,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = p->q->ops->requeue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 0ae7d19dcba8..5cf9ae716118 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1166,7 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -1586,8 +1586,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
-		cl->qstats.drops++;
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
 		return err;
 	}
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 75a40951c4f2..538d79b489ae 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -221,7 +221,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -572,9 +572,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else {
 		cl->bstats.packets +=
@@ -615,10 +617,12 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+	} else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
 		   NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else
 		htb_activate(q, cl);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a59085700678..6cd6f2bc749e 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -240,8 +240,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->q.qlen++;
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
-	} else
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
+	}
 
 	pr_debug("netem: enqueue ret %d\n", ret);
 	return ret;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index f849243eb095..adb1a52b77d3 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -45,7 +45,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		switch (err) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -88,7 +88,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return ret;
 }
 
@@ -114,7 +115,8 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 		sch->qstats.requeues++;
 		return 0;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return NET_XMIT_DROP;
 }
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 3f2d1d7f3bbd..5da05839e225 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -97,7 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		q->stats.pdrop++;
 		sch->qstats.drops++;
 	}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 8589da666568..3a456e1b829a 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		switch (result) {
 		case TC_ACT_STOLEN:
 		case TC_ACT_QUEUED:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return 0;
 		}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b296672f7632..7d3b7ff3bf07 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -135,7 +135,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	ret = qdisc_enqueue(skb, q->qdisc);
 	if (ret != 0) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return ret;
 	}
 
-- 
cgit v1.2.3


From cc6533e98a7f3cb7fce9d740da49195c7aa523a4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 4 Aug 2008 23:04:08 -0700
Subject: net: Kill plain NET_XMIT_BYPASS.

dst_input() was doing something completely absurd, looping
on skb->dst->input() if NET_XMIT_BYPASS was seen, but these
functions never return such an error.

And as a result plain ole' NET_XMIT_BYPASS has no more
references and can be completely killed off.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 ---
 include/net/dst.h         | 12 +-----------
 2 files changed, 1 insertion(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index abbf5d52ec86..488c56e649b5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -61,9 +61,6 @@ struct wireless_dev;
 #define NET_XMIT_DROP		1	/* skb dropped			*/
 #define NET_XMIT_CN		2	/* congestion notification	*/
 #define NET_XMIT_POLICED	3	/* skb is shot by police	*/
-#define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
-					   (TC use only - dev_queue_xmit
-					   returns this as NET_XMIT_SUCCESS) */
 #define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
diff --git a/include/net/dst.h b/include/net/dst.h
index c5c318a628f8..8a8b71e5f3f1 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -252,17 +252,7 @@ static inline int dst_output(struct sk_buff *skb)
 /* Input packet from network to transport.  */
 static inline int dst_input(struct sk_buff *skb)
 {
-	int err;
-
-	for (;;) {
-		err = skb->dst->input(skb);
-
-		if (likely(err == 0))
-			return err;
-		/* Oh, Jamal... Seems, I will not forgive you this mess. :-) */
-		if (unlikely(err != NET_XMIT_BYPASS))
-			return err;
-	}
+	return skb->dst->input(skb);
 }
 
 static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
-- 
cgit v1.2.3


From 39b986a6c73434d122967dc86efb295ab9a28437 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 5 Aug 2008 18:16:57 +0200
Subject: ide: sanitize struct ide_port_ops documentation (take 2)

v2:
Add missing '@'-s.  (Noticed by Randy Dunlap)

Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index b846bc44a27e..b1fb15f10a00 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -509,24 +509,33 @@ struct ide_tp_ops {
 
 extern const struct ide_tp_ops default_tp_ops;
 
+/**
+ * struct ide_port_ops - IDE port operations
+ *
+ * @init_dev:		host specific initialization of a device
+ * @set_pio_mode:	routine to program host for PIO mode
+ * @set_dma_mode:	routine to program host for DMA mode
+ * @selectproc:		tweaks hardware to select drive
+ * @reset_poll:		chipset polling based on hba specifics
+ * @pre_reset:		chipset specific changes to default for device-hba resets
+ * @resetproc:		routine to reset controller after a disk reset
+ * @maskproc:		special host masking for drive selection
+ * @quirkproc:		check host's drive quirk list
+ *
+ * @mdma_filter:	filter MDMA modes
+ * @udma_filter:	filter UDMA modes
+ *
+ * @cable_detect:	detect cable type
+ */
 struct ide_port_ops {
-	/* host specific initialization of a device */
 	void	(*init_dev)(ide_drive_t *);
-	/* routine to program host for PIO mode */
 	void	(*set_pio_mode)(ide_drive_t *, const u8);
-	/* routine to program host for DMA mode */
 	void	(*set_dma_mode)(ide_drive_t *, const u8);
-	/* tweaks hardware to select drive */
 	void	(*selectproc)(ide_drive_t *);
-	/* chipset polling based on hba specifics */
 	int	(*reset_poll)(ide_drive_t *);
-	/* chipset specific changes to default for device-hba resets */
 	void	(*pre_reset)(ide_drive_t *);
-	/* routine to reset controller after a disk reset */
 	void	(*resetproc)(ide_drive_t *);
-	/* special host masking for drive selection */
 	void	(*maskproc)(ide_drive_t *, int);
-	/* check host's drive quirk list */
 	void	(*quirkproc)(ide_drive_t *);
 
 	u8	(*mdma_filter)(ide_drive_t *);
-- 
cgit v1.2.3


From c5bfc3757f1d843a8e1261840c1f53c5062f8e92 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 5 Aug 2008 18:17:01 +0200
Subject: ide: remove CONFIG_IDE_MAX_HWIFS

The benefits of a user settable CONFIG_IDE_MAX_HWIFS have become pretty
tiny and are no longer considered worth the trouble of an own option.

Simply always #define MAX_HWIFS to 10.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig | 10 ----------
 include/linux/ide.h | 13 +------------
 2 files changed, 1 insertion(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 130ef64b44f7..a34758d29516 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -54,16 +54,6 @@ menuconfig IDE
 
 if IDE
 
-config IDE_MAX_HWIFS
-	int "Max IDE interfaces"
-	depends on ALPHA || SUPERH || IA64 || EMBEDDED
-	range 1 10
-	default 4
-	help
-	  This is the maximum number of IDE hardware interfaces that will
-	  be supported by the driver. Make sure it is at least as high as
-	  the number of IDE interfaces in your system.
-
 config BLK_DEV_IDE
 	tristate "Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support"
 	---help---
diff --git a/include/linux/ide.h b/include/linux/ide.h
index b1fb15f10a00..87c12ed96954 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -219,18 +219,7 @@ static inline int __ide_default_irq(unsigned long base)
 #include <asm-generic/ide_iops.h>
 #endif
 
-#ifndef MAX_HWIFS
-#if defined(CONFIG_BLACKFIN) || defined(CONFIG_H8300) || defined(CONFIG_XTENSA)
-# define MAX_HWIFS	1
-#else
-# define MAX_HWIFS	10
-#endif
-#endif
-
-#if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED)
-#undef MAX_HWIFS
-#define MAX_HWIFS	CONFIG_IDE_MAX_HWIFS
-#endif
+#define MAX_HWIFS	10
 
 /* Currently only m68k, apus and m8xx need it */
 #ifndef IDE_ARCH_ACK_INTR
-- 
cgit v1.2.3


From c6e2bee26eee190b20cd87e71b288bca6a5357a4 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Tue, 5 Aug 2008 13:01:05 -0700
Subject: kdump: report actual value of VMCOREINFO_OSRELEASE in VMCOREINFO

The current implementation reports the structure name as
VMCOREINFO_OSRELEASE in VMCOREINFO, e.g.

        VMCOREINFO_OSRELEASE=init_uts_ns.name.release

That doesn't make sense because it's always the same. Instead, use the
value, e.g.

        VMCOREINFO_OSRELEASE=2.6.26-rc3

That's also what the 'makedumpfile -g' does.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Cc: "Ken'ichi Ohmichi" <oomichi@mxs.nes.nec.co.jp>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 82f88a8a827b..32110cede64f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -130,8 +130,8 @@ void vmcoreinfo_append_str(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
 unsigned long paddr_vmcoreinfo_note(void);
 
-#define VMCOREINFO_OSRELEASE(name) \
-	vmcoreinfo_append_str("OSRELEASE=%s\n", #name)
+#define VMCOREINFO_OSRELEASE(value) \
+	vmcoreinfo_append_str("OSRELEASE=%s\n", value)
 #define VMCOREINFO_PAGESIZE(value) \
 	vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
 #define VMCOREINFO_SYMBOL(name) \
-- 
cgit v1.2.3


From 60cadec9da7b6c91aca51f408c828f7e74a68379 Mon Sep 17 00:00:00 2001
From: Shadi Ammouri <shadi@marvell.com>
Date: Tue, 5 Aug 2008 13:01:09 -0700
Subject: spi: new orion_spi driver

This adds an SPI driver for the SPI controller found in various Marvell
Orion ARM SoCs.  It currently supports only one slave, which must use SPI
mode 0.

[dbrownell@users.sourceforge.net: cleanups, meet specs, pass "sparse"]
Signed-off-by: Shadi Ammouri <shadi@marvell.com>
Signed-off-by: Saeed Bishara <saeed@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/Kconfig           |   6 +
 drivers/spi/Makefile          |   1 +
 drivers/spi/orion_spi.c       | 574 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/orion_spi.h |  17 ++
 4 files changed, 598 insertions(+)
 create mode 100644 drivers/spi/orion_spi.c
 create mode 100644 include/linux/spi/orion_spi.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 2303521b4f09..b9d0efb6803f 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -149,6 +149,12 @@ config SPI_OMAP24XX
 	  SPI master controller for OMAP24xx/OMAP34xx Multichannel SPI
 	  (McSPI) modules.
 
+config SPI_ORION
+	tristate "Orion SPI master (EXPERIMENTAL)"
+	depends on PLAT_ORION && EXPERIMENTAL
+	help
+	  This enables using the SPI master controller on the Orion chips.
+
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
 	depends on ARCH_PXA && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 7fca043ce723..ccf18de34e1e 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
 obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
 obj-$(CONFIG_SPI_OMAP_UWIRE)		+= omap_uwire.o
 obj-$(CONFIG_SPI_OMAP24XX)		+= omap2_mcspi.o
+obj-$(CONFIG_SPI_ORION)			+= orion_spi.o
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= mpc52xx_psc_spi.o
 obj-$(CONFIG_SPI_MPC83xx)		+= spi_mpc83xx.o
 obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c
new file mode 100644
index 000000000000..c4eaacd6e553
--- /dev/null
+++ b/drivers/spi/orion_spi.c
@@ -0,0 +1,574 @@
+/*
+ * orion_spi.c -- Marvell Orion SPI controller driver
+ *
+ * Author: Shadi Ammouri <shadi@marvell.com>
+ * Copyright (C) 2007-2008 Marvell Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/orion_spi.h>
+#include <asm/unaligned.h>
+
+#define DRIVER_NAME			"orion_spi"
+
+#define ORION_NUM_CHIPSELECTS		1 /* only one slave is supported*/
+#define ORION_SPI_WAIT_RDY_MAX_LOOP	2000 /* in usec */
+
+#define ORION_SPI_IF_CTRL_REG		0x00
+#define ORION_SPI_IF_CONFIG_REG		0x04
+#define ORION_SPI_DATA_OUT_REG		0x08
+#define ORION_SPI_DATA_IN_REG		0x0c
+#define ORION_SPI_INT_CAUSE_REG		0x10
+
+#define ORION_SPI_IF_8_16_BIT_MODE	(1 << 5)
+#define ORION_SPI_CLK_PRESCALE_MASK	0x1F
+
+struct orion_spi {
+	struct work_struct	work;
+
+	/* Lock access to transfer list.	*/
+	spinlock_t		lock;
+
+	struct list_head	msg_queue;
+	struct spi_master	*master;
+	void __iomem		*base;
+	unsigned int		max_speed;
+	unsigned int		min_speed;
+	struct orion_spi_info	*spi_info;
+};
+
+static struct workqueue_struct *orion_spi_wq;
+
+static inline void __iomem *spi_reg(struct orion_spi *orion_spi, u32 reg)
+{
+	return orion_spi->base + reg;
+}
+
+static inline void
+orion_spi_setbits(struct orion_spi *orion_spi, u32 reg, u32 mask)
+{
+	void __iomem *reg_addr = spi_reg(orion_spi, reg);
+	u32 val;
+
+	val = readl(reg_addr);
+	val |= mask;
+	writel(val, reg_addr);
+}
+
+static inline void
+orion_spi_clrbits(struct orion_spi *orion_spi, u32 reg, u32 mask)
+{
+	void __iomem *reg_addr = spi_reg(orion_spi, reg);
+	u32 val;
+
+	val = readl(reg_addr);
+	val &= ~mask;
+	writel(val, reg_addr);
+}
+
+static int orion_spi_set_transfer_size(struct orion_spi *orion_spi, int size)
+{
+	if (size == 16) {
+		orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG,
+				  ORION_SPI_IF_8_16_BIT_MODE);
+	} else if (size == 8) {
+		orion_spi_clrbits(orion_spi, ORION_SPI_IF_CONFIG_REG,
+				  ORION_SPI_IF_8_16_BIT_MODE);
+	} else {
+		pr_debug("Bad bits per word value %d (only 8 or 16 are "
+			 "allowed).\n", size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed)
+{
+	u32 tclk_hz;
+	u32 rate;
+	u32 prescale;
+	u32 reg;
+	struct orion_spi *orion_spi;
+
+	orion_spi = spi_master_get_devdata(spi->master);
+
+	tclk_hz = orion_spi->spi_info->tclk;
+
+	/*
+	 * the supported rates are: 4,6,8...30
+	 * round up as we look for equal or less speed
+	 */
+	rate = DIV_ROUND_UP(tclk_hz, speed);
+	rate = roundup(rate, 2);
+
+	/* check if requested speed is too small */
+	if (rate > 30)
+		return -EINVAL;
+
+	if (rate < 4)
+		rate = 4;
+
+	/* Convert the rate to SPI clock divisor value.	*/
+	prescale = 0x10 + rate/2;
+
+	reg = readl(spi_reg(orion_spi, ORION_SPI_IF_CONFIG_REG));
+	reg = ((reg & ~ORION_SPI_CLK_PRESCALE_MASK) | prescale);
+	writel(reg, spi_reg(orion_spi, ORION_SPI_IF_CONFIG_REG));
+
+	return 0;
+}
+
+/*
+ * called only when no transfer is active on the bus
+ */
+static int
+orion_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
+{
+	struct orion_spi *orion_spi;
+	unsigned int speed = spi->max_speed_hz;
+	unsigned int bits_per_word = spi->bits_per_word;
+	int	rc;
+
+	orion_spi = spi_master_get_devdata(spi->master);
+
+	if ((t != NULL) && t->speed_hz)
+		speed = t->speed_hz;
+
+	if ((t != NULL) && t->bits_per_word)
+		bits_per_word = t->bits_per_word;
+
+	rc = orion_spi_baudrate_set(spi, speed);
+	if (rc)
+		return rc;
+
+	return orion_spi_set_transfer_size(orion_spi, bits_per_word);
+}
+
+static void orion_spi_set_cs(struct orion_spi *orion_spi, int enable)
+{
+	if (enable)
+		orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1);
+	else
+		orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1);
+}
+
+static inline int orion_spi_wait_till_ready(struct orion_spi *orion_spi)
+{
+	int i;
+
+	for (i = 0; i < ORION_SPI_WAIT_RDY_MAX_LOOP; i++) {
+		if (readl(spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG)))
+			return 1;
+		else
+			udelay(1);
+	}
+
+	return -1;
+}
+
+static inline int
+orion_spi_write_read_8bit(struct spi_device *spi,
+			  const u8 **tx_buf, u8 **rx_buf)
+{
+	void __iomem *tx_reg, *rx_reg, *int_reg;
+	struct orion_spi *orion_spi;
+
+	orion_spi = spi_master_get_devdata(spi->master);
+	tx_reg = spi_reg(orion_spi, ORION_SPI_DATA_OUT_REG);
+	rx_reg = spi_reg(orion_spi, ORION_SPI_DATA_IN_REG);
+	int_reg = spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG);
+
+	/* clear the interrupt cause register */
+	writel(0x0, int_reg);
+
+	if (tx_buf && *tx_buf)
+		writel(*(*tx_buf)++, tx_reg);
+	else
+		writel(0, tx_reg);
+
+	if (orion_spi_wait_till_ready(orion_spi) < 0) {
+		dev_err(&spi->dev, "TXS timed out\n");
+		return -1;
+	}
+
+	if (rx_buf && *rx_buf)
+		*(*rx_buf)++ = readl(rx_reg);
+
+	return 1;
+}
+
+static inline int
+orion_spi_write_read_16bit(struct spi_device *spi,
+			   const u16 **tx_buf, u16 **rx_buf)
+{
+	void __iomem *tx_reg, *rx_reg, *int_reg;
+	struct orion_spi *orion_spi;
+
+	orion_spi = spi_master_get_devdata(spi->master);
+	tx_reg = spi_reg(orion_spi, ORION_SPI_DATA_OUT_REG);
+	rx_reg = spi_reg(orion_spi, ORION_SPI_DATA_IN_REG);
+	int_reg = spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG);
+
+	/* clear the interrupt cause register */
+	writel(0x0, int_reg);
+
+	if (tx_buf && *tx_buf)
+		writel(__cpu_to_le16(get_unaligned((*tx_buf)++)), tx_reg);
+	else
+		writel(0, tx_reg);
+
+	if (orion_spi_wait_till_ready(orion_spi) < 0) {
+		dev_err(&spi->dev, "TXS timed out\n");
+		return -1;
+	}
+
+	if (rx_buf && *rx_buf)
+		put_unaligned(__le16_to_cpu(readl(rx_reg)), (*rx_buf)++);
+
+	return 1;
+}
+
+static unsigned int
+orion_spi_write_read(struct spi_device *spi, struct spi_transfer *xfer)
+{
+	struct orion_spi *orion_spi;
+	unsigned int count;
+	int word_len;
+
+	orion_spi = spi_master_get_devdata(spi->master);
+	word_len = spi->bits_per_word;
+	count = xfer->len;
+
+	if (word_len == 8) {
+		const u8 *tx = xfer->tx_buf;
+		u8 *rx = xfer->rx_buf;
+
+		do {
+			if (orion_spi_write_read_8bit(spi, &tx, &rx) < 0)
+				goto out;
+			count--;
+		} while (count);
+	} else if (word_len == 16) {
+		const u16 *tx = xfer->tx_buf;
+		u16 *rx = xfer->rx_buf;
+
+		do {
+			if (orion_spi_write_read_16bit(spi, &tx, &rx) < 0)
+				goto out;
+			count -= 2;
+		} while (count);
+	}
+
+out:
+	return xfer->len - count;
+}
+
+
+static void orion_spi_work(struct work_struct *work)
+{
+	struct orion_spi *orion_spi =
+		container_of(work, struct orion_spi, work);
+
+	spin_lock_irq(&orion_spi->lock);
+	while (!list_empty(&orion_spi->msg_queue)) {
+		struct spi_message *m;
+		struct spi_device *spi;
+		struct spi_transfer *t = NULL;
+		int par_override = 0;
+		int status = 0;
+		int cs_active = 0;
+
+		m = container_of(orion_spi->msg_queue.next, struct spi_message,
+				 queue);
+
+		list_del_init(&m->queue);
+		spin_unlock_irq(&orion_spi->lock);
+
+		spi = m->spi;
+
+		/* Load defaults */
+		status = orion_spi_setup_transfer(spi, NULL);
+
+		if (status < 0)
+			goto msg_done;
+
+		list_for_each_entry(t, &m->transfers, transfer_list) {
+			if (par_override || t->speed_hz || t->bits_per_word) {
+				par_override = 1;
+				status = orion_spi_setup_transfer(spi, t);
+				if (status < 0)
+					break;
+				if (!t->speed_hz && !t->bits_per_word)
+					par_override = 0;
+			}
+
+			if (!cs_active) {
+				orion_spi_set_cs(orion_spi, 1);
+				cs_active = 1;
+			}
+
+			if (t->len)
+				m->actual_length +=
+					orion_spi_write_read(spi, t);
+
+			if (t->delay_usecs)
+				udelay(t->delay_usecs);
+
+			if (t->cs_change) {
+				orion_spi_set_cs(orion_spi, 0);
+				cs_active = 0;
+			}
+		}
+
+msg_done:
+		if (cs_active)
+			orion_spi_set_cs(orion_spi, 0);
+
+		m->status = status;
+		m->complete(m->context);
+
+		spin_lock_irq(&orion_spi->lock);
+	}
+
+	spin_unlock_irq(&orion_spi->lock);
+}
+
+static int __init orion_spi_reset(struct orion_spi *orion_spi)
+{
+	/* Verify that the CS is deasserted */
+	orion_spi_set_cs(orion_spi, 0);
+
+	return 0;
+}
+
+static int orion_spi_setup(struct spi_device *spi)
+{
+	struct orion_spi *orion_spi;
+
+	orion_spi = spi_master_get_devdata(spi->master);
+
+	if (spi->mode) {
+		dev_err(&spi->dev, "setup: unsupported mode bits %x\n",
+			spi->mode);
+		return -EINVAL;
+	}
+
+	if (spi->bits_per_word == 0)
+		spi->bits_per_word = 8;
+
+	if ((spi->max_speed_hz == 0)
+			|| (spi->max_speed_hz > orion_spi->max_speed))
+		spi->max_speed_hz = orion_spi->max_speed;
+
+	if (spi->max_speed_hz < orion_spi->min_speed) {
+		dev_err(&spi->dev, "setup: requested speed too low %d Hz\n",
+			spi->max_speed_hz);
+		return -EINVAL;
+	}
+
+	/*
+	 * baudrate & width will be set orion_spi_setup_transfer
+	 */
+	return 0;
+}
+
+static int orion_spi_transfer(struct spi_device *spi, struct spi_message *m)
+{
+	struct orion_spi *orion_spi;
+	struct spi_transfer *t = NULL;
+	unsigned long flags;
+
+	m->actual_length = 0;
+	m->status = 0;
+
+	/* reject invalid messages and transfers */
+	if (list_empty(&m->transfers) || !m->complete)
+		return -EINVAL;
+
+	orion_spi = spi_master_get_devdata(spi->master);
+
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		unsigned int bits_per_word = spi->bits_per_word;
+
+		if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) {
+			dev_err(&spi->dev,
+				"message rejected : "
+				"invalid transfer data buffers\n");
+			goto msg_rejected;
+		}
+
+		if ((t != NULL) && t->bits_per_word)
+			bits_per_word = t->bits_per_word;
+
+		if ((bits_per_word != 8) && (bits_per_word != 16)) {
+			dev_err(&spi->dev,
+				"message rejected : "
+				"invalid transfer bits_per_word (%d bits)\n",
+				bits_per_word);
+			goto msg_rejected;
+		}
+		/*make sure buffer length is even when working in 16 bit mode*/
+		if ((t != NULL) && (t->bits_per_word == 16) && (t->len & 1)) {
+			dev_err(&spi->dev,
+				"message rejected : "
+				"odd data length (%d) while in 16 bit mode\n",
+				t->len);
+			goto msg_rejected;
+		}
+
+		if (t->speed_hz < orion_spi->min_speed) {
+			dev_err(&spi->dev,
+				"message rejected : "
+				"device min speed (%d Hz) exceeds "
+				"required transfer speed (%d Hz)\n",
+				orion_spi->min_speed, t->speed_hz);
+			goto msg_rejected;
+		}
+	}
+
+
+	spin_lock_irqsave(&orion_spi->lock, flags);
+	list_add_tail(&m->queue, &orion_spi->msg_queue);
+	queue_work(orion_spi_wq, &orion_spi->work);
+	spin_unlock_irqrestore(&orion_spi->lock, flags);
+
+	return 0;
+msg_rejected:
+	/* Message rejected and not queued */
+	m->status = -EINVAL;
+	if (m->complete)
+		m->complete(m->context);
+	return -EINVAL;
+}
+
+static int __init orion_spi_probe(struct platform_device *pdev)
+{
+	struct spi_master *master;
+	struct orion_spi *spi;
+	struct resource *r;
+	struct orion_spi_info *spi_info;
+	int status = 0;
+
+	spi_info = pdev->dev.platform_data;
+
+	master = spi_alloc_master(&pdev->dev, sizeof *spi);
+	if (master == NULL) {
+		dev_dbg(&pdev->dev, "master allocation failed\n");
+		return -ENOMEM;
+	}
+
+	if (pdev->id != -1)
+		master->bus_num = pdev->id;
+
+	master->setup = orion_spi_setup;
+	master->transfer = orion_spi_transfer;
+	master->num_chipselect = ORION_NUM_CHIPSELECTS;
+
+	dev_set_drvdata(&pdev->dev, master);
+
+	spi = spi_master_get_devdata(master);
+	spi->master = master;
+	spi->spi_info = spi_info;
+
+	spi->max_speed = DIV_ROUND_UP(spi_info->tclk, 4);
+	spi->min_speed = DIV_ROUND_UP(spi_info->tclk, 30);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (r == NULL) {
+		status = -ENODEV;
+		goto out;
+	}
+
+	if (!request_mem_region(r->start, (r->end - r->start) + 1,
+				pdev->dev.bus_id)) {
+		status = -EBUSY;
+		goto out;
+	}
+	spi->base = ioremap(r->start, SZ_1K);
+
+	INIT_WORK(&spi->work, orion_spi_work);
+
+	spin_lock_init(&spi->lock);
+	INIT_LIST_HEAD(&spi->msg_queue);
+
+	if (orion_spi_reset(spi) < 0)
+		goto out_rel_mem;
+
+	status = spi_register_master(master);
+	if (status < 0)
+		goto out_rel_mem;
+
+	return status;
+
+out_rel_mem:
+	release_mem_region(r->start, (r->end - r->start) + 1);
+
+out:
+	spi_master_put(master);
+	return status;
+}
+
+
+static int __exit orion_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master;
+	struct orion_spi *spi;
+	struct resource *r;
+
+	master = dev_get_drvdata(&pdev->dev);
+	spi = spi_master_get_devdata(master);
+
+	cancel_work_sync(&spi->work);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(r->start, (r->end - r->start) + 1);
+
+	spi_unregister_master(master);
+
+	return 0;
+}
+
+MODULE_ALIAS("platform:" DRIVER_NAME);
+
+static struct platform_driver orion_spi_driver = {
+	.driver = {
+		.name	= DRIVER_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.remove		= __exit_p(orion_spi_remove),
+};
+
+static int __init orion_spi_init(void)
+{
+	orion_spi_wq = create_singlethread_workqueue(
+				orion_spi_driver.driver.name);
+	if (orion_spi_wq == NULL)
+		return -ENOMEM;
+
+	return platform_driver_probe(&orion_spi_driver, orion_spi_probe);
+}
+module_init(orion_spi_init);
+
+static void __exit orion_spi_exit(void)
+{
+	flush_workqueue(orion_spi_wq);
+	platform_driver_unregister(&orion_spi_driver);
+
+	destroy_workqueue(orion_spi_wq);
+}
+module_exit(orion_spi_exit);
+
+MODULE_DESCRIPTION("Orion SPI driver");
+MODULE_AUTHOR("Shadi Ammouri <shadi@marvell.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/spi/orion_spi.h b/include/linux/spi/orion_spi.h
new file mode 100644
index 000000000000..b4d9fa6f797c
--- /dev/null
+++ b/include/linux/spi/orion_spi.h
@@ -0,0 +1,17 @@
+/*
+ * orion_spi.h
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __LINUX_SPI_ORION_SPI_H
+#define __LINUX_SPI_ORION_SPI_H
+
+struct orion_spi_info {
+	u32	tclk;		/* no <linux/clk.h> support yet */
+};
+
+
+#endif
-- 
cgit v1.2.3


From f6ac436dcc4c34709bcde355f3f2254ac0a183d4 Mon Sep 17 00:00:00 2001
From: Mark Asselstine <mark.asselstine@windriver.com>
Date: Tue, 5 Aug 2008 13:01:24 -0700
Subject: Remove the deprecated cli() sti() functions

These functions have been deprecated for some time now but remained until
all legacy callers could be removed.  With a few commits in 2.6.26 this
has happened so now we can remove these deprecated functions.

Signed-off-by: Mark Asselstine <mark.asselstine@windriver.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/00-INDEX            |   2 -
 Documentation/cli-sti-removal.txt | 133 --------------------------------------
 include/linux/interrupt.h         |  29 ---------
 3 files changed, 164 deletions(-)
 delete mode 100644 Documentation/cli-sti-removal.txt

(limited to 'include/linux')

diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 6de71308a906..5b5aba404aac 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -89,8 +89,6 @@ cciss.txt
 	- info, major/minor #'s for Compaq's SMART Array Controllers.
 cdrom/
 	- directory with information on the CD-ROM drivers that Linux has.
-cli-sti-removal.txt
-	- cli()/sti() removal guide.
 computone.txt
 	- info on Computone Intelliport II/Plus Multiport Serial Driver.
 connector/
diff --git a/Documentation/cli-sti-removal.txt b/Documentation/cli-sti-removal.txt
deleted file mode 100644
index 60932b02fcb3..000000000000
--- a/Documentation/cli-sti-removal.txt
+++ /dev/null
@@ -1,133 +0,0 @@
-
-#### cli()/sti() removal guide, started by Ingo Molnar <mingo@redhat.com>
-
-
-as of 2.5.28, five popular macros have been removed on SMP, and
-are being phased out on UP:
-
- cli(), sti(), save_flags(flags), save_flags_cli(flags), restore_flags(flags)
-
-until now it was possible to protect driver code against interrupt
-handlers via a cli(), but from now on other, more lightweight methods
-have to be used for synchronization, such as spinlocks or semaphores.
-
-for example, driver code that used to do something like:
-
-	struct driver_data;
-
-	irq_handler (...)
-	{
-		....
-		driver_data.finish = 1;
-		driver_data.new_work = 0;
-		....
-	}
-
-	...
-
-	ioctl_func (...)
-	{
-		...
-		cli();
-		...
-		driver_data.finish = 0;
-		driver_data.new_work = 2;
-		...
-		sti();
-		...
-	}
-
-was SMP-correct because the cli() function ensured that no
-interrupt handler (amongst them the above irq_handler()) function
-would execute while the cli()-ed section is executing.
-
-but from now on a more direct method of locking has to be used:
-
-	DEFINE_SPINLOCK(driver_lock);
-	struct driver_data;
-
-	irq_handler (...)
-	{
-		unsigned long flags;
-		....
-		spin_lock_irqsave(&driver_lock, flags);
-		....
-		driver_data.finish = 1;
-		driver_data.new_work = 0;
-		....
-		spin_unlock_irqrestore(&driver_lock, flags);
-		....
-	}
-
-	...
-
-	ioctl_func (...)
-	{
-		...
-		spin_lock_irq(&driver_lock);
-		...
-		driver_data.finish = 0;
-		driver_data.new_work = 2;
-		...
-		spin_unlock_irq(&driver_lock);
-		...
-	}
-
-the above code has a number of advantages:
-
-- the locking relation is easier to understand - actual lock usage
-  pinpoints the critical sections. cli() usage is too opaque.
-  Easier to understand means it's easier to debug.
-
-- it's faster, because spinlocks are faster to acquire than the
-  potentially heavily-used IRQ lock. Furthermore, your driver does
-  not have to wait eg. for a big heavy SCSI interrupt to finish,
-  because the driver_lock spinlock is only used by your driver.
-  cli() on the other hand was used by many drivers, and extended
-  the critical section to the whole IRQ handler function - creating
-  serious lock contention.
-
- 
-to make the transition easier, we've still kept the cli(), sti(),
-save_flags(), save_flags_cli() and restore_flags() macros defined
-on UP systems - but their usage will be phased out until 2.6 is
-released.
-
-drivers that want to disable local interrupts (interrupts on the
-current CPU), can use the following five macros:
-
-  local_irq_disable(), local_irq_enable(), local_save_flags(flags),
-  local_irq_save(flags), local_irq_restore(flags)
-
-but beware, their meaning and semantics are much simpler, far from
-that of the old cli(), sti(), save_flags(flags) and restore_flags(flags)
-SMP meaning:
-
-    local_irq_disable()       => turn local IRQs off
-
-    local_irq_enable()        => turn local IRQs on
-
-    local_save_flags(flags)   => save the current IRQ state into flags. The
-                                 state can be on or off. (on some
-                                 architectures there's even more bits in it.)
-
-    local_irq_save(flags)     => save the current IRQ state into flags and
-                                 disable interrupts.
-
-    local_irq_restore(flags)  => restore the IRQ state from flags.
-
-(local_irq_save can save both irqs on and irqs off state, and
-local_irq_restore can restore into both irqs on and irqs off state.)
-
-another related change is that synchronize_irq() now takes a parameter:
-synchronize_irq(irq). This change too has the purpose of making SMP
-synchronization more lightweight - this way you can wait for your own
-interrupt handler to finish, no need to wait for other IRQ sources.
-
-
-why were these changes done? The main reason was the architectural burden
-of maintaining the cli()/sti() interface - it became a real problem. The
-new interrupt system is much more streamlined, easier to understand, debug,
-and it's also a bit faster - the same happened to it that will happen to
-cli()/sti() using drivers once they convert to spinlocks :-)
-
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 62aa4f895abe..58ff4e74b2f3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -223,35 +223,6 @@ static inline int disable_irq_wake(unsigned int irq)
 #define or_softirq_pending(x)  (local_softirq_pending() |= (x))
 #endif
 
-/*
- * Temporary defines for UP kernels, until all code gets fixed.
- */
-#ifndef CONFIG_SMP
-static inline void __deprecated cli(void)
-{
-	local_irq_disable();
-}
-static inline void __deprecated sti(void)
-{
-	local_irq_enable();
-}
-static inline void __deprecated save_flags(unsigned long *x)
-{
-	local_save_flags(*x);
-}
-#define save_flags(x) save_flags(&x)
-static inline void __deprecated restore_flags(unsigned long x)
-{
-	local_irq_restore(x);
-}
-
-static inline void __deprecated save_and_cli(unsigned long *x)
-{
-	local_irq_save(*x);
-}
-#define save_and_cli(x)	save_and_cli(&x)
-#endif /* CONFIG_SMP */
-
 /* Some architectures might implement lazy enabling/disabling of
  * interrupts. In some cases, such as stop_machine, we might want
  * to ensure that after a local_irq_disable(), interrupts have
-- 
cgit v1.2.3


From bf1db69fbf4ff511e88736ce2e6318846f34492b Mon Sep 17 00:00:00 2001
From: Richard Hughes <richard@hughsie.com>
Date: Tue, 5 Aug 2008 13:01:35 -0700
Subject: pm_qos: spelling fixes

A documentation cleanup patch.  With a minor tweak to clarify units for
kbs.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: mark gross <mgross@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/power/pm_qos_interface.txt |  7 ++++++-
 include/linux/pm_qos_params.h            |  2 +-
 kernel/pm_qos_params.c                   | 16 ++++++++--------
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index 49adb1a33514..c40866e8b957 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -1,4 +1,4 @@
-PM quality of Service interface.
+PM Quality Of Service Interface.
 
 This interface provides a kernel and user mode interface for registering
 performance expectations by drivers, subsystems and user space applications on
@@ -7,6 +7,11 @@ one of the parameters.
 Currently we have {cpu_dma_latency, network_latency, network_throughput} as the
 initial set of pm_qos parameters.
 
+Each parameters have defined units:
+ * latency: usec
+ * timeout: usec
+ * throughput: kbs (kilo bit / sec)
+
 The infrastructure exposes multiple misc device nodes one per implemented
 parameter.  The set of parameters implement is defined by pm_qos_power_init()
 and pm_qos_params.h.  This is done because having the available parameters
diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h
index 2e4e97bd19f7..d74f75ed1e47 100644
--- a/include/linux/pm_qos_params.h
+++ b/include/linux/pm_qos_params.h
@@ -1,6 +1,6 @@
 /* interface for the pm_qos_power infrastructure of the linux kernel.
  *
- * Mark Gross
+ * Mark Gross <mgross@linux.intel.com>
  */
 #include <linux/list.h>
 #include <linux/notifier.h>
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 8cb757026386..da9c2dda6a4e 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -24,7 +24,7 @@
  * requirement that the application has is cleaned up when closes the file
  * pointer or exits the pm_qos_object will get an opportunity to clean up.
  *
- * mark gross mgross@linux.intel.com
+ * Mark Gross <mgross@linux.intel.com>
  */
 
 #include <linux/pm_qos_params.h>
@@ -211,8 +211,8 @@ EXPORT_SYMBOL_GPL(pm_qos_requirement);
  * @value: defines the qos request
  *
  * This function inserts a new entry in the pm_qos_class list of requested qos
- * performance charactoistics.  It recomputes the agregate QoS expectations for
- * the pm_qos_class of parrameters.
+ * performance characteristics.  It recomputes the aggregate QoS expectations
+ * for the pm_qos_class of parameters.
  */
 int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
 {
@@ -250,10 +250,10 @@ EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
  * @name: identifies the request
  * @value: defines the qos request
  *
- * Updates an existing qos requierement for the pm_qos_class of parameters along
+ * Updates an existing qos requirement for the pm_qos_class of parameters along
  * with updating the target pm_qos_class value.
  *
- * If the named request isn't in the lest then no change is made.
+ * If the named request isn't in the list then no change is made.
  */
 int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
 {
@@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
  * @pm_qos_class: identifies which list of qos request to us
  * @name: identifies the request
  *
- * Will remove named qos request from pm_qos_class list of parrameters and
+ * Will remove named qos request from pm_qos_class list of parameters and
  * recompute the current target value for the pm_qos_class.
  */
 void pm_qos_remove_requirement(int pm_qos_class, char *name)
@@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
  * @notifier: notifier block managed by caller.
  *
  * will register the notifier into a notification chain that gets called
- * uppon changes to the pm_qos_class target value.
+ * upon changes to the pm_qos_class target value.
  */
  int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
@@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(pm_qos_add_notifier);
  * @notifier: notifier block to be removed.
  *
  * will remove the notifier from the notification chain that gets called
- * uppon changes to the pm_qos_class target value.
+ * upon changes to the pm_qos_class target value.
  */
 int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
-- 
cgit v1.2.3