Merge branch 'devel-stable' into devel

Conflicts: arch/arm/Kconfig arch/arm/include/asm/system.h arch/arm/mm/Kconfig
author: Russell King <rmk+kernel@arm.linux.org.uk> 2010-05-17 17:24:04 +0100
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2010-05-17 17:24:04 +0100
commit: ac1d426e825ab5778995f2f6f053ca2e6b45c622 (patch)
tree: 75b91356ca39463e0112931aa6790802fb1e07a2 /Documentation
parent: fda0e18c8a7a3e02747c2b045b4fcd2c920410b9 (diff)
parent: a3685f00652af83f12b63e3b4ef48f29581ba48b (diff)
37 files changed, 880 insertions, 182 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index a986e9bbba3d..bcebb9eaedce 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -160,7 +160,7 @@ Description:
 		match the driver to the device.  For example:
 		# echo "046d c315" > /sys/bus/usb/drivers/foo/remove_id
 
-What:		/sys/bus/usb/device/.../avoid_reset
+What:		/sys/bus/usb/device/.../avoid_reset_quirk
 Date:		December 2009
 Contact:	Oliver Neukum <oliver@neukum.org>
 Description:
diff --git a/Documentation/PCI/PCI-DMA-mapping.txt b/Documentation/DMA-API-HOWTO.txt
index 52618ab069ad..52618ab069ad 100644
--- a/Documentation/PCI/PCI-DMA-mapping.txt
+++ b/Documentation/DMA-API-HOWTO.txt
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
index ba9975771503..ff3e5bec1c24 100644
--- a/Documentation/DocBook/libata.tmpl
+++ b/Documentation/DocBook/libata.tmpl
@@ -107,10 +107,6 @@ void (*dev_config) (struct ata_port *, struct ata_device *);
 	issue of SET FEATURES - XFER MODE, and prior to operation.
 	</para>
 	<para>
-	Called by ata_device_add() after ata_dev_identify() determines
-	a device is present.
-	</para>
-	<para>
 	This entry may be specified as NULL in ata_port_operations.
 	</para>
 
@@ -154,8 +150,8 @@ unsigned int (*mode_filter) (struct ata_port *, struct ata_device *, unsigned in
 
 	<sect2><title>Taskfile read/write</title>
 	<programlisting>
-void (*tf_load) (struct ata_port *ap, struct ata_taskfile *tf);
-void (*tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
+void (*sff_tf_load) (struct ata_port *ap, struct ata_taskfile *tf);
+void (*sff_tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
 	</programlisting>
 
 	<para>
@@ -164,36 +160,35 @@ void (*tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
 	hardware registers / DMA buffers, to obtain the current set of
 	taskfile register values.
 	Most drivers for taskfile-based hardware (PIO or MMIO) use
-	ata_tf_load() and ata_tf_read() for these hooks.
+	ata_sff_tf_load() and ata_sff_tf_read() for these hooks.
 	</para>
 
 	</sect2>
 
 	<sect2><title>PIO data read/write</title>
 	<programlisting>
-void (*data_xfer) (struct ata_device *, unsigned char *, unsigned int, int);
+void (*sff_data_xfer) (struct ata_device *, unsigned char *, unsigned int, int);
 	</programlisting>
 
 	<para>
 All bmdma-style drivers must implement this hook.  This is the low-level
 operation that actually copies the data bytes during a PIO data
 transfer.
-Typically the driver
-will choose one of ata_pio_data_xfer_noirq(), ata_pio_data_xfer(), or
-ata_mmio_data_xfer().
+Typically the driver will choose one of ata_sff_data_xfer_noirq(),
+ata_sff_data_xfer(), or ata_sff_data_xfer32().
 	</para>
 
 	</sect2>
 
 	<sect2><title>ATA command execute</title>
 	<programlisting>
-void (*exec_command)(struct ata_port *ap, struct ata_taskfile *tf);
+void (*sff_exec_command)(struct ata_port *ap, struct ata_taskfile *tf);
 	</programlisting>
 
 	<para>
 	causes an ATA command, previously loaded with
 	->tf_load(), to be initiated in hardware.
-	Most drivers for taskfile-based hardware use ata_exec_command()
+	Most drivers for taskfile-based hardware use ata_sff_exec_command()
 	for this hook.
 	</para>
 
@@ -218,8 +213,8 @@ command.
 
 	<sect2><title>Read specific ATA shadow registers</title>
 	<programlisting>
-u8   (*check_status)(struct ata_port *ap);
-u8   (*check_altstatus)(struct ata_port *ap);
+u8   (*sff_check_status)(struct ata_port *ap);
+u8   (*sff_check_altstatus)(struct ata_port *ap);
 	</programlisting>
 
 	<para>
@@ -227,20 +222,14 @@ u8   (*check_altstatus)(struct ata_port *ap);
 	hardware.  On some hardware, reading the Status register has
 	the side effect of clearing the interrupt condition.
 	Most drivers for taskfile-based hardware use
-	ata_check_status() for this hook.
-	</para>
-	<para>
-	Note that because this is called from ata_device_add(), at
-	least a dummy function that clears device interrupts must be
-	provided for all drivers, even if the controller doesn't
-	actually have a taskfile status register.
+	ata_sff_check_status() for this hook.
 	</para>
 
 	</sect2>
 
 	<sect2><title>Select ATA device on bus</title>
 	<programlisting>
-void (*dev_select)(struct ata_port *ap, unsigned int device);
+void (*sff_dev_select)(struct ata_port *ap, unsigned int device);
 	</programlisting>
 
 	<para>
@@ -251,9 +240,7 @@ void (*dev_select)(struct ata_port *ap, unsigned int device);
 	</para>
 	<para>
 	Most drivers for taskfile-based hardware use
-	ata_std_dev_select() for this hook.  Controllers which do not
-	support second drives on a port (such as SATA contollers) will
-	use ata_noop_dev_select().
+	ata_sff_dev_select() for this hook.
 	</para>
 
 	</sect2>
@@ -441,13 +428,13 @@ void (*irq_clear) (struct ata_port *);
 	to struct ata_host_set.
 	</para>
 	<para>
-	Most legacy IDE drivers use ata_interrupt() for the
+	Most legacy IDE drivers use ata_sff_interrupt() for the
 	irq_handler hook, which scans all ports in the host_set,
 	determines which queued command was active (if any), and calls
-	ata_host_intr(ap,qc).
+	ata_sff_host_intr(ap,qc).
 	</para>
 	<para>
-	Most legacy IDE drivers use ata_bmdma_irq_clear() for the
+	Most legacy IDE drivers use ata_sff_irq_clear() for the
 	irq_clear() hook, which simply clears the interrupt and error
 	flags in the DMA status register.
 	</para>
@@ -496,10 +483,6 @@ void (*host_stop) (struct ata_host_set *host_set);
 	data from port at this time.
 	</para>
 	<para>
-	Many drivers use ata_port_stop() as this hook, which frees the
-	PRD table.
-	</para>
-	<para>
 	->host_stop() is called after all ->port_stop() calls
 have completed.  The hook must finalize hardware shutdown, release DMA
 and other resources, etc.
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index 8bca1d5cec09..e8473eae2a20 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -16,6 +16,15 @@
      </address>
     </affiliation>
    </author>
+   <author>
+    <firstname>William</firstname>
+    <surname>Cohen</surname>
+    <affiliation>
+     <address>
+      <email>wcohen@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
   </authorgroup>
 
   <legalnotice>
@@ -91,4 +100,8 @@
 !Iinclude/trace/events/signal.h
   </chapter>
 
+  <chapter id="block">
+   <title>Block IO</title>
+!Iinclude/trace/events/block.h
+  </chapter>
 </book>
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index f5395af88a41..40ada93b820a 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -234,7 +234,7 @@ process is as follows:
     Linus, usually the patches that have already been included in the
     -next kernel for a few weeks.  The preferred way to submit big changes
     is using git (the kernel's source management tool, more information
-    can be found at http://git.or.cz/) but plain patches are also just
+    can be found at http://git-scm.com/) but plain patches are also just
     fine.
   - After two weeks a -rc1 kernel is released it is now possible to push
     only patches that do not include new features that could affect the
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index a6d32e65d222..a8536cb88091 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -34,7 +34,7 @@ NMI handler.
 		cpu = smp_processor_id();
 		++nmi_count(cpu);
 
-		if (!rcu_dereference(nmi_callback)(regs, cpu))
+		if (!rcu_dereference_sched(nmi_callback)(regs, cpu))
 			default_do_nmi(regs);
 
 		nmi_exit();
@@ -47,12 +47,13 @@ function pointer.  If this handler returns zero, do_nmi() invokes the
 default_do_nmi() function to handle a machine-specific NMI.  Finally,
 preemption is restored.
 
-Strictly speaking, rcu_dereference() is not needed, since this code runs
-only on i386, which does not need rcu_dereference() anyway.  However,
-it is a good documentation aid, particularly for anyone attempting to
-do something similar on Alpha.
+In theory, rcu_dereference_sched() is not needed, since this code runs
+only on i386, which in theory does not need rcu_dereference_sched()
+anyway.  However, in practice it is a good documentation aid, particularly
+for anyone attempting to do something similar on Alpha or on systems
+with aggressive optimizing compilers.
 
-Quick Quiz:  Why might the rcu_dereference() be necessary on Alpha,
+Quick Quiz:  Why might the rcu_dereference_sched() be necessary on Alpha,
 	     given that the code referenced by the pointer is read-only?
 
 
@@ -99,17 +100,21 @@ invoke irq_enter() and irq_exit() on NMI entry and exit, respectively.
 
 Answer to Quick Quiz
 
-	Why might the rcu_dereference() be necessary on Alpha, given
+	Why might the rcu_dereference_sched() be necessary on Alpha, given
 	that the code referenced by the pointer is read-only?
 
 	Answer: The caller to set_nmi_callback() might well have
-		initialized some data that is to be used by the
-		new NMI handler.  In this case, the rcu_dereference()
-		would be needed, because otherwise a CPU that received
-		an NMI just after the new handler was set might see
-		the pointer to the new NMI handler, but the old
-		pre-initialized version of the handler's data.
-
-		More important, the rcu_dereference() makes it clear
-		to someone reading the code that the pointer is being
-		protected by RCU.
+		initialized some data that is to be used by the new NMI
+		handler.  In this case, the rcu_dereference_sched() would
+		be needed, because otherwise a CPU that received an NMI
+		just after the new handler was set might see the pointer
+		to the new NMI handler, but the old pre-initialized
+		version of the handler's data.
+
+		This same sad story can happen on other CPUs when using
+		a compiler with aggressive pointer-value speculation
+		optimizations.
+
+		More important, the rcu_dereference_sched() makes it
+		clear to someone reading the code that the pointer is
+		being protected by RCU-sched.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index cbc180f90194..790d1a812376 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -260,7 +260,8 @@ over a rather long period of time, but improvements are always welcome!
 	The reason that it is permissible to use RCU list-traversal
 	primitives when the update-side lock is held is that doing so
 	can be quite helpful in reducing code bloat when common code is
-	shared between readers and updaters.
+	shared between readers and updaters.  Additional primitives
+	are provided for this case, as discussed in lockdep.txt.
 
 10.	Conversely, if you are in an RCU read-side critical section,
 	and you don't hold the appropriate update-side lock, you -must-
@@ -344,8 +345,8 @@ over a rather long period of time, but improvements are always welcome!
 	requiring SRCU's read-side deadlock immunity or low read-side
 	realtime latency.
 
-	Note that, rcu_assign_pointer() and rcu_dereference() relate to
-	SRCU just as they do to other forms of RCU.
+	Note that, rcu_assign_pointer() relates to SRCU just as they do
+	to other forms of RCU.
 
 15.	The whole point of call_rcu(), synchronize_rcu(), and friends
 	is to wait until all pre-existing readers have finished before
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index fe24b58627bd..d7a49b2f6994 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -32,9 +32,20 @@ checking of rcu_dereference() primitives:
 	srcu_dereference(p, sp):
 		Check for SRCU read-side critical section.
 	rcu_dereference_check(p, c):
-		Use explicit check expression "c".
+		Use explicit check expression "c".  This is useful in
+		code that is invoked by both readers and updaters.
 	rcu_dereference_raw(p)
 		Don't check.  (Use sparingly, if at all.)
+	rcu_dereference_protected(p, c):
+		Use explicit check expression "c", and omit all barriers
+		and compiler constraints.  This is useful when the data
+		structure cannot change, for example, in code that is
+		invoked only by updaters.
+	rcu_access_pointer(p):
+		Return the value of the pointer and omit all barriers,
+		but retain the compiler constraints that prevent duplicating
+		or coalescsing.  This is useful when when testing the
+		value of the pointer itself, for example, against NULL.
 
 The rcu_dereference_check() check expression can be any boolean
 expression, but would normally include one of the rcu_read_lock_held()
@@ -59,7 +70,20 @@ In case (1), the pointer is picked up in an RCU-safe manner for vanilla
 RCU read-side critical sections, in case (2) the ->file_lock prevents
 any change from taking place, and finally, in case (3) the current task
 is the only task accessing the file_struct, again preventing any change
-from taking place.
+from taking place.  If the above statement was invoked only from updater
+code, it could instead be written as follows:
+
+	file = rcu_dereference_protected(fdt->fd[fd],
+					 lockdep_is_held(&files->file_lock) ||
+					 atomic_read(&files->count) == 1);
+
+This would verify cases #2 and #3 above, and furthermore lockdep would
+complain if this was used in an RCU read-side critical section unless one
+of these two cases held.  Because rcu_dereference_protected() omits all
+barriers and compiler constraints, it generates better code than do the
+other flavors of rcu_dereference().  On the other hand, it is illegal
+to use rcu_dereference_protected() if either the RCU-protected pointer
+or the RCU-protected data that it points to can change concurrently.
 
 There are currently only "universal" versions of the rcu_assign_pointer()
 and RCU list-/tree-traversal primitives, which do not (yet) check for
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 1dc00ee97163..cfaac34c4557 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -840,6 +840,12 @@ SRCU:	Initialization/cleanup
 	init_srcu_struct
 	cleanup_srcu_struct
 
+All:  lockdep-checked RCU-protected pointer access
+
+	rcu_dereference_check
+	rcu_dereference_protected
+	rcu_access_pointer
+
 See the comment headers in the source code (or the docbook generated
 from them) for more information.
 
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6fab97ea7e6b..508b5b2b0289 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -1162,8 +1162,8 @@ where a driver received a request ala this before:
 
 As mentioned, there is no virtual mapping of a bio. For DMA, this is
 not a problem as the driver probably never will need a virtual mapping.
-Instead it needs a bus mapping (pci_map_page for a single segment or
-use blk_rq_map_sg for scatter gather) to be able to ship it to the driver. For
+Instead it needs a bus mapping (dma_map_page for a single segment or
+use dma_map_sg for scatter gather) to be able to ship it to the driver. For
 PIO drivers (or drivers that need to revert to PIO transfer once in a
 while (IDE for example)), where the CPU is doing the actual data
 transfer a virtual mapping is needed. If the driver supports highmem I/O,
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index fd588ff0e296..a1ca5924faff 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -235,8 +235,7 @@ containing the following files describing that cgroup:
  - cgroup.procs: list of tgids in the cgroup.  This list is not
    guaranteed to be sorted or free of duplicate tgids, and userspace
    should sort/uniquify the list if this property is required.
-   Writing a tgid into this file moves all threads with that tgid into
-   this cgroup.
+   This is a read-only file, for now.
  - notify_on_release flag: run the release agent on exit?
  - release_agent: the path to use for release notifications (this file
    exists in the top cgroup only)
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index f8bc802d70b9..3a6aecd078ba 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -340,7 +340,7 @@ Note:
 5.3 swappiness
   Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
 
-  Following cgroups' swapiness can't be changed.
+  Following cgroups' swappiness can't be changed.
   - root cgroup (uses /proc/sys/vm/swappiness).
   - a cgroup which uses hierarchy and it has child cgroup.
   - a cgroup which uses hierarchy and not the root of hierarchy.
diff --git a/Documentation/circular-buffers.txt b/Documentation/circular-buffers.txt
new file mode 100644
index 000000000000..8117e5bf6065
--- /dev/null
+++ b/Documentation/circular-buffers.txt
@@ -0,0 +1,234 @@
+			       ================
+			       CIRCULAR BUFFERS
+			       ================
+
+By: David Howells <dhowells@redhat.com>
+    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+
+Linux provides a number of features that can be used to implement circular
+buffering.  There are two sets of such features:
+
+ (1) Convenience functions for determining information about power-of-2 sized
+     buffers.
+
+ (2) Memory barriers for when the producer and the consumer of objects in the
+     buffer don't want to share a lock.
+
+To use these facilities, as discussed below, there needs to be just one
+producer and just one consumer.  It is possible to handle multiple producers by
+serialising them, and to handle multiple consumers by serialising them.
+
+
+Contents:
+
+ (*) What is a circular buffer?
+
+ (*) Measuring power-of-2 buffers.
+
+ (*) Using memory barriers with circular buffers.
+     - The producer.
+     - The consumer.
+
+
+==========================
+WHAT IS A CIRCULAR BUFFER?
+==========================
+
+First of all, what is a circular buffer?  A circular buffer is a buffer of
+fixed, finite size into which there are two indices:
+
+ (1) A 'head' index - the point at which the producer inserts items into the
+     buffer.
+
+ (2) A 'tail' index - the point at which the consumer finds the next item in
+     the buffer.
+
+Typically when the tail pointer is equal to the head pointer, the buffer is
+empty; and the buffer is full when the head pointer is one less than the tail
+pointer.
+
+The head index is incremented when items are added, and the tail index when
+items are removed.  The tail index should never jump the head index, and both
+indices should be wrapped to 0 when they reach the end of the buffer, thus
+allowing an infinite amount of data to flow through the buffer.
+
+Typically, items will all be of the same unit size, but this isn't strictly
+required to use the techniques below.  The indices can be increased by more
+than 1 if multiple items or variable-sized items are to be included in the
+buffer, provided that neither index overtakes the other.  The implementer must
+be careful, however, as a region more than one unit in size may wrap the end of
+the buffer and be broken into two segments.
+
+
+============================
+MEASURING POWER-OF-2 BUFFERS
+============================
+
+Calculation of the occupancy or the remaining capacity of an arbitrarily sized
+circular buffer would normally be a slow operation, requiring the use of a
+modulus (divide) instruction.  However, if the buffer is of a power-of-2 size,
+then a much quicker bitwise-AND instruction can be used instead.
+
+Linux provides a set of macros for handling power-of-2 circular buffers.  These
+can be made use of by:
+
+	#include <linux/circ_buf.h>
+
+The macros are:
+
+ (*) Measure the remaining capacity of a buffer:
+
+	CIRC_SPACE(head_index, tail_index, buffer_size);
+
+     This returns the amount of space left in the buffer[1] into which items
+     can be inserted.
+
+
+ (*) Measure the maximum consecutive immediate space in a buffer:
+
+	CIRC_SPACE_TO_END(head_index, tail_index, buffer_size);
+
+     This returns the amount of consecutive space left in the buffer[1] into
+     which items can be immediately inserted without having to wrap back to the
+     beginning of the buffer.
+
+
+ (*) Measure the occupancy of a buffer:
+
+	CIRC_CNT(head_index, tail_index, buffer_size);
+
+     This returns the number of items currently occupying a buffer[2].
+
+
+ (*) Measure the non-wrapping occupancy of a buffer:
+
+	CIRC_CNT_TO_END(head_index, tail_index, buffer_size);
+
+     This returns the number of consecutive items[2] that can be extracted from
+     the buffer without having to wrap back to the beginning of the buffer.
+
+
+Each of these macros will nominally return a value between 0 and buffer_size-1,
+however:
+
+ [1] CIRC_SPACE*() are intended to be used in the producer.  To the producer
+     they will return a lower bound as the producer controls the head index,
+     but the consumer may still be depleting the buffer on another CPU and
+     moving the tail index.
+
+     To the consumer it will show an upper bound as the producer may be busy
+     depleting the space.
+
+ [2] CIRC_CNT*() are intended to be used in the consumer.  To the consumer they
+     will return a lower bound as the consumer controls the tail index, but the
+     producer may still be filling the buffer on another CPU and moving the
+     head index.
+
+     To the producer it will show an upper bound as the consumer may be busy
+     emptying the buffer.
+
+ [3] To a third party, the order in which the writes to the indices by the
+     producer and consumer become visible cannot be guaranteed as they are
+     independent and may be made on different CPUs - so the result in such a
+     situation will merely be a guess, and may even be negative.
+
+
+===========================================
+USING MEMORY BARRIERS WITH CIRCULAR BUFFERS
+===========================================
+
+By using memory barriers in conjunction with circular buffers, you can avoid
+the need to:
+
+ (1) use a single lock to govern access to both ends of the buffer, thus
+     allowing the buffer to be filled and emptied at the same time; and
+
+ (2) use atomic counter operations.
+
+There are two sides to this: the producer that fills the buffer, and the
+consumer that empties it.  Only one thing should be filling a buffer at any one
+time, and only one thing should be emptying a buffer at any one time, but the
+two sides can operate simultaneously.
+
+
+THE PRODUCER
+------------
+
+The producer will look something like this:
+
+	spin_lock(&producer_lock);
+
+	unsigned long head = buffer->head;
+	unsigned long tail = ACCESS_ONCE(buffer->tail);
+
+	if (CIRC_SPACE(head, tail, buffer->size) >= 1) {
+		/* insert one item into the buffer */
+		struct item *item = buffer[head];
+
+		produce_item(item);
+
+		smp_wmb(); /* commit the item before incrementing the head */
+
+		buffer->head = (head + 1) & (buffer->size - 1);
+
+		/* wake_up() will make sure that the head is committed before
+		 * waking anyone up */
+		wake_up(consumer);
+	}
+
+	spin_unlock(&producer_lock);
+
+This will instruct the CPU that the contents of the new item must be written
+before the head index makes it available to the consumer and then instructs the
+CPU that the revised head index must be written before the consumer is woken.
+
+Note that wake_up() doesn't have to be the exact mechanism used, but whatever
+is used must guarantee a (write) memory barrier between the update of the head
+index and the change of state of the consumer, if a change of state occurs.
+
+
+THE CONSUMER
+------------
+
+The consumer will look something like this:
+
+	spin_lock(&consumer_lock);
+
+	unsigned long head = ACCESS_ONCE(buffer->head);
+	unsigned long tail = buffer->tail;
+
+	if (CIRC_CNT(head, tail, buffer->size) >= 1) {
+		/* read index before reading contents at that index */
+		smp_read_barrier_depends();
+
+		/* extract one item from the buffer */
+		struct item *item = buffer[tail];
+
+		consume_item(item);
+
+		smp_mb(); /* finish reading descriptor before incrementing tail */
+
+		buffer->tail = (tail + 1) & (buffer->size - 1);
+	}
+
+	spin_unlock(&consumer_lock);
+
+This will instruct the CPU to make sure the index is up to date before reading
+the new item, and then it shall make sure the CPU has finished reading the item
+before it writes the new tail pointer, which will erase the item.
+
+
+Note the use of ACCESS_ONCE() in both algorithms to read the opposition index.
+This prevents the compiler from discarding and reloading its cached value -
+which some compilers will do across smp_read_barrier_depends().  This isn't
+strictly needed if you can be sure that the opposition index will _only_ be
+used the once.
+
+
+===============
+FURTHER READING
+===============
+
+See also Documentation/memory-barriers.txt for a description of Linux's memory
+barrier facilities.
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index b07add3467f1..7764594778d4 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/skbuff.h>
+#include <linux/slab.h>
 #include <linux/timer.h>
 
 #include <linux/connector.h>
diff --git a/Documentation/fb/imacfb.txt b/Documentation/fb/efifb.txt
index 316ec9bb7deb..a59916c29b33 100644
--- a/Documentation/fb/imacfb.txt
+++ b/Documentation/fb/efifb.txt
@@ -1,9 +1,9 @@
 
-What is imacfb?
+What is efifb?
 ===============
 
 This is a generic EFI platform driver for Intel based Apple computers.
-Imacfb is only for EFI booted Intel Macs.
+efifb is only for EFI booted Intel Macs.
 
 Supported Hardware
 ==================
@@ -16,16 +16,16 @@ MacMini
 How to use it?
 ==============
 
-Imacfb does not have any kind of autodetection of your machine.
+efifb does not have any kind of autodetection of your machine.
 You have to add the following kernel parameters in your elilo.conf:
 	Macbook :
-		video=imacfb:macbook
+		video=efifb:macbook
 	MacMini :
-		video=imacfb:mini
+		video=efifb:mini
 	Macbook Pro 15", iMac 17" :
-		video=imacfb:i17
+		video=efifb:i17
 	Macbook Pro 17", iMac 20" :
-		video=imacfb:i20
+		video=efifb:i20
 
 --
 Edgar Hucek <gimli@dark-green.com>
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ed511af0f79a..a5e381185de9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -520,29 +520,6 @@ Who:	Hans de Goede <hdegoede@redhat.com>
 
 ----------------------------
 
-What:	corgikbd, spitzkbd, tosakbd driver
-When:	2.6.35
-Files:	drivers/input/keyboard/{corgi,spitz,tosa}kbd.c
-Why:	We now have a generic GPIO based matrix keyboard driver that
-	are fully capable of handling all the keys on these devices.
-	The original drivers manipulate the GPIO registers directly
-	and so are difficult to maintain.
-Who:	Eric Miao <eric.y.miao@gmail.com>
-
-----------------------------
-
-What:	corgi_ssp and corgi_ts driver
-When:	2.6.35
-Files:	arch/arm/mach-pxa/corgi_ssp.c, drivers/input/touchscreen/corgi_ts.c
-Why:	The corgi touchscreen is now deprecated in favour of the generic
-	ads7846.c driver. The noise reduction technique used in corgi_ts.c,
-	that's to wait till vsync before ADC sampling, is also integrated into
-	ads7846 driver now. Provided that the original driver is not generic
-	and is difficult to maintain, it will be removed later.
-Who:	Eric Miao <eric.y.miao@gmail.com>
-
-----------------------------
-
 What:	capifs
 When:	February 2011
 Files:	drivers/isdn/capi/capifs.*
@@ -589,3 +566,26 @@ Why:	Useful in 2003, implementation is a hack.
 	Generally invoked by accident today.
 	Seen as doing more harm than good.
 Who:	Len Brown <len.brown@intel.com>
+
+----------------------------
+
+What:	video4linux /dev/vtx teletext API support
+When:	2.6.35
+Files:	drivers/media/video/saa5246a.c drivers/media/video/saa5249.c
+	include/linux/videotext.h
+Why:	The vtx device nodes have been superseded by vbi device nodes
+	for many years. No applications exist that use the vtx support.
+	Of the two i2c drivers that actually support this API the saa5249
+	has been impossible to use for a year now and no known hardware
+	that supports this device exists. The saa5246a is theoretically
+	supported by the old mxb boards, but it never actually worked.
+
+	In summary: there is no hardware that can use this API and there
+	are no applications actually implementing this API.
+
+	The vtx support still reserves minors 192-223 and we would really
+	like to reuse those for upcoming new functionality. In the unlikely
+	event that new hardware appears that wants to use the functionality
+	provided by the vtx API, then that functionality should be build
+	around the sliced VBI API instead.
+Who:	Hans Verkuil <hverkuil@xs4all.nl>
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 3bae418c6ad3..4303614b5add 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -16,6 +16,8 @@ befs.txt
 	- information about the BeOS filesystem for Linux.
 bfs.txt
 	- info for the SCO UnixWare Boot Filesystem (BFS).
+ceph.txt
+	- info for the Ceph Distributed File System
 cifs.txt
 	- description of the CIFS filesystem.
 coda.txt
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index 57e0b80a5274..c0236e753bc8 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -37,6 +37,15 @@ For Plan 9 From User Space applications (http://swtch.com/plan9)
 
 	mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER
 
+For server running on QEMU host with virtio transport:
+
+	mount -t 9p -o trans=virtio <mount_tag> /mnt/9
+
+where mount_tag is the tag associated by the server to each of the exported
+mount points. Each 9P export is seen by the client as a virtio device with an
+associated "mount_tag" property. Available mount tags can be
+seen by reading /sys/bus/virtio/drivers/9pnet_virtio/virtio<n>/mount_tag files.
+
 OPTIONS
 =======
 
@@ -47,7 +56,7 @@ OPTIONS
 			fd   	- used passed file descriptors for connection
                                 (see rfdno and wfdno)
 			virtio	- connect to the next virtio channel available
-				(from lguest or KVM with trans_virtio module)
+				(from QEMU with trans_virtio module)
 			rdma	- connect to a specified RDMA channel
 
   uname=name	user name to attempt mount as on the remote server.  The
@@ -85,7 +94,12 @@ OPTIONS
 
   port=n	port to connect to on the remote server
 
-  noextend	force legacy mode (no 9p2000.u semantics)
+  noextend	force legacy mode (no 9p2000.u or 9p2000.L semantics)
+
+  version=name	Select 9P protocol version. Valid options are:
+			9p2000          - Legacy mode (same as noextend)
+			9p2000.u        - Use 9P2000.u protocol
+			9p2000.L        - Use 9P2000.L protocol
 
   dfltuid	attempt to mount as a particular uid
 
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
new file mode 100644
index 000000000000..0660c9f5deef
--- /dev/null
+++ b/Documentation/filesystems/ceph.txt
@@ -0,0 +1,140 @@
+Ceph Distributed File System
+============================
+
+Ceph is a distributed network file system designed to provide good
+performance, reliability, and scalability.
+
+Basic features include:
+
+ * POSIX semantics
+ * Seamless scaling from 1 to many thousands of nodes
+ * High availability and reliability.  No single point of failure.
+ * N-way replication of data across storage nodes
+ * Fast recovery from node failures
+ * Automatic rebalancing of data on node addition/removal
+ * Easy deployment: most FS components are userspace daemons
+
+Also,
+ * Flexible snapshots (on any directory)
+ * Recursive accounting (nested files, directories, bytes)
+
+In contrast to cluster filesystems like GFS, OCFS2, and GPFS that rely
+on symmetric access by all clients to shared block devices, Ceph
+separates data and metadata management into independent server
+clusters, similar to Lustre.  Unlike Lustre, however, metadata and
+storage nodes run entirely as user space daemons.  Storage nodes
+utilize btrfs to store data objects, leveraging its advanced features
+(checksumming, metadata replication, etc.).  File data is striped
+across storage nodes in large chunks to distribute workload and
+facilitate high throughputs.  When storage nodes fail, data is
+re-replicated in a distributed fashion by the storage nodes themselves
+(with some minimal coordination from a cluster monitor), making the
+system extremely efficient and scalable.
+
+Metadata servers effectively form a large, consistent, distributed
+in-memory cache above the file namespace that is extremely scalable,
+dynamically redistributes metadata in response to workload changes,
+and can tolerate arbitrary (well, non-Byzantine) node failures.  The
+metadata server takes a somewhat unconventional approach to metadata
+storage to significantly improve performance for common workloads.  In
+particular, inodes with only a single link are embedded in
+directories, allowing entire directories of dentries and inodes to be
+loaded into its cache with a single I/O operation.  The contents of
+extremely large directories can be fragmented and managed by
+independent metadata servers, allowing scalable concurrent access.
+
+The system offers automatic data rebalancing/migration when scaling
+from a small cluster of just a few nodes to many hundreds, without
+requiring an administrator carve the data set into static volumes or
+go through the tedious process of migrating data between servers.
+When the file system approaches full, new nodes can be easily added
+and things will "just work."
+
+Ceph includes flexible snapshot mechanism that allows a user to create
+a snapshot on any subdirectory (and its nested contents) in the
+system.  Snapshot creation and deletion are as simple as 'mkdir
+.snap/foo' and 'rmdir .snap/foo'.
+
+Ceph also provides some recursive accounting on directories for nested
+files and bytes.  That is, a 'getfattr -d foo' on any directory in the
+system will reveal the total number of nested regular files and
+subdirectories, and a summation of all nested file sizes.  This makes
+the identification of large disk space consumers relatively quick, as
+no 'du' or similar recursive scan of the file system is required.
+
+
+Mount Syntax
+============
+
+The basic mount syntax is:
+
+ # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
+
+You only need to specify a single monitor, as the client will get the
+full list when it connects.  (However, if the monitor you specify
+happens to be down, the mount won't succeed.)  The port can be left
+off if the monitor is using the default.  So if the monitor is at
+1.2.3.4,
+
+ # mount -t ceph 1.2.3.4:/ /mnt/ceph
+
+is sufficient.  If /sbin/mount.ceph is installed, a hostname can be
+used instead of an IP address.
+
+
+
+Mount Options
+=============
+
+  ip=A.B.C.D[:N]
+	Specify the IP and/or port the client should bind to locally.
+	There is normally not much reason to do this.  If the IP is not
+	specified, the client's IP address is determined by looking at the
+	address it's connection to the monitor originates from.
+
+  wsize=X
+	Specify the maximum write size in bytes.  By default there is no
+	maximum.  Ceph will normally size writes based on the file stripe
+	size.
+
+  rsize=X
+	Specify the maximum readahead.
+
+  mount_timeout=X
+	Specify the timeout value for mount (in seconds), in the case
+	of a non-responsive Ceph file system.  The default is 30
+	seconds.
+
+  rbytes
+	When stat() is called on a directory, set st_size to 'rbytes',
+	the summation of file sizes over all files nested beneath that
+	directory.  This is the default.
+
+  norbytes
+	When stat() is called on a directory, set st_size to the
+	number of entries in that directory.
+
+  nocrc
+	Disable CRC32C calculation for data writes.  If set, the storage node
+	must rely on TCP's error correction to detect data corruption
+	in the data payload.
+
+  noasyncreaddir
+	Disable client's use its local cache to satisfy	readdir
+	requests.  (This does not change correctness; the client uses
+	cached metadata only when a lease or capability ensures it is
+	valid.)
+
+
+More Information
+================
+
+For more information on Ceph, see the home page at
+	http://ceph.newdream.net/
+
+The Linux kernel client source tree is available at
+	git://ceph.newdream.net/git/ceph-client.git
+	git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+
+and the source for the full system is at
+	git://ceph.newdream.net/git/ceph.git
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 3015da0c6b2a..fe09a2cb1858 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -82,11 +82,13 @@ tmpfs has a mount option to set the NUMA memory allocation policy for
 all files in that instance (if CONFIG_NUMA is enabled) - which can be
 adjusted on the fly via 'mount -o remount ...'
 
-mpol=default             prefers to allocate memory from the local node
+mpol=default             use the process allocation policy
+                         (see set_mempolicy(2))
 mpol=prefer:Node         prefers to allocate memory from the given Node
 mpol=bind:NodeList       allocates memory only from nodes in NodeList
 mpol=interleave          prefers to allocate from each node in turn
 mpol=interleave:NodeList allocates from each node of NodeList in turn
+mpol=local		 prefers to allocate memory from the local node
 
 NodeList format is a comma-separated list of decimal numbers and ranges,
 a range being two hyphen-separated decimal numbers, the smallest and
@@ -134,3 +136,5 @@ Author:
    Christoph Rohland <cr@sap.com>, 1.12.01
 Updated:
    Hugh Dickins, 4 June 2007
+Updated:
+   KOSAKI Motohiro, 16 Mar 2010
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index 3219ee0dbfef..5ebf5af1d716 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -74,6 +74,11 @@ structure at all.  You should use this to keep device-specific data.
 	/* retrieve the value */
 	void *i2c_get_clientdata(const struct i2c_client *client);
 
+Note that starting with kernel 2.6.34, you don't have to set the `data' field
+to NULL in remove() or if probe() failed anymore. The i2c-core does this
+automatically on these occasions. Those are also the only times the core will
+touch this field.
+
 
 Accessing the client
 ====================
diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt
index a10c3b6ba7c4..56941ae1f5db 100644
--- a/Documentation/input/elantech.txt
+++ b/Documentation/input/elantech.txt
@@ -333,14 +333,14 @@ byte 0:
 byte 1:
 
    bit   7   6   5   4   3   2   1   0
-        x15 x14 x13 x12 x11 x10 x9  x8
+         .   .   .   .   .  x10 x9  x8
 
 byte 2:
 
    bit   7   6   5   4   3   2   1   0
         x7  x6  x5  x4  x4  x2  x1  x0
 
-         x15..x0 = absolute x value (horizontal)
+         x10..x0 = absolute x value (horizontal)
 
 byte 3:
 
@@ -350,14 +350,14 @@ byte 3:
 byte 4:
 
    bit   7   6   5   4   3   2   1   0
-        y15 y14 y13 y12 y11 y10 y8  y8
+         .   .   .   .   .   .  y9  y8
 
 byte 5:
 
    bit   7   6   5   4   3   2   1   0
         y7  y6  y5  y4  y3  y2  y1  y0
 
-         y15..y0 = absolute y value (vertical)
+         y9..y0 = absolute y value (vertical)
 
 
 4.2.2 Two finger touch
diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
index 8490480ce432..c0fc1c75fd88 100644
--- a/Documentation/input/multi-touch-protocol.txt
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -68,6 +68,22 @@ like:
    SYN_MT_REPORT
    SYN_REPORT
 
+Here is the sequence after lifting one of the fingers:
+
+   ABS_MT_POSITION_X
+   ABS_MT_POSITION_Y
+   SYN_MT_REPORT
+   SYN_REPORT
+
+And here is the sequence after lifting the remaining finger:
+
+   SYN_MT_REPORT
+   SYN_REPORT
+
+If the driver reports one of BTN_TOUCH or ABS_PRESSURE in addition to the
+ABS_MT events, the last SYN_MT_REPORT event may be omitted. Otherwise, the
+last SYN_REPORT will be dropped by the input core, resulting in no
+zero-finger event reaching userland.
 
 Event Semantics
 ---------------
@@ -217,11 +233,6 @@ where examples can be found.
 difference between the contact position and the approaching tool position
 could be used to derive tilt.
 [2] The list can of course be extended.
-[3] The multi-touch X driver is currently in the prototyping stage. At the
-time of writing (April 2009), the MT protocol is not yet merged, and the
-prototype implements finger matching, basic mouse support and two-finger
-scrolling. The project aims at improving the quality of current multi-touch
-functionality available in the Synaptics X driver, and in addition
-implement more advanced gestures.
+[3] Multitouch X driver project: http://bitmath.org/code/multitouch/.
 [4] See the section on event computation.
 [5] See the section on finger tracking.
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 35c9b51d20ea..dd5806f4fcc4 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -291,6 +291,7 @@ Code  Seq#(hex)	Include File		Comments
 0x92	00-0F	drivers/usb/mon/mon_bin.c
 0x93	60-7F	linux/auto_fs.h
 0x94	all	fs/btrfs/ioctl.h
+0x97	00-7F	fs/ceph/ioctl.h		Ceph file system
 0x99	00-0F				537-Addinboard driver
 					<mailto:buk@buks.ipn.de>
 0xA0	all	linux/sdp/sdp.h		Industrial Device Project
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e4cbca58536c..839b21b0699a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -320,11 +320,6 @@ and is between 256 and 4096 characters. It is defined in the file
 	amd_iommu=	[HW,X86-84]
 			Pass parameters to the AMD IOMMU driver in the system.
 			Possible values are:
-			isolate - enable device isolation (each device, as far
-			          as possible, will get its own protection
-			          domain) [default]
-			share - put every device behind one IOMMU into the
-				same protection domain
 			fullflush - enable flushing of IO/TLB entries when
 				    they are unmapped. Otherwise they are
 				    flushed before they will be reused, which
@@ -1199,7 +1194,7 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	libata.force=	[LIBATA] Force configurations.  The format is comma
 			separated list of "[ID:]VAL" where ID is
-			PORT[:DEVICE].  PORT and DEVICE are decimal numbers
+			PORT[.DEVICE].  PORT and DEVICE are decimal numbers
 			matching port, link or device.  Basically, it matches
 			the ATA ID string printed on console by libata.  If
 			the whole ID part is omitted, the last PORT and DEVICE
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index bdb13817e1e9..3ab2472509cb 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -59,37 +59,56 @@ nice to have in other objects.  The C language does not allow for the
 direct expression of inheritance, so other techniques - such as structure
 embedding - must be used.
 
-So, for example, the UIO code has a structure that defines the memory
-region associated with a uio device:
+(As an aside, for those familiar with the kernel linked list implementation,
+this is analogous as to how "list_head" structs are rarely useful on
+their own, but are invariably found embedded in the larger objects of
+interest.)
 
-struct uio_mem {
+So, for example, the UIO code in drivers/uio/uio.c has a structure that
+defines the memory region associated with a uio device:
+
+    struct uio_map {
 	struct kobject kobj;
-	unsigned long addr;
-	unsigned long size;
-	int memtype;
-	void __iomem *internal_addr;
-};
+	struct uio_mem *mem;
+    };
 
-If you have a struct uio_mem structure, finding its embedded kobject is
+If you have a struct uio_map structure, finding its embedded kobject is
 just a matter of using the kobj member.  Code that works with kobjects will
 often have the opposite problem, however: given a struct kobject pointer,
 what is the pointer to the containing structure?  You must avoid tricks
 (such as assuming that the kobject is at the beginning of the structure)
 and, instead, use the container_of() macro, found in <linux/kernel.h>:
 
-	container_of(pointer, type, member)
+    container_of(pointer, type, member)
+
+where:
+
+  * "pointer" is the pointer to the embedded kobject,
+  * "type" is the type of the containing structure, and
+  * "member" is the name of the structure field to which "pointer" points.
+
+The return value from container_of() is a pointer to the corresponding
+container type. So, for example, a pointer "kp" to a struct kobject
+embedded *within* a struct uio_map could be converted to a pointer to the
+*containing* uio_map structure with:
+
+    struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
+
+For convenience, programmers often define a simple macro for "back-casting"
+kobject pointers to the containing type.  Exactly this happens in the
+earlier drivers/uio/uio.c, as you can see here:
+
+    struct uio_map {
+        struct kobject kobj;
+        struct uio_mem *mem;
+    };
 
-where pointer is the pointer to the embedded kobject, type is the type of
-the containing structure, and member is the name of the structure field to
-which pointer points.  The return value from container_of() is a pointer to
-the given type. So, for example, a pointer "kp" to a struct kobject
-embedded within a struct uio_mem could be converted to a pointer to the
-containing uio_mem structure with:
+    #define to_map(map) container_of(map, struct uio_map, kobj)
 
-    struct uio_mem *u_mem = container_of(kp, struct uio_mem, kobj);
+where the macro argument "map" is a pointer to the struct kobject in
+question.  That macro is subsequently invoked with:
 
-Programmers often define a simple macro for "back-casting" kobject pointers
-to the containing type.
+    struct uio_map *map = to_map(kobj);
 
 
 Initialization of kobjects
@@ -387,4 +406,5 @@ called, and the objects in the former circle release each other.
 Example code to copy from
 
 For a more complete example of using ksets and kobjects properly, see the
-sample/kobject/kset-example.c code.
+example programs samples/kobject/{kobject-example.c,kset-example.c},
+which will be built as loadable modules if you select CONFIG_SAMPLE_KOBJECT.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 7f5809eddee6..631ad2f1b229 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -3,6 +3,7 @@
 			 ============================
 
 By: David Howells <dhowells@redhat.com>
+    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 
 Contents:
 
@@ -60,6 +61,10 @@ Contents:
 
      - And then there's the Alpha.
 
+ (*) Example uses.
+
+     - Circular buffers.
+
  (*) References.
 
 
@@ -2226,6 +2231,21 @@ The Alpha defines the Linux kernel's memory barrier model.
 See the subsection on "Cache Coherency" above.
 
 
+============
+EXAMPLE USES
+============
+
+CIRCULAR BUFFERS
+----------------
+
+Memory barriers can be used to implement circular buffering without the need
+of a lock to serialise the producer with the consumer.  See:
+
+	Documentation/circular-buffers.txt
+
+for details.
+
+
 ==========
 REFERENCES
 ==========
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
new file mode 100644
index 000000000000..7ee770b5ef5f
--- /dev/null
+++ b/Documentation/networking/stmmac.txt
@@ -0,0 +1,143 @@
+       STMicroelectronics 10/100/1000 Synopsys Ethernet driver
+
+Copyright (C) 2007-2010  STMicroelectronics Ltd
+Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+
+This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
+(Synopsys IP blocks); it has been fully tested on STLinux platforms.
+
+Currently this network device driver is for all STM embedded MAC/GMAC
+(7xxx SoCs).
+
+DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100
+Universal version 4.0 have been used for developing the first code
+implementation.
+
+Please, for more information also visit: www.stlinux.com
+
+1) Kernel Configuration
+The kernel configuration option is STMMAC_ETH:
+ Device Drivers ---> Network device support ---> Ethernet (1000 Mbit) --->
+ STMicroelectronics 10/100/1000 Ethernet driver (STMMAC_ETH)
+
+2) Driver parameters list:
+	debug: message level (0: no output, 16: all);
+	phyaddr: to manually provide the physical address to the PHY device;
+	dma_rxsize: DMA rx ring size;
+	dma_txsize: DMA tx ring size;
+	buf_sz: DMA buffer size;
+	tc: control the HW FIFO threshold;
+	tx_coe: Enable/Disable Tx Checksum Offload engine;
+	watchdog: transmit timeout (in milliseconds);
+	flow_ctrl: Flow control ability [on/off];
+	pause: Flow Control Pause Time;
+	tmrate: timer period (only if timer optimisation is configured).
+
+3) Command line options
+Driver parameters can be also passed in command line by using:
+	stmmaceth=dma_rxsize:128,dma_txsize:512
+
+4) Driver information and notes
+
+4.1) Transmit process
+The xmit method is invoked when the kernel needs to transmit a packet; it sets
+the descriptors in the ring and informs the DMA engine that there is a packet
+ready to be transmitted.
+Once the controller has finished transmitting the packet, an interrupt is
+triggered; So the driver will be able to release the socket buffers.
+By default, the driver sets the NETIF_F_SG bit in the features field of the
+net_device structure enabling the scatter/gather feature.
+
+4.2) Receive process
+When one or more packets are received, an interrupt happens. The interrupts
+are not queued so the driver has to scan all the descriptors in the ring during
+the receive process.
+This is based on NAPI so the interrupt handler signals only if there is work to be
+done, and it exits.
+Then the poll method will be scheduled at some future point.
+The incoming packets are stored, by the DMA, in a list of pre-allocated socket
+buffers in order to avoid the memcpy (Zero-copy).
+
+4.3) Timer-Driver Interrupt
+Instead of having the device that asynchronously notifies the frame receptions, the
+driver configures a timer to generate an interrupt at regular intervals.
+Based on the granularity of the timer, the frames that are received by the device
+will experience different levels of latency. Some NICs have dedicated timer
+device to perform this task. STMMAC can use either the RTC device or the TMU
+channel 2  on STLinux platforms.
+The timers frequency can be passed to the driver as parameter; when change it,
+take care of both hardware capability and network stability/performance impact.
+Several performance tests on STM platforms showed this optimisation allows to spare
+the CPU while having the maximum throughput.
+
+4.4) WOL
+Wake up on Lan feature through Magic Frame is only supported for the GMAC
+core.
+
+4.5) DMA descriptors
+Driver handles both normal and enhanced descriptors. The latter has been only
+tested on DWC Ether MAC 10/100/1000 Universal version 3.41a.
+
+4.6) Ethtool support
+Ethtool is supported. Driver statistics and internal errors can be taken using:
+ethtool -S ethX command. It is possible to dump registers etc.
+
+4.7) Jumbo and Segmentation Offloading
+Jumbo frames are supported and tested for the GMAC.
+The GSO has been also added but it's performed in software.
+LRO is not supported.
+
+4.8) Physical
+The driver is compatible with PAL to work with PHY and GPHY devices.
+
+4.9) Platform information
+Several information came from the platform; please refer to the
+driver's Header file in include/linux directory.
+
+struct plat_stmmacenet_data {
+        int bus_id;
+        int pbl;
+        int has_gmac;
+        void (*fix_mac_speed)(void *priv, unsigned int speed);
+        void (*bus_setup)(unsigned long ioaddr);
+#ifdef CONFIG_STM_DRIVERS
+        struct stm_pad_config *pad_config;
+#endif
+        void *bsp_priv;
+};
+
+Where:
+- pbl (Programmable Burst Length) is maximum number of
+  beats to be transferred in one DMA transaction.
+  GMAC also enables the 4xPBL by default.
+- fix_mac_speed and bus_setup are used to configure internal target
+  registers (on STM platforms);
+- has_gmac: GMAC core is on board (get it at run-time in the next step);
+- bus_id: bus identifier.
+
+struct plat_stmmacphy_data {
+        int bus_id;
+        int phy_addr;
+        unsigned int phy_mask;
+        int interface;
+        int (*phy_reset)(void *priv);
+        void *priv;
+};
+
+Where:
+- bus_id: bus identifier;
+- phy_addr: physical address used for the attached phy device;
+            set it to -1 to get it at run-time;
+- interface: physical MII interface mode;
+- phy_reset: hook to reset HW function.
+
+TODO:
+- Continue to make the driver more generic and suitable for other Synopsys
+  Ethernet controllers used on other architectures (i.e. ARM).
+- 10G controllers are not supported.
+- MAC uses Normal descriptors and GMAC uses enhanced ones.
+  This is a limit that should be reviewed. MAC could want to
+  use the enhanced structure.
+- Checksumming: Rx/Tx csum is done in HW in case of GMAC only.
+- Review the timer optimisation code to use an embedded device that seems to be
+  available in new chip generations.
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index 0e58b4539176..e8c8f4f06c67 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -41,11 +41,12 @@ SOF_TIMESTAMPING_SOFTWARE:     return system time stamp generated in
 SOF_TIMESTAMPING_TX/RX determine how time stamps are generated.
 SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the
 following control message:
-    struct scm_timestamping {
-           struct timespec systime;
-           struct timespec hwtimetrans;
-           struct timespec hwtimeraw;
-    };
+
+struct scm_timestamping {
+	struct timespec systime;
+	struct timespec hwtimetrans;
+	struct timespec hwtimeraw;
+};
 
 recvmsg() can be used to get this control message for regular incoming
 packets. For send time stamps the outgoing packet is looped back to
@@ -87,12 +88,13 @@ by the network device and will be empty without that support.
 SIOCSHWTSTAMP:
 
 Hardware time stamping must also be initialized for each device driver
-that is expected to do hardware time stamping. The parameter is:
+that is expected to do hardware time stamping. The parameter is defined in
+/include/linux/net_tstamp.h as:
 
 struct hwtstamp_config {
-    int flags;           /* no flags defined right now, must be zero */
-    int tx_type;         /* HWTSTAMP_TX_* */
-    int rx_filter;       /* HWTSTAMP_FILTER_* */
+	int flags;	/* no flags defined right now, must be zero */
+	int tx_type;	/* HWTSTAMP_TX_* */
+	int rx_filter;	/* HWTSTAMP_FILTER_* */
 };
 
 Desired behavior is passed into the kernel and to a specific device by
@@ -139,42 +141,56 @@ enum {
 	/* time stamp any incoming packet */
 	HWTSTAMP_FILTER_ALL,
 
-        /* return value: time stamp all packets requested plus some others */
-        HWTSTAMP_FILTER_SOME,
+	/* return value: time stamp all packets requested plus some others */
+	HWTSTAMP_FILTER_SOME,
 
 	/* PTP v1, UDP, any kind of event packet */
 	HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
 
-        ...
+	/* for the complete list of values, please check
+	 * the include file /include/linux/net_tstamp.h
+	 */
 };
 
 
 DEVICE IMPLEMENTATION
 
 A driver which supports hardware time stamping must support the
-SIOCSHWTSTAMP ioctl. Time stamps for received packets must be stored
-in the skb with skb_hwtstamp_set().
+SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with
+the actual values as described in the section on SIOCSHWTSTAMP.
+
+Time stamps for received packets must be stored in the skb. To get a pointer
+to the shared time stamp structure of the skb call skb_hwtstamps(). Then
+set the time stamps in the structure:
+
+struct skb_shared_hwtstamps {
+	/* hardware time stamp transformed into duration
+	 * since arbitrary point in time
+	 */
+	ktime_t	hwtstamp;
+	ktime_t	syststamp; /* hwtstamp transformed to system time base */
+};
 
 Time stamps for outgoing packets are to be generated as follows:
-- In hard_start_xmit(), check if skb_hwtstamp_check_tx_hardware()
-  returns non-zero. If yes, then the driver is expected
-  to do hardware time stamping.
+- In hard_start_xmit(), check if skb_tx(skb)->hardware is set no-zero.
+  If yes, then the driver is expected to do hardware time stamping.
 - If this is possible for the skb and requested, then declare
-  that the driver is doing the time stamping by calling
-  skb_hwtstamp_tx_in_progress(). A driver not supporting
-  hardware time stamping doesn't do that. A driver must never
-  touch sk_buff::tstamp! It is used to store how time stamping
-  for an outgoing packets is to be done.
+  that the driver is doing the time stamping by setting the field
+  skb_tx(skb)->in_progress non-zero. You might want to keep a pointer
+  to the associated skb for the next step and not free the skb. A driver
+  not supporting hardware time stamping doesn't do that. A driver must
+  never touch sk_buff::tstamp! It is used to store software generated
+  time stamps by the network subsystem.
 - As soon as the driver has sent the packet and/or obtained a
   hardware time stamp for it, it passes the time stamp back by
   calling skb_hwtstamp_tx() with the original skb, the raw
-  hardware time stamp and a handle to the device (necessary
-  to convert the hardware time stamp to system time). If obtaining
-  the hardware time stamp somehow fails, then the driver should
-  not fall back to software time stamping. The rationale is that
-  this would occur at a later time in the processing pipeline
-  than other software time stamping and therefore could lead
-  to unexpected deltas between time stamps.
-- If the driver did not call skb_hwtstamp_tx_in_progress(), then
+  hardware time stamp. skb_hwtstamp_tx() clones the original skb and
+  adds the timestamps, therefore the original skb has to be freed now.
+  If obtaining the hardware time stamp somehow fails, then the driver
+  should not fall back to software time stamping. The rationale is that
+  this would occur at a later time in the processing pipeline than other
+  software time stamping and therefore could lead to unexpected deltas
+  between time stamps.
+- If the driver did not call set skb_tx(skb)->in_progress, then
   dev_hard_start_xmit() checks whether software time stamping
   is wanted as fallback and potentially generates the time stamp.
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
index 6e37be1eeb2d..4f8930263dd9 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
@@ -21,6 +21,15 @@ Required properties:
 - fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the
   threads.
 
+Optional properties:
+- fsl,firmware-phandle:
+    Usage: required only if there is no fsl,qe-firmware child node
+    Value type: <phandle>
+    Definition: Points to a firmware node (see "QE Firmware Node" below)
+        that contains the firmware that should be uploaded for this QE.
+        The compatible property for the firmware node should say,
+        "fsl,qe-firmware".
+
 Recommended properties
 - brg-frequency : the internal clock source frequency for baud-rate
   generators in Hz.
@@ -59,3 +68,48 @@ Example:
 		reg = <0 c000>;
 	};
      };
+
+* QE Firmware Node
+
+This node defines a firmware binary that is embedded in the device tree, for
+the purpose of passing the firmware from bootloader to the kernel, or from
+the hypervisor to the guest.
+
+The firmware node itself contains the firmware binary contents, a compatible
+property, and any firmware-specific properties.  The node should be placed
+inside a QE node that needs it.  Doing so eliminates the need for a
+fsl,firmware-phandle property.  Other QE nodes that need the same firmware
+should define an fsl,firmware-phandle property that points to the firmware node
+in the first QE node.
+
+The fsl,firmware property can be specified in the DTS (possibly using incbin)
+or can be inserted by the boot loader at boot time.
+
+Required properties:
+  - compatible
+      Usage: required
+      Value type: <string>
+      Definition: A standard property.  Specify a string that indicates what
+          kind of firmware it is.  For QE, this should be "fsl,qe-firmware".
+
+   - fsl,firmware
+      Usage: required
+      Value type: <prop-encoded-array>, encoded as an array of bytes
+      Definition: A standard property.  This property contains the firmware
+          binary "blob".
+
+Example:
+	qe1@e0080000 {
+		compatible = "fsl,qe";
+		qe_firmware:qe-firmware {
+			compatible = "fsl,qe-firmware";
+			fsl,firmware = [0x70 0xcd 0x00 0x00 0x01 0x46 0x45 ...];
+		};
+		...
+	};
+
+	qe2@e0090000 {
+		compatible = "fsl,qe";
+		fsl,firmware-phandle = <&qe_firmware>;
+		...
+	};
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
index f4dd3bf99d12..98d14cb8a85d 100644
--- a/Documentation/sound/alsa/HD-Audio.txt
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -119,10 +119,18 @@ the codec slots 0 and 1 no matter what the hardware reports.
 
 Interrupt Handling
 ~~~~~~~~~~~~~~~~~~
-In rare but some cases, the interrupt isn't properly handled as
-default.  You would notice this by the DMA transfer error reported by
-ALSA PCM core, for example.  Using MSI might help in such a case.
-Pass `enable_msi=1` option for enabling MSI.
+HD-audio driver uses MSI as default (if available) since 2.6.33
+kernel as MSI works better on some machines, and in general, it's
+better for performance.  However, Nvidia controllers showed bad
+regressions with MSI (especially in a combination with AMD chipset),
+thus we disabled MSI for them.
+
+There seem also still other devices that don't work with MSI.  If you
+see a regression wrt the sound quality (stuttering, etc) or a lock-up
+in the recent kernel, try to pass `enable_msi=0` option to disable
+MSI.  If it works, you can add the known bad device to the blacklist
+defined in hda_intel.c.  In such a case, please report and give the
+patch back to the upstream developer. 
 
 
 HD-AUDIO CODEC
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c
index 10abd3773e49..16feda901469 100644
--- a/Documentation/spi/spidev_test.c
+++ b/Documentation/spi/spidev_test.c
@@ -58,7 +58,7 @@ static void transfer(int fd)
 	};
 
 	ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
-	if (ret == 1)
+	if (ret < 1)
 		pabort("can't send spi message");
 
 	for (ret = 0; ret < ARRAY_SIZE(tx); ret++) {
diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt
index 5effa5bd993b..e213f45cf9d7 100644
--- a/Documentation/stable_kernel_rules.txt
+++ b/Documentation/stable_kernel_rules.txt
@@ -18,16 +18,15 @@ Rules on what kind of patches are accepted, and which ones are not, into the
  - It cannot contain any "trivial" fixes in it (spelling changes,
    whitespace cleanups, etc).
  - It must follow the Documentation/SubmittingPatches rules.
- - It or an equivalent fix must already exist in Linus' tree.  Quote the
-   respective commit ID in Linus' tree in your patch submission to -stable.
+ - It or an equivalent fix must already exist in Linus' tree (upstream).
 
 
 Procedure for submitting patches to the -stable tree:
 
  - Send the patch, after verifying that it follows the above rules, to
-   stable@kernel.org.
- - To have the patch automatically included in the stable tree, add the
-   the tag
+   stable@kernel.org.  You must note the upstream commit ID in the changelog
+   of your submission.
+ - To have the patch automatically included in the stable tree, add the tag
      Cc: stable@kernel.org
    in the sign-off area. Once the patch is merged it will be applied to
    the stable tree without anything else needing to be done by the author
diff --git a/Documentation/volatile-considered-harmful.txt b/Documentation/volatile-considered-harmful.txt
index 991c26a6ef64..db0cb228d64a 100644
--- a/Documentation/volatile-considered-harmful.txt
+++ b/Documentation/volatile-considered-harmful.txt
@@ -63,9 +63,9 @@ way to perform a busy wait is:
         cpu_relax();
 
 The cpu_relax() call can lower CPU power consumption or yield to a
-hyperthreaded twin processor; it also happens to serve as a memory barrier,
-so, once again, volatile is unnecessary.  Of course, busy-waiting is
-generally an anti-social act to begin with.
+hyperthreaded twin processor; it also happens to serve as a compiler
+barrier, so, once again, volatile is unnecessary.  Of course, busy-
+waiting is generally an anti-social act to begin with.
 
 There are still a few rare situations where volatile makes sense in the
 kernel:
diff --git a/Documentation/watchdog/src/watchdog-simple.c b/Documentation/watchdog/src/watchdog-simple.c
index 4cf72f3fa8e9..ba45803a2216 100644
--- a/Documentation/watchdog/src/watchdog-simple.c
+++ b/Documentation/watchdog/src/watchdog-simple.c
@@ -17,9 +17,6 @@ int main(void)
 			ret = -1;
 			break;
 		}
-		ret = fsync(fd);
-		if (ret)
-			break;
 		sleep(10);
 	}
 	close(fd);
diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c
index a750532ffcf8..63fdc34ceb98 100644
--- a/Documentation/watchdog/src/watchdog-test.c
+++ b/Documentation/watchdog/src/watchdog-test.c
@@ -31,6 +31,8 @@ static void keep_alive(void)
  */
 int main(int argc, char *argv[])
 {
+    int flags;
+
     fd = open("/dev/watchdog", O_WRONLY);
 
     if (fd == -1) {
@@ -41,12 +43,14 @@ int main(int argc, char *argv[])
 
     if (argc > 1) {
 	if (!strncasecmp(argv[1], "-d", 2)) {
-	    ioctl(fd, WDIOC_SETOPTIONS, WDIOS_DISABLECARD);
+	    flags = WDIOS_DISABLECARD;
+	    ioctl(fd, WDIOC_SETOPTIONS, &flags);
 	    fprintf(stderr, "Watchdog card disabled.\n");
 	    fflush(stderr);
 	    exit(0);
 	} else if (!strncasecmp(argv[1], "-e", 2)) {
-	    ioctl(fd, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
+	    flags = WDIOS_ENABLECARD;
+	    ioctl(fd, WDIOC_SETOPTIONS, &flags);
 	    fprintf(stderr, "Watchdog card enabled.\n");
 	    fflush(stderr);
 	    exit(0);
diff --git a/Documentation/watchdog/watchdog-api.txt b/Documentation/watchdog/watchdog-api.txt
index 4cc4ba9d7150..eb7132ed8bbc 100644
--- a/Documentation/watchdog/watchdog-api.txt
+++ b/Documentation/watchdog/watchdog-api.txt
@@ -222,11 +222,10 @@ returned value is the temperature in degrees fahrenheit.
     ioctl(fd, WDIOC_GETTEMP, &temperature);
 
 Finally the SETOPTIONS ioctl can be used to control some aspects of
-the cards operation; right now the pcwd driver is the only one
-supporting this ioctl.
+the cards operation.
 
     int options = 0;
-    ioctl(fd, WDIOC_SETOPTIONS, options);
+    ioctl(fd, WDIOC_SETOPTIONS, &options);
 
 The following options are available:
author	Russell King <rmk+kernel@arm.linux.org.uk>	2010-05-17 17:24:04 +0100
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2010-05-17 17:24:04 +0100
commit	ac1d426e825ab5778995f2f6f053ca2e6b45c622 (patch)
tree	75b91356ca39463e0112931aa6790802fb1e07a2 /Documentation
parent	fda0e18c8a7a3e02747c2b045b4fcd2c920410b9 (diff)
parent	a3685f00652af83f12b63e3b4ef48f29581ba48b (diff)