From 2430d12c94ff2bafcfe4f65edf7ee5f300d2d9c6 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sun, 13 Jun 2010 00:36:52 +0200
Subject: PM: describe kernel policy regarding wakeup defaults (v. 2)

This patch (as1381b) updates a comment describing the kernel's policy
toward enabling wakeup by default.

It also makes device_set_wakeup_capable() actually do something when
CONFIG_PM isn't enabled.  It's not clear this is necessary; however if
it isn't then device_init_wakeup() and device_can_wakeup() should also
be do-nothing routines.  Furthermore, I don't expect this change to
have any noticeable effect -- but if it does then clearly the old
behavior was wrong.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm_wakeup.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 22d64c18056..76aca48722a 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -29,8 +29,11 @@
 
 #ifdef CONFIG_PM
 
-/* changes to device_may_wakeup take effect on the next pm state change.
- * by default, devices should wakeup if they can.
+/* Changes to device_may_wakeup take effect on the next pm state change.
+ *
+ * By default, most devices should leave wakeup disabled.  The exceptions
+ * are devices that everyone expects to be wakeup sources: keyboards,
+ * power buttons, possibly network interfaces, etc.
  */
 static inline void device_init_wakeup(struct device *dev, bool val)
 {
@@ -59,7 +62,7 @@ static inline bool device_may_wakeup(struct device *dev)
 
 #else /* !CONFIG_PM */
 
-/* For some reason the next two routines work even without CONFIG_PM */
+/* For some reason the following routines work even without CONFIG_PM */
 static inline void device_init_wakeup(struct device *dev, bool val)
 {
 	dev->power.can_wakeup = val;
@@ -67,6 +70,7 @@ static inline void device_init_wakeup(struct device *dev, bool val)
 
 static inline void device_set_wakeup_capable(struct device *dev, bool capable)
 {
+	dev->power.can_wakeup = capable;
 }
 
 static inline bool device_can_wakeup(struct device *dev)
-- 
cgit v1.2.3


From b14e033e17d0ea0ba12668d0d2f371cd31586994 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 29 Jun 2010 22:49:24 +0200
Subject: PNPACPI: Add support for remote wakeup

This patch (as1354) adds remote-wakeup support to the pnpacpi driver.
The new can_wakeup method also allows other PNP protocol drivers
(pnpbios or isapnp) to add wakeup support, but I don't know enough
about how they work to actually do it.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/pnp/core.c         |  3 +++
 drivers/pnp/pnpacpi/core.c | 23 +++++++++++++++++++++++
 include/linux/pnp.h        |  1 +
 3 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c
index 5dba90995d9..88b3cde5259 100644
--- a/drivers/pnp/core.c
+++ b/drivers/pnp/core.c
@@ -164,6 +164,9 @@ int __pnp_add_device(struct pnp_dev *dev)
 	list_add_tail(&dev->global_list, &pnp_global);
 	list_add_tail(&dev->protocol_list, &dev->protocol->devices);
 	spin_unlock(&pnp_lock);
+	if (dev->protocol->can_wakeup)
+		device_set_wakeup_capable(&dev->dev,
+				dev->protocol->can_wakeup(dev));
 	return device_register(&dev->dev);
 }
 
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index f7ff628b7d9..dc4e32e031e 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -122,17 +122,37 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
 }
 
 #ifdef CONFIG_ACPI_SLEEP
+static bool pnpacpi_can_wakeup(struct pnp_dev *dev)
+{
+	struct acpi_device *acpi_dev = dev->data;
+	acpi_handle handle = acpi_dev->handle;
+
+	return acpi_bus_can_wakeup(handle);
+}
+
 static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
 {
 	struct acpi_device *acpi_dev = dev->data;
 	acpi_handle handle = acpi_dev->handle;
 	int power_state;
 
+	if (device_can_wakeup(&dev->dev)) {
+		int rc = acpi_pm_device_sleep_wake(&dev->dev,
+				device_may_wakeup(&dev->dev));
+
+		if (rc)
+			return rc;
+	}
 	power_state = acpi_pm_device_sleep_state(&dev->dev, NULL);
 	if (power_state < 0)
 		power_state = (state.event == PM_EVENT_ON) ?
 				ACPI_STATE_D0 : ACPI_STATE_D3;
 
+	/* acpi_bus_set_power() often fails (keyboard port can't be
+	 * powered-down?), and in any case, our return value is ignored
+	 * by pnp_bus_suspend().  Hence we don't revert the wakeup
+	 * setting if the set_power fails.
+	 */
 	return acpi_bus_set_power(handle, power_state);
 }
 
@@ -141,6 +161,8 @@ static int pnpacpi_resume(struct pnp_dev *dev)
 	struct acpi_device *acpi_dev = dev->data;
 	acpi_handle handle = acpi_dev->handle;
 
+	if (device_may_wakeup(&dev->dev))
+		acpi_pm_device_sleep_wake(&dev->dev, false);
 	return acpi_bus_set_power(handle, ACPI_STATE_D0);
 }
 #endif
@@ -151,6 +173,7 @@ struct pnp_protocol pnpacpi_protocol = {
 	.set	 = pnpacpi_set_resources,
 	.disable = pnpacpi_disable_resources,
 #ifdef CONFIG_ACPI_SLEEP
+	.can_wakeup = pnpacpi_can_wakeup,
 	.suspend = pnpacpi_suspend,
 	.resume = pnpacpi_resume,
 #endif
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 7c4193eb007..1bc1338b817 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -414,6 +414,7 @@ struct pnp_protocol {
 	int (*disable) (struct pnp_dev *dev);
 
 	/* protocol specific suspend/resume */
+	bool (*can_wakeup) (struct pnp_dev *dev);
 	int (*suspend) (struct pnp_dev * dev, pm_message_t state);
 	int (*resume) (struct pnp_dev * dev);
 
-- 
cgit v1.2.3


From c125e96f044427f38d106fab7bc5e4a5e6a18262 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 5 Jul 2010 22:43:53 +0200
Subject: PM: Make it possible to avoid races between wakeup and system sleep

One of the arguments during the suspend blockers discussion was that
the mainline kernel didn't contain any mechanisms making it possible
to avoid races between wakeup and system suspend.

Generally, there are two problems in that area.  First, if a wakeup
event occurs exactly when /sys/power/state is being written to, it
may be delivered to user space right before the freezer kicks in, so
the user space consumer of the event may not be able to process it
before the system is suspended.  Second, if a wakeup event occurs
after user space has been frozen, it is not generally guaranteed that
the ongoing transition of the system into a sleep state will be
aborted.

To address these issues introduce a new global sysfs attribute,
/sys/power/wakeup_count, associated with a running counter of wakeup
events and three helper functions, pm_stay_awake(), pm_relax(), and
pm_wakeup_event(), that may be used by kernel subsystems to control
the behavior of this attribute and to request the PM core to abort
system transitions into a sleep state already in progress.

The /sys/power/wakeup_count file may be read from or written to by
user space.  Reads will always succeed (unless interrupted by a
signal) and return the current value of the wakeup events counter.
Writes, however, will only succeed if the written number is equal to
the current value of the wakeup events counter.  If a write is
successful, it will cause the kernel to save the current value of the
wakeup events counter and to abort the subsequent system transition
into a sleep state if any wakeup events are reported after the write
has returned.

[The assumption is that before writing to /sys/power/state user space
will first read from /sys/power/wakeup_count.  Next, user space
consumers of wakeup events will have a chance to acknowledge or
veto the upcoming system transition to a sleep state.  Finally, if
the transition is allowed to proceed, /sys/power/wakeup_count will
be written to and if that succeeds, /sys/power/state will be written
to as well.  Still, if any wakeup events are reported to the PM core
by kernel subsystems after that point, the transition will be
aborted.]

Additionally, put a wakeup events counter into struct dev_pm_info and
make these per-device wakeup event counters available via sysfs,
so that it's possible to check the activity of various wakeup event
sources within the kernel.

To illustrate how subsystems can use pm_wakeup_event(), make the
low-level PCI runtime PM wakeup-handling code use it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: markgross <markgross@thegnar.org>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
---
 Documentation/ABI/testing/sysfs-power |  15 +++
 drivers/base/power/Makefile           |   2 +-
 drivers/base/power/main.c             |   1 +
 drivers/base/power/sysfs.c            |  15 +++
 drivers/base/power/wakeup.c           | 229 ++++++++++++++++++++++++++++++++++
 drivers/pci/pci-acpi.c                |   1 +
 drivers/pci/pci.c                     |  20 ++-
 drivers/pci/pci.h                     |   1 +
 drivers/pci/pcie/pme/pcie_pme.c       |   5 +-
 include/linux/pm.h                    |  10 ++
 include/linux/suspend.h               |   7 ++
 kernel/power/hibernate.c              |  20 ++-
 kernel/power/main.c                   |  55 ++++++++
 kernel/power/suspend.c                |   4 +-
 14 files changed, 375 insertions(+), 10 deletions(-)
 create mode 100644 drivers/base/power/wakeup.c

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index d6a801f45b4..2875f1f74a0 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -114,3 +114,18 @@ Description:
 		if this file contains "1", which is the default.  It may be
 		disabled by writing "0" to this file, in which case all devices
 		will be suspended and resumed synchronously.
+
+What:		/sys/power/wakeup_count
+Date:		July 2010
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power/wakeup_count file allows user space to put the
+		system into a sleep state while taking into account the
+		concurrent arrival of wakeup events.  Reading from it returns
+		the current number of registered wakeup events and it blocks if
+		some wakeup events are being processed at the time the file is
+		read from.  Writing to it will only succeed if the current
+		number of wakeup events is equal to the written value and, if
+		successful, will make the kernel abort a subsequent transition
+		to a sleep state if any wakeup events are reported after the
+		write has returned.
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 89de75325ce..cbccf9a3cee 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_PM)	+= sysfs.o
-obj-$(CONFIG_PM_SLEEP)	+= main.o
+obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
 obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
 obj-$(CONFIG_PM_OPS)	+= generic_ops.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 941fcb87e52..5419a49ff13 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -59,6 +59,7 @@ void device_pm_init(struct device *dev)
 {
 	dev->power.status = DPM_ON;
 	init_completion(&dev->power.completion);
+	dev->power.wakeup_count = 0;
 	pm_runtime_init(dev);
 }
 
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index a4c33bc5125..81d344e0e95 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -73,6 +73,8 @@
  *	device are known to the PM core.  However, for some devices this
  *	attribute is set to "enabled" by bus type code or device drivers and in
  *	that cases it should be safe to leave the default value.
+ *
+ *	wakeup_count - Report the number of wakeup events related to the device
  */
 
 static const char enabled[] = "enabled";
@@ -144,6 +146,16 @@ wake_store(struct device * dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
 
+#ifdef CONFIG_PM_SLEEP
+static ssize_t wakeup_count_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", dev->power.wakeup_count);
+}
+
+static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL);
+#endif
+
 #ifdef CONFIG_PM_ADVANCED_DEBUG
 #ifdef CONFIG_PM_RUNTIME
 
@@ -230,6 +242,9 @@ static struct attribute * power_attrs[] = {
 	&dev_attr_control.attr,
 #endif
 	&dev_attr_wakeup.attr,
+#ifdef CONFIG_PM_SLEEP
+	&dev_attr_wakeup_count.attr,
+#endif
 #ifdef CONFIG_PM_ADVANCED_DEBUG
 	&dev_attr_async.attr,
 #ifdef CONFIG_PM_RUNTIME
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
new file mode 100644
index 00000000000..25599077c39
--- /dev/null
+++ b/drivers/base/power/wakeup.c
@@ -0,0 +1,229 @@
+/*
+ * drivers/base/power/wakeup.c - System wakeup events framework
+ *
+ * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/suspend.h>
+#include <linux/pm.h>
+
+/*
+ * If set, the suspend/hibernate code will abort transitions to a sleep state
+ * if wakeup events are registered during or immediately before the transition.
+ */
+bool events_check_enabled;
+
+/* The counter of registered wakeup events. */
+static unsigned long event_count;
+/* A preserved old value of event_count. */
+static unsigned long saved_event_count;
+/* The counter of wakeup events being processed. */
+static unsigned long events_in_progress;
+
+static DEFINE_SPINLOCK(events_lock);
+
+/*
+ * The functions below use the observation that each wakeup event starts a
+ * period in which the system should not be suspended.  The moment this period
+ * will end depends on how the wakeup event is going to be processed after being
+ * detected and all of the possible cases can be divided into two distinct
+ * groups.
+ *
+ * First, a wakeup event may be detected by the same functional unit that will
+ * carry out the entire processing of it and possibly will pass it to user space
+ * for further processing.  In that case the functional unit that has detected
+ * the event may later "close" the "no suspend" period associated with it
+ * directly as soon as it has been dealt with.  The pair of pm_stay_awake() and
+ * pm_relax(), balanced with each other, is supposed to be used in such
+ * situations.
+ *
+ * Second, a wakeup event may be detected by one functional unit and processed
+ * by another one.  In that case the unit that has detected it cannot really
+ * "close" the "no suspend" period associated with it, unless it knows in
+ * advance what's going to happen to the event during processing.  This
+ * knowledge, however, may not be available to it, so it can simply specify time
+ * to wait before the system can be suspended and pass it as the second
+ * argument of pm_wakeup_event().
+ */
+
+/**
+ * pm_stay_awake - Notify the PM core that a wakeup event is being processed.
+ * @dev: Device the wakeup event is related to.
+ *
+ * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the
+ * counter of wakeup events being processed.  If @dev is not NULL, the counter
+ * of wakeup events related to @dev is incremented too.
+ *
+ * Call this function after detecting a wakeup event if pm_relax() is going
+ * to be called directly after processing the event (and possibly passing it to
+ * user space for further processing).
+ *
+ * It is safe to call this function from interrupt context.
+ */
+void pm_stay_awake(struct device *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (dev)
+		dev->power.wakeup_count++;
+
+	events_in_progress++;
+	spin_unlock_irqrestore(&events_lock, flags);
+}
+
+/**
+ * pm_relax - Notify the PM core that processing of a wakeup event has ended.
+ *
+ * Notify the PM core that a wakeup event has been processed by decrementing
+ * the counter of wakeup events being processed and incrementing the counter
+ * of registered wakeup events.
+ *
+ * Call this function for wakeup events whose processing started with calling
+ * pm_stay_awake().
+ *
+ * It is safe to call it from interrupt context.
+ */
+void pm_relax(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (events_in_progress) {
+		events_in_progress--;
+		event_count++;
+	}
+	spin_unlock_irqrestore(&events_lock, flags);
+}
+
+/**
+ * pm_wakeup_work_fn - Deferred closing of a wakeup event.
+ *
+ * Execute pm_relax() for a wakeup event detected in the past and free the
+ * work item object used for queuing up the work.
+ */
+static void pm_wakeup_work_fn(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+
+	pm_relax();
+	kfree(dwork);
+}
+
+/**
+ * pm_wakeup_event - Notify the PM core of a wakeup event.
+ * @dev: Device the wakeup event is related to.
+ * @msec: Anticipated event processing time (in milliseconds).
+ *
+ * Notify the PM core of a wakeup event (signaled by @dev) that will take
+ * approximately @msec milliseconds to be processed by the kernel.  Increment
+ * the counter of wakeup events being processed and queue up a work item
+ * that will execute pm_relax() for the event after @msec milliseconds.  If @dev
+ * is not NULL, the counter of wakeup events related to @dev is incremented too.
+ *
+ * It is safe to call this function from interrupt context.
+ */
+void pm_wakeup_event(struct device *dev, unsigned int msec)
+{
+	unsigned long flags;
+	struct delayed_work *dwork;
+
+	dwork = msec ? kzalloc(sizeof(*dwork), GFP_ATOMIC) : NULL;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (dev)
+		dev->power.wakeup_count++;
+
+	if (dwork) {
+		INIT_DELAYED_WORK(dwork, pm_wakeup_work_fn);
+		schedule_delayed_work(dwork, msecs_to_jiffies(msec));
+
+		events_in_progress++;
+	} else {
+		event_count++;
+	}
+	spin_unlock_irqrestore(&events_lock, flags);
+}
+
+/**
+ * pm_check_wakeup_events - Check for new wakeup events.
+ *
+ * Compare the current number of registered wakeup events with its preserved
+ * value from the past to check if new wakeup events have been registered since
+ * the old value was stored.  Check if the current number of wakeup events being
+ * processed is zero.
+ */
+bool pm_check_wakeup_events(void)
+{
+	unsigned long flags;
+	bool ret = true;
+
+	spin_lock_irqsave(&events_lock, flags);
+	if (events_check_enabled) {
+		ret = (event_count == saved_event_count) && !events_in_progress;
+		events_check_enabled = ret;
+	}
+	spin_unlock_irqrestore(&events_lock, flags);
+	return ret;
+}
+
+/**
+ * pm_get_wakeup_count - Read the number of registered wakeup events.
+ * @count: Address to store the value at.
+ *
+ * Store the number of registered wakeup events at the address in @count.  Block
+ * if the current number of wakeup events being processed is nonzero.
+ *
+ * Return false if the wait for the number of wakeup events being processed to
+ * drop down to zero has been interrupted by a signal (and the current number
+ * of wakeup events being processed is still nonzero).  Otherwise return true.
+ */
+bool pm_get_wakeup_count(unsigned long *count)
+{
+	bool ret;
+
+	spin_lock_irq(&events_lock);
+	if (capable(CAP_SYS_ADMIN))
+		events_check_enabled = false;
+
+	while (events_in_progress && !signal_pending(current)) {
+		spin_unlock_irq(&events_lock);
+
+		schedule_timeout_interruptible(msecs_to_jiffies(100));
+
+		spin_lock_irq(&events_lock);
+	}
+	*count = event_count;
+	ret = !events_in_progress;
+	spin_unlock_irq(&events_lock);
+	return ret;
+}
+
+/**
+ * pm_save_wakeup_count - Save the current number of registered wakeup events.
+ * @count: Value to compare with the current number of registered wakeup events.
+ *
+ * If @count is equal to the current number of registered wakeup events and the
+ * current number of wakeup events being processed is zero, store @count as the
+ * old number of registered wakeup events to be used by pm_check_wakeup_events()
+ * and return true.  Otherwise return false.
+ */
+bool pm_save_wakeup_count(unsigned long count)
+{
+	bool ret = false;
+
+	spin_lock_irq(&events_lock);
+	if (count == event_count && !events_in_progress) {
+		saved_event_count = count;
+		events_check_enabled = true;
+		ret = true;
+	}
+	spin_unlock_irq(&events_lock);
+	return ret;
+}
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 2e7a3bf1382..1ab98bbe58d 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -48,6 +48,7 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
 	if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) {
 		pci_check_pme_status(pci_dev);
 		pm_runtime_resume(&pci_dev->dev);
+		pci_wakeup_event(pci_dev);
 		if (pci_dev->subordinate)
 			pci_pme_wakeup_bus(pci_dev->subordinate);
 	}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 740fb4ea966..130ed1daf0f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1275,6 +1275,22 @@ bool pci_check_pme_status(struct pci_dev *dev)
 	return ret;
 }
 
+/*
+ * Time to wait before the system can be put into a sleep state after reporting
+ * a wakeup event signaled by a PCI device.
+ */
+#define PCI_WAKEUP_COOLDOWN	100
+
+/**
+ * pci_wakeup_event - Report a wakeup event related to a given PCI device.
+ * @dev: Device to report the wakeup event for.
+ */
+void pci_wakeup_event(struct pci_dev *dev)
+{
+	if (device_may_wakeup(&dev->dev))
+		pm_wakeup_event(&dev->dev, PCI_WAKEUP_COOLDOWN);
+}
+
 /**
  * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set.
  * @dev: Device to handle.
@@ -1285,8 +1301,10 @@ bool pci_check_pme_status(struct pci_dev *dev)
  */
 static int pci_pme_wakeup(struct pci_dev *dev, void *ign)
 {
-	if (pci_check_pme_status(dev))
+	if (pci_check_pme_status(dev)) {
 		pm_request_resume(&dev->dev);
+		pci_wakeup_event(dev);
+	}
 	return 0;
 }
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f8077b3c8c8..c8b7fd056cc 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -56,6 +56,7 @@ extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
 extern void pci_disable_enabled_device(struct pci_dev *dev);
 extern bool pci_check_pme_status(struct pci_dev *dev);
 extern int pci_finish_runtime_suspend(struct pci_dev *dev);
+extern void pci_wakeup_event(struct pci_dev *dev);
 extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
 extern void pci_pme_wakeup_bus(struct pci_bus *bus);
 extern void pci_pm_init(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/pme/pcie_pme.c b/drivers/pci/pcie/pme/pcie_pme.c
index d672a0a6381..bbdea18693d 100644
--- a/drivers/pci/pcie/pme/pcie_pme.c
+++ b/drivers/pci/pcie/pme/pcie_pme.c
@@ -154,6 +154,7 @@ static bool pcie_pme_walk_bus(struct pci_bus *bus)
 		/* Skip PCIe devices in case we started from a root port. */
 		if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) {
 			pm_request_resume(&dev->dev);
+			pci_wakeup_event(dev);
 			ret = true;
 		}
 
@@ -254,8 +255,10 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id)
 	if (found) {
 		/* The device is there, but we have to check its PME status. */
 		found = pci_check_pme_status(dev);
-		if (found)
+		if (found) {
 			pm_request_resume(&dev->dev);
+			pci_wakeup_event(dev);
+		}
 		pci_dev_put(dev);
 	} else if (devfn) {
 		/*
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 8e258c72797..b417fc46f3f 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -457,6 +457,7 @@ struct dev_pm_info {
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
 	struct completion	completion;
+	unsigned long		wakeup_count;
 #endif
 #ifdef CONFIG_PM_RUNTIME
 	struct timer_list	suspend_timer;
@@ -552,6 +553,11 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 	} while (0)
 
 extern void device_pm_wait_for_dev(struct device *sub, struct device *dev);
+
+/* drivers/base/power/wakeup.c */
+extern void pm_wakeup_event(struct device *dev, unsigned int msec);
+extern void pm_stay_awake(struct device *dev);
+extern void pm_relax(void);
 #else /* !CONFIG_PM_SLEEP */
 
 #define device_pm_lock() do {} while (0)
@@ -565,6 +571,10 @@ static inline int dpm_suspend_start(pm_message_t state)
 #define suspend_report_result(fn, ret)		do {} while (0)
 
 static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {}
+
+static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {}
+static inline void pm_stay_awake(struct device *dev) {}
+static inline void pm_relax(void) {}
 #endif /* !CONFIG_PM_SLEEP */
 
 /* How to reorder dpm_list after device_move() */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index bc7d6bb4cd8..bf1bab7b059 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -286,6 +286,13 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
 		{ .notifier_call = fn, .priority = pri };	\
 	register_pm_notifier(&fn##_nb);			\
 }
+
+/* drivers/base/power/wakeup.c */
+extern bool events_check_enabled;
+
+extern bool pm_check_wakeup_events(void);
+extern bool pm_get_wakeup_count(unsigned long *count);
+extern bool pm_save_wakeup_count(unsigned long count);
 #else /* !CONFIG_PM_SLEEP */
 
 static inline int register_pm_notifier(struct notifier_block *nb)
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aa9e916da4d..f6120291663 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -277,7 +277,7 @@ static int create_image(int platform_mode)
 		goto Enable_irqs;
 	}
 
-	if (hibernation_test(TEST_CORE))
+	if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events())
 		goto Power_up;
 
 	in_suspend = 1;
@@ -288,8 +288,10 @@ static int create_image(int platform_mode)
 			error);
 	/* Restore control flow magically appears here */
 	restore_processor_state();
-	if (!in_suspend)
+	if (!in_suspend) {
+		events_check_enabled = false;
 		platform_leave(platform_mode);
+	}
 
  Power_up:
 	sysdev_resume();
@@ -511,14 +513,20 @@ int hibernation_platform_enter(void)
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_HIBERNATE);
+	if (!pm_check_wakeup_events()) {
+		error = -EAGAIN;
+		goto Power_up;
+	}
+
 	hibernation_ops->enter();
 	/* We should never get here */
 	while (1);
 
-	/*
-	 * We don't need to reenable the nonboot CPUs or resume consoles, since
-	 * the system is going to be halted anyway.
-	 */
+ Power_up:
+	sysdev_resume();
+	local_irq_enable();
+	enable_nonboot_cpus();
+
  Platform_finish:
 	hibernation_ops->finish();
 
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b58800b21fc..62b0bc6e498 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
 
 power_attr(state);
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * The 'wakeup_count' attribute, along with the functions defined in
+ * drivers/base/power/wakeup.c, provides a means by which wakeup events can be
+ * handled in a non-racy way.
+ *
+ * If a wakeup event occurs when the system is in a sleep state, it simply is
+ * woken up.  In turn, if an event that would wake the system up from a sleep
+ * state occurs when it is undergoing a transition to that sleep state, the
+ * transition should be aborted.  Moreover, if such an event occurs when the
+ * system is in the working state, an attempt to start a transition to the
+ * given sleep state should fail during a certain period after the detection of
+ * the event.  Using the 'state' attribute alone is not sufficient to satisfy
+ * these requirements, because a wakeup event may occur exactly when 'state'
+ * is being written to and may be delivered to user space right before it is
+ * frozen, so the event will remain only partially processed until the system is
+ * woken up by another event.  In particular, it won't cause the transition to
+ * a sleep state to be aborted.
+ *
+ * This difficulty may be overcome if user space uses 'wakeup_count' before
+ * writing to 'state'.  It first should read from 'wakeup_count' and store
+ * the read value.  Then, after carrying out its own preparations for the system
+ * transition to a sleep state, it should write the stored value to
+ * 'wakeup_count'.  If that fails, at least one wakeup event has occurred since
+ * 'wakeup_count' was read and 'state' should not be written to.  Otherwise, it
+ * is allowed to write to 'state', but the transition will be aborted if there
+ * are any wakeup events detected after 'wakeup_count' was written to.
+ */
+
+static ssize_t wakeup_count_show(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				char *buf)
+{
+	unsigned long val;
+
+	return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR;
+}
+
+static ssize_t wakeup_count_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t n)
+{
+	unsigned long val;
+
+	if (sscanf(buf, "%lu", &val) == 1) {
+		if (pm_save_wakeup_count(val))
+			return n;
+	}
+	return -EINVAL;
+}
+
+power_attr(wakeup_count);
+#endif /* CONFIG_PM_SLEEP */
+
 #ifdef CONFIG_PM_TRACE
 int pm_trace_enabled;
 
@@ -236,6 +290,7 @@ static struct attribute * g[] = {
 #endif
 #ifdef CONFIG_PM_SLEEP
 	&pm_async_attr.attr,
+	&wakeup_count_attr.attr,
 #ifdef CONFIG_PM_DEBUG
 	&pm_test_attr.attr,
 #endif
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index f37cb7dd440..5f8d09f9432 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state)
 
 	error = sysdev_suspend(PMSG_SUSPEND);
 	if (!error) {
-		if (!suspend_test(TEST_CORE))
+		if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) {
 			error = suspend_ops->enter(state);
+			events_check_enabled = false;
+		}
 		sysdev_resume();
 	}
 
-- 
cgit v1.2.3


From 12e4d0cc2e0a776a526c93bb2fcb9267abc6e0b1 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Thu, 1 Jul 2010 21:46:36 +0200
Subject: plist: Add plist_last

plist is currently used by the scheduler, which only needs to know the
highest item in the list.  This adds plist_last which allows you to
find the lowest.  This is necessary for using plists to implement a
fast search of dynamic ranges in pm_qos which can have both highest
and lowest criteria.

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/plist.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index 6898985e7b3..7254eda078e 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -259,6 +259,23 @@ static inline int plist_node_empty(const struct plist_node *node)
 	container_of(plist_first(head), type, member)
 #endif
 
+/**
+ * plist_last_entry - get the struct for the last entry
+ * @head:	the &struct plist_head pointer
+ * @type:	the type of the struct this is embedded in
+ * @member:	the name of the list_struct within the struct
+ */
+#ifdef CONFIG_DEBUG_PI_LIST
+# define plist_last_entry(head, type, member)	\
+({ \
+	WARN_ON(plist_head_empty(head)); \
+	container_of(plist_last(head), type, member); \
+})
+#else
+# define plist_last_entry(head, type, member)	\
+	container_of(plist_last(head), type, member)
+#endif
+
 /**
  * plist_first - return the first node (and thus, highest priority)
  * @head:	the &struct plist_head pointer
@@ -271,4 +288,16 @@ static inline struct plist_node *plist_first(const struct plist_head *head)
 			  struct plist_node, plist.node_list);
 }
 
+/**
+ * plist_last - return the last node (and thus, lowest priority)
+ * @head:	the &struct plist_head pointer
+ *
+ * Assumes the plist is _not_ empty.
+ */
+static inline struct plist_node *plist_last(const struct plist_head *head)
+{
+	return list_entry(head->node_list.prev,
+			  struct plist_node, plist.node_list);
+}
+
 #endif
-- 
cgit v1.2.3


From 82f682514a5df89ffb3890627eebf0897b7a84ec Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Mon, 5 Jul 2010 22:53:06 +0200
Subject: pm_qos: Get rid of the allocation in pm_qos_add_request()

All current users of pm_qos_add_request() have the ability to supply
the memory required by the pm_qos routines, so make them do this and
eliminate the kzalloc() inside pm_qos_add_request().  This has the
double benefit of making the call never fail and allowing it to be
called from atomic context.

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Signed-off-by: mark gross <markgross@thegnar.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/net/e1000e/netdev.c            | 17 ++++-----
 drivers/net/igbvf/netdev.c             |  9 ++---
 drivers/net/wireless/ipw2x00/ipw2100.c | 12 +++---
 include/linux/netdevice.h              |  2 +-
 include/linux/pm_qos_params.h          | 13 +++++--
 include/sound/pcm.h                    |  2 +-
 kernel/pm_qos_params.c                 | 67 ++++++++++++++++++++--------------
 sound/core/pcm_native.c                | 13 +++----
 8 files changed, 74 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 57a7e41da69..9f13b660b80 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2901,10 +2901,10 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
 			 * dropped transactions.
 			 */
 			pm_qos_update_request(
-				adapter->netdev->pm_qos_req, 55);
+				&adapter->netdev->pm_qos_req, 55);
 		} else {
 			pm_qos_update_request(
-				adapter->netdev->pm_qos_req,
+				&adapter->netdev->pm_qos_req,
 				PM_QOS_DEFAULT_VALUE);
 		}
 	}
@@ -3196,9 +3196,9 @@ int e1000e_up(struct e1000_adapter *adapter)
 
 	/* DMA latency requirement to workaround early-receive/jumbo issue */
 	if (adapter->flags & FLAG_HAS_ERT)
-		adapter->netdev->pm_qos_req =
-			pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
-				       PM_QOS_DEFAULT_VALUE);
+		pm_qos_add_request(&adapter->netdev->pm_qos_req,
+				   PM_QOS_CPU_DMA_LATENCY,
+				   PM_QOS_DEFAULT_VALUE);
 
 	/* hardware has been reset, we need to reload some things */
 	e1000_configure(adapter);
@@ -3263,11 +3263,8 @@ void e1000e_down(struct e1000_adapter *adapter)
 	e1000_clean_tx_ring(adapter);
 	e1000_clean_rx_ring(adapter);
 
-	if (adapter->flags & FLAG_HAS_ERT) {
-		pm_qos_remove_request(
-			      adapter->netdev->pm_qos_req);
-		adapter->netdev->pm_qos_req = NULL;
-	}
+	if (adapter->flags & FLAG_HAS_ERT)
+		pm_qos_remove_request(&adapter->netdev->pm_qos_req);
 
 	/*
 	 * TODO: for power management, we could drop the link and
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index 5e2b2a8c56c..add6197d3bc 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -48,7 +48,7 @@
 #define DRV_VERSION "1.0.0-k0"
 char igbvf_driver_name[] = "igbvf";
 const char igbvf_driver_version[] = DRV_VERSION;
-struct pm_qos_request_list *igbvf_driver_pm_qos_req;
+static struct pm_qos_request_list igbvf_driver_pm_qos_req;
 static const char igbvf_driver_string[] =
 				"Intel(R) Virtual Function Network Driver";
 static const char igbvf_copyright[] = "Copyright (c) 2009 Intel Corporation.";
@@ -2902,8 +2902,8 @@ static int __init igbvf_init_module(void)
 	printk(KERN_INFO "%s\n", igbvf_copyright);
 
 	ret = pci_register_driver(&igbvf_driver);
-	igbvf_driver_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
-	                       PM_QOS_DEFAULT_VALUE);
+	pm_qos_add_request(&igbvf_driver_pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
+			   PM_QOS_DEFAULT_VALUE);
 
 	return ret;
 }
@@ -2918,8 +2918,7 @@ module_init(igbvf_init_module);
 static void __exit igbvf_exit_module(void)
 {
 	pci_unregister_driver(&igbvf_driver);
-	pm_qos_remove_request(igbvf_driver_pm_qos_req);
-	igbvf_driver_pm_qos_req = NULL;
+	pm_qos_remove_request(&igbvf_driver_pm_qos_req);
 }
 module_exit(igbvf_exit_module);
 
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 0bd4dfa59a8..7f0d98b885b 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -174,7 +174,7 @@ that only one external action is invoked at a time.
 #define DRV_DESCRIPTION	"Intel(R) PRO/Wireless 2100 Network Driver"
 #define DRV_COPYRIGHT	"Copyright(c) 2003-2006 Intel Corporation"
 
-struct pm_qos_request_list *ipw2100_pm_qos_req;
+static struct pm_qos_request_list ipw2100_pm_qos_req;
 
 /* Debugging stuff */
 #ifdef CONFIG_IPW2100_DEBUG
@@ -1741,7 +1741,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
 	/* the ipw2100 hardware really doesn't want power management delays
 	 * longer than 175usec
 	 */
-	pm_qos_update_request(ipw2100_pm_qos_req, 175);
+	pm_qos_update_request(&ipw2100_pm_qos_req, 175);
 
 	/* If the interrupt is enabled, turn it off... */
 	spin_lock_irqsave(&priv->low_lock, flags);
@@ -1889,7 +1889,7 @@ static void ipw2100_down(struct ipw2100_priv *priv)
 	ipw2100_disable_interrupts(priv);
 	spin_unlock_irqrestore(&priv->low_lock, flags);
 
-	pm_qos_update_request(ipw2100_pm_qos_req, PM_QOS_DEFAULT_VALUE);
+	pm_qos_update_request(&ipw2100_pm_qos_req, PM_QOS_DEFAULT_VALUE);
 
 	/* We have to signal any supplicant if we are disassociating */
 	if (associated)
@@ -6669,8 +6669,8 @@ static int __init ipw2100_init(void)
 	if (ret)
 		goto out;
 
-	ipw2100_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
-			PM_QOS_DEFAULT_VALUE);
+	pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
+			   PM_QOS_DEFAULT_VALUE);
 #ifdef CONFIG_IPW2100_DEBUG
 	ipw2100_debug_level = debug;
 	ret = driver_create_file(&ipw2100_pci_driver.driver,
@@ -6692,7 +6692,7 @@ static void __exit ipw2100_exit(void)
 			   &driver_attr_debug_level);
 #endif
 	pci_unregister_driver(&ipw2100_pci_driver);
-	pm_qos_remove_request(ipw2100_pm_qos_req);
+	pm_qos_remove_request(&ipw2100_pm_qos_req);
 }
 
 module_init(ipw2100_init);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b21e4054c12..2f22119b4b0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -779,7 +779,7 @@ struct net_device {
 	 */
 	char			name[IFNAMSIZ];
 
-	struct pm_qos_request_list *pm_qos_req;
+	struct pm_qos_request_list pm_qos_req;
 
 	/* device name hash chain */
 	struct hlist_node	name_hlist;
diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h
index 8ba440e5eb7..77cbddb3784 100644
--- a/include/linux/pm_qos_params.h
+++ b/include/linux/pm_qos_params.h
@@ -1,8 +1,10 @@
+#ifndef _LINUX_PM_QOS_PARAMS_H
+#define _LINUX_PM_QOS_PARAMS_H
 /* interface for the pm_qos_power infrastructure of the linux kernel.
  *
  * Mark Gross <mgross@linux.intel.com>
  */
-#include <linux/list.h>
+#include <linux/plist.h>
 #include <linux/notifier.h>
 #include <linux/miscdevice.h>
 
@@ -14,9 +16,12 @@
 #define PM_QOS_NUM_CLASSES 4
 #define PM_QOS_DEFAULT_VALUE -1
 
-struct pm_qos_request_list;
+struct pm_qos_request_list {
+	struct plist_node list;
+	int pm_qos_class;
+};
 
-struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value);
+void pm_qos_add_request(struct pm_qos_request_list *l, int pm_qos_class, s32 value);
 void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
 		s32 new_value);
 void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req);
@@ -24,4 +29,6 @@ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req);
 int pm_qos_request(int pm_qos_class);
 int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier);
 int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier);
+int pm_qos_request_active(struct pm_qos_request_list *req);
 
+#endif
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index dd76cdede64..6e3a29732dc 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -366,7 +366,7 @@ struct snd_pcm_substream {
 	int number;
 	char name[32];			/* substream name */
 	int stream;			/* stream (direction) */
-	struct pm_qos_request_list *latency_pm_qos_req; /* pm_qos request */
+	struct pm_qos_request_list latency_pm_qos_req; /* pm_qos request */
 	size_t buffer_bytes_max;	/* limit ring buffer size */
 	struct snd_dma_buffer dma_buffer;
 	unsigned int dma_buf_id;
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index db8e51d7f39..996a4dec5f9 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -30,7 +30,6 @@
 /*#define DEBUG*/
 
 #include <linux/pm_qos_params.h>
-#include <linux/plist.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
@@ -49,11 +48,6 @@
  * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
  * held, taken with _irqsave.  One lock to rule them all
  */
-struct pm_qos_request_list {
-	struct plist_node list;
-	int pm_qos_class;
-};
-
 enum pm_qos_type {
 	PM_QOS_MAX,		/* return the largest value */
 	PM_QOS_MIN		/* return the smallest value */
@@ -210,6 +204,12 @@ int pm_qos_request(int pm_qos_class)
 }
 EXPORT_SYMBOL_GPL(pm_qos_request);
 
+int pm_qos_request_active(struct pm_qos_request_list *req)
+{
+	return req->pm_qos_class != 0;
+}
+EXPORT_SYMBOL_GPL(pm_qos_request_active);
+
 /**
  * pm_qos_add_request - inserts new qos request into the list
  * @pm_qos_class: identifies which list of qos request to us
@@ -221,25 +221,23 @@ EXPORT_SYMBOL_GPL(pm_qos_request);
  * element as a handle for use in updating and removal.  Call needs to save
  * this handle for later use.
  */
-struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
+void pm_qos_add_request(struct pm_qos_request_list *dep,
+			int pm_qos_class, s32 value)
 {
-	struct pm_qos_request_list *dep;
-
-	dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
-	if (dep) {
-		struct pm_qos_object *o =  pm_qos_array[pm_qos_class];
-		int new_value;
-
-		if (value == PM_QOS_DEFAULT_VALUE)
-			new_value = o->default_value;
-		else
-			new_value = value;
-		plist_node_init(&dep->list, new_value);
-		dep->pm_qos_class = pm_qos_class;
-		update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE);
-	}
+	struct pm_qos_object *o =  pm_qos_array[pm_qos_class];
+	int new_value;
 
-	return dep;
+	if (pm_qos_request_active(dep)) {
+		WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n");
+		return;
+	}
+	if (value == PM_QOS_DEFAULT_VALUE)
+		new_value = o->default_value;
+	else
+		new_value = value;
+	plist_node_init(&dep->list, new_value);
+	dep->pm_qos_class = pm_qos_class;
+	update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE);
 }
 EXPORT_SYMBOL_GPL(pm_qos_add_request);
 
@@ -262,6 +260,11 @@ void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
 	if (!pm_qos_req) /*guard against callers passing in null */
 		return;
 
+	if (!pm_qos_request_active(pm_qos_req)) {
+		WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n");
+		return;
+	}
+
 	o = pm_qos_array[pm_qos_req->pm_qos_class];
 
 	if (new_value == PM_QOS_DEFAULT_VALUE)
@@ -290,9 +293,14 @@ void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
 		return;
 		/* silent return to keep pcm code cleaner */
 
+	if (!pm_qos_request_active(pm_qos_req)) {
+		WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n");
+		return;
+	}
+
 	o = pm_qos_array[pm_qos_req->pm_qos_class];
 	update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE);
-	kfree(pm_qos_req);
+	memset(pm_qos_req, 0, sizeof(*pm_qos_req));
 }
 EXPORT_SYMBOL_GPL(pm_qos_remove_request);
 
@@ -340,8 +348,12 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
 
 	pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
 	if (pm_qos_class >= 0) {
-		filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
-				PM_QOS_DEFAULT_VALUE);
+		struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			return -ENOMEM;
+		/* zeroed memory makes pm_qos_request_active(req) false */
+		pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE);
+		filp->private_data = req;
 
 		if (filp->private_data)
 			return 0;
@@ -353,8 +365,9 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp)
 {
 	struct pm_qos_request_list *req;
 
-	req = (struct pm_qos_request_list *)filp->private_data;
+	req = filp->private_data;
 	pm_qos_remove_request(req);
+	kfree(req);
 
 	return 0;
 }
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 303ac04ff6e..a3b2a647924 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -451,13 +451,11 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
 	snd_pcm_timer_resolution_change(substream);
 	runtime->status->state = SNDRV_PCM_STATE_SETUP;
 
-	if (substream->latency_pm_qos_req) {
-		pm_qos_remove_request(substream->latency_pm_qos_req);
-		substream->latency_pm_qos_req = NULL;
-	}
+	if (pm_qos_request_active(&substream->latency_pm_qos_req))
+		pm_qos_remove_request(&substream->latency_pm_qos_req);
 	if ((usecs = period_to_usecs(runtime)) >= 0)
-		substream->latency_pm_qos_req = pm_qos_add_request(
-					PM_QOS_CPU_DMA_LATENCY, usecs);
+		pm_qos_add_request(&substream->latency_pm_qos_req,
+				   PM_QOS_CPU_DMA_LATENCY, usecs);
 	return 0;
  _error:
 	/* hardware might be unuseable from this time,
@@ -512,8 +510,7 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream)
 	if (substream->ops->hw_free)
 		result = substream->ops->hw_free(substream);
 	runtime->status->state = SNDRV_PCM_STATE_OPEN;
-	pm_qos_remove_request(substream->latency_pm_qos_req);
-	substream->latency_pm_qos_req = NULL;
+	pm_qos_remove_request(&substream->latency_pm_qos_req);
 	return result;
 }
 
-- 
cgit v1.2.3


From ce4410116c5debfb0e049f5db4b5cd6211e05b80 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 7 Jul 2010 23:43:45 +0200
Subject: PM / Suspend: Fix ordering of calls in suspend error paths

The ACPI suspend code calls suspend_nvs_free() at a wrong place,
which may lead to a memory leak if there's an error executing
acpi_pm_prepare(), because acpi_pm_finish() will not be called in
that case.  However, the root cause of this problem is the
apparently confusing ordering of calls in suspend error paths that
needs to be fixed.

In addition to that, fix a typo in a label name in suspend.c.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Len Brown <len.brown@intel.com>
---
 include/linux/suspend.h | 10 ++++++----
 kernel/power/suspend.c  |  9 ++++-----
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index bf1bab7b059..4af270ec220 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -61,14 +61,15 @@ typedef int __bitwise suspend_state_t;
  *	before device drivers' late suspend callbacks are executed.  It returns
  *	0 on success or a negative error code otherwise, in which case the
  *	system cannot enter the desired sleep state (@prepare_late(), @enter(),
- *	@wake(), and @finish() will not be called in that case).
+ *	and @wake() will not be called in that case).
  *
  * @prepare_late: Finish preparing the platform for entering the system sleep
  *	state indicated by @begin().
  *	@prepare_late is called before disabling nonboot CPUs and after
  *	device drivers' late suspend callbacks have been executed.  It returns
  *	0 on success or a negative error code otherwise, in which case the
- *	system cannot enter the desired sleep state (@enter() and @wake()).
+ *	system cannot enter the desired sleep state (@enter() will not be
+ *	executed).
  *
  * @enter: Enter the system sleep state indicated by @begin() or represented by
  *	the argument if @begin() is not implemented.
@@ -81,14 +82,15 @@ typedef int __bitwise suspend_state_t;
  *	resume callbacks are executed.
  *	This callback is optional, but should be implemented by the platforms
  *	that implement @prepare_late().  If implemented, it is always called
- *	after @enter(), even if @enter() fails.
+ *	after @prepare_late and @enter(), even if one of them fails.
  *
  * @finish: Finish wake-up of the platform.
  *	@finish is called right prior to calling device drivers' regular suspend
  *	callbacks.
  *	This callback is optional, but should be implemented by the platforms
  *	that implement @prepare().  If implemented, it is always called after
- *	@enter() and @wake(), if implemented, even if any of them fails.
+ *	@enter() and @wake(), even if any of them fails.  It is also executed
+ *	after a failing @prepare().
  *
  * @end: Called by the PM core right after resuming devices, to indicate to
  *	the platform that the system has returned to the working state or
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 5f8d09f9432..7335952ee47 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -136,19 +136,19 @@ static int suspend_enter(suspend_state_t state)
 	if (suspend_ops->prepare) {
 		error = suspend_ops->prepare();
 		if (error)
-			return error;
+			goto Platform_finish;
 	}
 
 	error = dpm_suspend_noirq(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
-		goto Platfrom_finish;
+		goto Platform_finish;
 	}
 
 	if (suspend_ops->prepare_late) {
 		error = suspend_ops->prepare_late();
 		if (error)
-			goto Power_up_devices;
+			goto Platform_wake;
 	}
 
 	if (suspend_test(TEST_PLATFORM))
@@ -180,10 +180,9 @@ static int suspend_enter(suspend_state_t state)
 	if (suspend_ops->wake)
 		suspend_ops->wake();
 
- Power_up_devices:
 	dpm_resume_noirq(PMSG_RESUME);
 
- Platfrom_finish:
+ Platform_finish:
 	if (suspend_ops->finish)
 		suspend_ops->finish();
 
-- 
cgit v1.2.3


From 8d4b9d1bfef117862a2889dec4dac227068544c9 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 19 Jul 2010 02:01:06 +0200
Subject: PM / Runtime: Add runtime PM statistics (v3)

In order for PowerTOP to be able to report how well the new runtime PM is
working for the various drivers, the kernel needs to export some basic
statistics in sysfs.

This patch adds two sysfs files in the runtime PM domain that expose the
total time a device has been active, and the time a device has been
suspended.

With this, PowerTOP can compute the activity percentage:

Active %age = 100 * (delta active) / (delta active + delta suspended)

and present the information to the user.

I've written the PowerTOP code (slated for version 1.12) already, and the
output looks like this:

Runtime Device Power Management statistics
Active  Device name
 10.0%	06:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8101E/RTL8102E PCI Express Fast Ethernet controller

[version 2: fix stat update bugs noticed by Alan Stern]
[version 3: rebase to -next and move the sysfs declaration]

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/runtime.c | 54 ++++++++++++++++++++++++++++++++++++++------
 drivers/base/power/sysfs.c   | 30 ++++++++++++++++++++++++
 include/linux/pm.h           |  6 +++++
 3 files changed, 83 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b0ec0e9f27e..b78c401ffa7 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -123,6 +123,45 @@ int pm_runtime_idle(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_idle);
 
+
+/**
+ * update_pm_runtime_accounting - Update the time accounting of power states
+ * @dev: Device to update the accounting for
+ *
+ * Adds the jiffies elapsed since the previous call to suspended_jiffies if
+ * the device is RPM_SUSPENDED and to active_jiffies otherwise (nothing is
+ * added while disable_depth > 0).  These counters back the statistics that
+ * tools such as PowerTOP read, so this must be called each time before the
+ * runtime_status field is updated, to account the time in the old state
+ * correctly.
+ */
+void update_pm_runtime_accounting(struct device *dev)
+{
+	unsigned long now = jiffies;
+	long delta;
+
+	delta = now - dev->power.accounting_timestamp;
+	/* a timestamp ahead of now must not subtract accounted time */
+	if (delta < 0)
+		delta = 0;
+
+	dev->power.accounting_timestamp = now;
+
+	if (dev->power.disable_depth > 0)
+		return;
+
+	if (dev->power.runtime_status == RPM_SUSPENDED)
+		dev->power.suspended_jiffies += delta;
+	else
+		dev->power.active_jiffies += delta;
+}
+
+static void __update_runtime_status(struct device *dev, enum rpm_status status)
+{
+	update_pm_runtime_accounting(dev);
+	dev->power.runtime_status = status;
+}
+
 /**
  * __pm_runtime_suspend - Carry out run-time suspend of given device.
  * @dev: Device to suspend.
@@ -197,7 +236,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 		goto repeat;
 	}
 
-	dev->power.runtime_status = RPM_SUSPENDING;
+	__update_runtime_status(dev, RPM_SUSPENDING);
 	dev->power.deferred_resume = false;
 
 	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) {
@@ -228,7 +267,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 	}
 
 	if (retval) {
-		dev->power.runtime_status = RPM_ACTIVE;
+		__update_runtime_status(dev, RPM_ACTIVE);
 		if (retval == -EAGAIN || retval == -EBUSY) {
 			if (dev->power.timer_expires == 0)
 				notify = true;
@@ -237,7 +276,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 			pm_runtime_cancel_pending(dev);
 		}
 	} else {
-		dev->power.runtime_status = RPM_SUSPENDED;
+		__update_runtime_status(dev, RPM_SUSPENDED);
 		pm_runtime_deactivate_timer(dev);
 
 		if (dev->parent) {
@@ -381,7 +420,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 		goto repeat;
 	}
 
-	dev->power.runtime_status = RPM_RESUMING;
+	__update_runtime_status(dev, RPM_RESUMING);
 
 	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) {
 		spin_unlock_irq(&dev->power.lock);
@@ -411,10 +450,10 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 	}
 
 	if (retval) {
-		dev->power.runtime_status = RPM_SUSPENDED;
+		__update_runtime_status(dev, RPM_SUSPENDED);
 		pm_runtime_cancel_pending(dev);
 	} else {
-		dev->power.runtime_status = RPM_ACTIVE;
+		__update_runtime_status(dev, RPM_ACTIVE);
 		if (parent)
 			atomic_inc(&parent->power.child_count);
 	}
@@ -848,7 +887,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
 	}
 
  out_set:
-	dev->power.runtime_status = status;
+	__update_runtime_status(dev, status);
 	dev->power.runtime_error = 0;
  out:
 	spin_unlock_irqrestore(&dev->power.lock, flags);
@@ -1077,6 +1116,7 @@ void pm_runtime_init(struct device *dev)
 	dev->power.request_pending = false;
 	dev->power.request = RPM_REQ_NONE;
 	dev->power.deferred_resume = false;
+	dev->power.accounting_timestamp = jiffies;
 	INIT_WORK(&dev->power.work, pm_runtime_work);
 
 	dev->power.timer_expires = 0;
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 1eca50c8e7c..e56b4388fe6 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -6,6 +6,7 @@
 #include <linux/string.h>
 #include <linux/pm_runtime.h>
 #include <asm/atomic.h>
+#include <linux/jiffies.h>
 #include "power.h"
 
 /*
@@ -111,6 +112,33 @@ static ssize_t control_store(struct device * dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(control, 0644, control_show, control_store);
 
+static ssize_t rtpm_active_time_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int ret;
+	spin_lock_irq(&dev->power.lock);
+	update_pm_runtime_accounting(dev);
+	ret = sprintf(buf, "%u\n", jiffies_to_msecs(dev->power.active_jiffies));
+	spin_unlock_irq(&dev->power.lock);
+	return ret;
+}
+
+static DEVICE_ATTR(runtime_active_time, 0444, rtpm_active_time_show, NULL);
+
+static ssize_t rtpm_suspended_time_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int ret;
+	spin_lock_irq(&dev->power.lock);
+	update_pm_runtime_accounting(dev);
+	ret = sprintf(buf, "%u\n",
+		jiffies_to_msecs(dev->power.suspended_jiffies));
+	spin_unlock_irq(&dev->power.lock);
+	return ret;
+}
+
+static DEVICE_ATTR(runtime_suspended_time, 0444, rtpm_suspended_time_show, NULL);
+
 static ssize_t rtpm_status_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -254,6 +282,8 @@ static struct attribute * power_attrs[] = {
 #ifdef CONFIG_PM_RUNTIME
 	&dev_attr_control.attr,
 	&dev_attr_runtime_status.attr,
+	&dev_attr_runtime_suspended_time.attr,
+	&dev_attr_runtime_active_time.attr,
 #endif
 	&dev_attr_wakeup.attr,
 #ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/pm.h b/include/linux/pm.h
index b417fc46f3f..52e8c55ff31 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -477,9 +477,15 @@ struct dev_pm_info {
 	enum rpm_request	request;
 	enum rpm_status		runtime_status;
 	int			runtime_error;
+	unsigned long		active_jiffies;
+	unsigned long		suspended_jiffies;
+	unsigned long		accounting_timestamp;
 #endif
 };
 
+extern void update_pm_runtime_accounting(struct device *dev);
+
+
 /*
  * The PM_EVENT_ messages are also used by drivers implementing the legacy
  * suspend framework, based on the ->suspend() and ->resume() callbacks common
-- 
cgit v1.2.3