author     Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2019-06-24 10:11:27 +0200
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2019-06-24 10:11:27 +0200
commit     25bc694a8a086bfee6e5b9dd9e53f4de721b0acf (patch)
tree       a843d17b336c9ea7d4cad67843ebbf255d61ea1f /drivers/pci
parent     3e26c5feed2add218046ecf91bab3cfa9bf762a6 (diff)
parent     000dd5316e1c756a1c028f22e01d06a38249dd4d (diff)
Merge back PCI power management material for v5.3.
Diffstat (limited to 'drivers/pci')
-rw-r--r--  drivers/pci/p2pdma.c             115
-rw-r--r--  drivers/pci/pci-driver.c          22
-rw-r--r--  drivers/pci/pci.c                 95
-rw-r--r--  drivers/pci/pci.h                  4
-rw-r--r--  drivers/pci/pcie/portdrv_core.c   66
5 files changed, 220 insertions(+), 82 deletions(-)
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 742928d0053e..a98126ad9c3a 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -20,12 +20,16 @@
#include <linux/seq_buf.h>
struct pci_p2pdma {
- struct percpu_ref devmap_ref;
- struct completion devmap_ref_done;
struct gen_pool *pool;
bool p2pmem_published;
};
+struct p2pdma_pagemap {
+ struct dev_pagemap pgmap;
+ struct percpu_ref ref;
+ struct completion ref_done;
+};
+
static ssize_t size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -74,41 +78,45 @@ static const struct attribute_group p2pmem_group = {
.name = "p2pmem",
};
+static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref)
+{
+ return container_of(ref, struct p2pdma_pagemap, ref);
+}
+
static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
{
- struct pci_p2pdma *p2p =
- container_of(ref, struct pci_p2pdma, devmap_ref);
+ struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
- complete_all(&p2p->devmap_ref_done);
+ complete(&p2p_pgmap->ref_done);
}
static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
{
- /*
- * pci_p2pdma_add_resource() may be called multiple times
- * by a driver and may register the percpu_kill devm action multiple
- * times. We only want the first action to actually kill the
- * percpu_ref.
- */
- if (percpu_ref_is_dying(ref))
- return;
-
percpu_ref_kill(ref);
}
+static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref)
+{
+ struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
+
+ wait_for_completion(&p2p_pgmap->ref_done);
+ percpu_ref_exit(&p2p_pgmap->ref);
+}
+
static void pci_p2pdma_release(void *data)
{
struct pci_dev *pdev = data;
+ struct pci_p2pdma *p2pdma = pdev->p2pdma;
- if (!pdev->p2pdma)
+ if (!p2pdma)
return;
- wait_for_completion(&pdev->p2pdma->devmap_ref_done);
- percpu_ref_exit(&pdev->p2pdma->devmap_ref);
+ /* Flush and disable pci_alloc_p2p_mem() */
+ pdev->p2pdma = NULL;
+ synchronize_rcu();
- gen_pool_destroy(pdev->p2pdma->pool);
+ gen_pool_destroy(p2pdma->pool);
sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
- pdev->p2pdma = NULL;
}
static int pci_p2pdma_setup(struct pci_dev *pdev)
@@ -124,12 +132,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
if (!p2p->pool)
goto out;
- init_completion(&p2p->devmap_ref_done);
- error = percpu_ref_init(&p2p->devmap_ref,
- pci_p2pdma_percpu_release, 0, GFP_KERNEL);
- if (error)
- goto out_pool_destroy;
-
error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
if (error)
goto out_pool_destroy;
@@ -163,6 +165,7 @@ out:
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset)
{
+ struct p2pdma_pagemap *p2p_pgmap;
struct dev_pagemap *pgmap;
void *addr;
int error;
@@ -185,18 +188,27 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
return error;
}
- pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
- if (!pgmap)
+ p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+ if (!p2p_pgmap)
return -ENOMEM;
+ init_completion(&p2p_pgmap->ref_done);
+ error = percpu_ref_init(&p2p_pgmap->ref,
+ pci_p2pdma_percpu_release, 0, GFP_KERNEL);
+ if (error)
+ goto pgmap_free;
+
+ pgmap = &p2p_pgmap->pgmap;
+
pgmap->res.start = pci_resource_start(pdev, bar) + offset;
pgmap->res.end = pgmap->res.start + size - 1;
pgmap->res.flags = pci_resource_flags(pdev, bar);
- pgmap->ref = &pdev->p2pdma->devmap_ref;
+ pgmap->ref = &p2p_pgmap->ref;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
pci_resource_start(pdev, bar);
pgmap->kill = pci_p2pdma_percpu_kill;
+ pgmap->cleanup = pci_p2pdma_percpu_cleanup;
addr = devm_memremap_pages(&pdev->dev, pgmap);
if (IS_ERR(addr)) {
@@ -204,19 +216,22 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
goto pgmap_free;
}
- error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr,
+ error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset,
- resource_size(&pgmap->res), dev_to_node(&pdev->dev));
+ resource_size(&pgmap->res), dev_to_node(&pdev->dev),
+ &p2p_pgmap->ref);
if (error)
- goto pgmap_free;
+ goto pages_free;
pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
&pgmap->res);
return 0;
+pages_free:
+ devm_memunmap_pages(&pdev->dev, pgmap);
pgmap_free:
- devm_kfree(&pdev->dev, pgmap);
+ devm_kfree(&pdev->dev, p2p_pgmap);
return error;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
@@ -585,19 +600,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
*/
void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
{
- void *ret;
+ void *ret = NULL;
+ struct percpu_ref *ref;
+ /*
+ * Pairs with synchronize_rcu() in pci_p2pdma_release() to
+ * ensure pdev->p2pdma is non-NULL for the duration of the
+ * read-lock.
+ */
+ rcu_read_lock();
if (unlikely(!pdev->p2pdma))
- return NULL;
-
- if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref)))
- return NULL;
-
- ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size);
+ goto out;
- if (unlikely(!ret))
- percpu_ref_put(&pdev->p2pdma->devmap_ref);
+ ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
+ (void **) &ref);
+ if (!ret)
+ goto out;
+ if (unlikely(!percpu_ref_tryget_live(ref))) {
+ gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
+ ret = NULL;
+ goto out;
+ }
+out:
+ rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
@@ -610,8 +636,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
*/
void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
{
- gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size);
- percpu_ref_put(&pdev->p2pdma->devmap_ref);
+ struct percpu_ref *ref;
+
+ gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
+ (void **) &ref);
+ percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(pci_free_p2pmem);
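
Note: the p2pdma changes above move the reference count into a per-pagemap structure and guard pdev->p2pdma with RCU, but the allocator API seen by consumer drivers is unchanged. The following is a minimal, hedged sketch of that consumer-side usage, built only from the functions touched in this diff; "P2P_BAR" and "demo_setup_p2pmem()" are hypothetical names for illustration.

#include <linux/pci.h>
#include <linux/pci-p2pdma.h>
#include <linux/sizes.h>

#define P2P_BAR 4	/* hypothetical BAR exposing device memory */

static int demo_setup_p2pmem(struct pci_dev *pdev)
{
	void *buf;
	int ret;

	/* Expose the whole BAR as peer-to-peer DMA memory. */
	ret = pci_p2pdma_add_resource(pdev, P2P_BAR,
				      pci_resource_len(pdev, P2P_BAR), 0);
	if (ret)
		return ret;

	/* An allocation now pins the backing p2pdma_pagemap via its percpu_ref. */
	buf = pci_alloc_p2pmem(pdev, SZ_4K);
	if (!buf)
		return -ENOMEM;

	/* ... program the device to DMA to/from buf ... */

	pci_free_p2pmem(pdev, buf, SZ_4K);
	return 0;
}
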
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 98af9ecd4a90..bd097ea5925c 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -678,6 +678,7 @@ static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev)
static int pci_pm_prepare(struct device *dev)
{
struct device_driver *drv = dev->driver;
+ struct pci_dev *pci_dev = to_pci_dev(dev);
if (drv && drv->pm && drv->pm->prepare) {
int error = drv->pm->prepare(dev);
@@ -687,7 +688,15 @@ static int pci_pm_prepare(struct device *dev)
if (!error && dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_PREPARE))
return 0;
}
- return pci_dev_keep_suspended(to_pci_dev(dev));
+ if (pci_dev_need_resume(pci_dev))
+ return 0;
+
+ /*
+ * The PME setting needs to be adjusted here in case the direct-complete
+ * optimization is used with respect to this device.
+ */
+ pci_dev_adjust_pme(pci_dev);
+ return 1;
}
static void pci_pm_complete(struct device *dev)
@@ -757,9 +766,11 @@ static int pci_pm_suspend(struct device *dev)
* better to resume the device from runtime suspend here.
*/
if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) ||
- !pci_dev_keep_suspended(pci_dev)) {
+ pci_dev_need_resume(pci_dev)) {
pm_runtime_resume(dev);
pci_dev->state_saved = false;
+ } else {
+ pci_dev_adjust_pme(pci_dev);
}
if (pm->suspend) {
@@ -1130,10 +1141,13 @@ static int pci_pm_poweroff(struct device *dev)
/* The reason to do that is the same as in pci_pm_suspend(). */
if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) ||
- !pci_dev_keep_suspended(pci_dev))
+ pci_dev_need_resume(pci_dev)) {
pm_runtime_resume(dev);
+ pci_dev->state_saved = false;
+ } else {
+ pci_dev_adjust_pme(pci_dev);
+ }
- pci_dev->state_saved = false;
if (pm->poweroff) {
int error;
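
Note: the pci_pm_prepare()/pci_pm_suspend() rework above only takes effect for drivers that opt into the direct-complete path. A hedged sketch of such an opt-in, using the standard driver-flags API (the probe function name is hypothetical):

#include <linux/pci.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	/*
	 * DPM_FLAG_SMART_SUSPEND lets the PM core leave a runtime-suspended
	 * device suspended across system sleep (the branch that now calls
	 * pci_dev_adjust_pme()); DPM_FLAG_SMART_PREPARE lets the driver's
	 * ->prepare() callback veto direct-complete when it has to.
	 */
	dev_pm_set_driver_flags(&pdev->dev,
				DPM_FLAG_SMART_SUSPEND | DPM_FLAG_SMART_PREPARE);
	pm_runtime_allow(&pdev->dev);
	return 0;
}
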
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8abc843b1615..e34fb2b3c466 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1004,15 +1004,10 @@ static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state)
if (state == PCI_D0) {
pci_platform_power_transition(dev, PCI_D0);
/*
- * Mandatory power management transition delays, see
- * PCI Express Base Specification Revision 2.0 Section
- * 6.6.1: Conventional Reset. Do not delay for
- * devices powered on/off by corresponding bridge,
- * because have already delayed for the bridge.
+ * Mandatory power management transition delays are
+ * handled in the PCIe portdrv resume hooks.
*/
if (dev->runtime_d3cold) {
- if (dev->d3cold_delay && !dev->imm_ready)
- msleep(dev->d3cold_delay);
/*
* When powering on a bridge from D3cold, the
* whole hierarchy may be powered on into
@@ -2065,6 +2060,13 @@ static void pci_pme_list_scan(struct work_struct *work)
*/
if (bridge && bridge->current_state != PCI_D0)
continue;
+ /*
+ * If the device is in D3cold it should not be
+ * polled either.
+ */
+ if (pme_dev->dev->current_state == PCI_D3cold)
+ continue;
+
pci_pme_wakeup(pme_dev->dev, NULL);
} else {
list_del(&pme_dev->list);
@@ -2459,45 +2461,56 @@ bool pci_dev_run_wake(struct pci_dev *dev)
EXPORT_SYMBOL_GPL(pci_dev_run_wake);
/**
- * pci_dev_keep_suspended - Check if the device can stay in the suspended state.
+ * pci_dev_need_resume - Check if it is necessary to resume the device.
* @pci_dev: Device to check.
*
- * Return 'true' if the device is runtime-suspended, it doesn't have to be
+ * Return 'true' if the device is not runtime-suspended or it has to be
* reconfigured due to wakeup settings difference between system and runtime
- * suspend and the current power state of it is suitable for the upcoming
- * (system) transition.
- *
- * If the device is not configured for system wakeup, disable PME for it before
- * returning 'true' to prevent it from waking up the system unnecessarily.
+ * suspend, or the current power state of it is not suitable for the upcoming
+ * (system-wide) transition.
*/
-bool pci_dev_keep_suspended(struct pci_dev *pci_dev)
+bool pci_dev_need_resume(struct pci_dev *pci_dev)
{
struct device *dev = &pci_dev->dev;
- bool wakeup = device_may_wakeup(dev);
+ pci_power_t target_state;
- if (!pm_runtime_suspended(dev)
- || pci_target_state(pci_dev, wakeup) != pci_dev->current_state
- || platform_pci_need_resume(pci_dev))
- return false;
+ if (!pm_runtime_suspended(dev) || platform_pci_need_resume(pci_dev))
+ return true;
+
+ target_state = pci_target_state(pci_dev, device_may_wakeup(dev));
/*
- * At this point the device is good to go unless it's been configured
- * to generate PME at the runtime suspend time, but it is not supposed
- * to wake up the system. In that case, simply disable PME for it
- * (it will have to be re-enabled on exit from system resume).
- *
- * If the device's power state is D3cold and the platform check above
- * hasn't triggered, the device's configuration is suitable and we don't
- * need to manipulate it at all.
+ * If the earlier platform check has not triggered, D3cold is just power
+ * removal on top of D3hot, so no need to resume the device in that
+ * case.
*/
+ return target_state != pci_dev->current_state &&
+ target_state != PCI_D3cold &&
+ pci_dev->current_state != PCI_D3hot;
+}
+
+/**
+ * pci_dev_adjust_pme - Adjust PME setting for a suspended device.
+ * @pci_dev: Device to check.
+ *
+ * If the device is suspended and it is not configured for system wakeup,
+ * disable PME for it to prevent it from waking up the system unnecessarily.
+ *
+ * Note that if the device's power state is D3cold and the platform check in
+ * pci_dev_need_resume() has not triggered, the device's configuration need not
+ * be changed.
+ */
+void pci_dev_adjust_pme(struct pci_dev *pci_dev)
+{
+ struct device *dev = &pci_dev->dev;
+
spin_lock_irq(&dev->power.lock);
- if (pm_runtime_suspended(dev) && pci_dev->current_state < PCI_D3cold &&
- !wakeup)
+ if (pm_runtime_suspended(dev) && !device_may_wakeup(dev) &&
+ pci_dev->current_state < PCI_D3cold)
__pci_pme_active(pci_dev, false);
spin_unlock_irq(&dev->power.lock);
- return true;
}
/**
@@ -4568,14 +4581,16 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS);
}
+
/**
- * pcie_wait_for_link - Wait until link is active or inactive
+ * pcie_wait_for_link_delay - Wait until link is active or inactive
* @pdev: Bridge device
* @active: waiting for active or inactive?
+ * @delay: Delay to wait after link has become active (in ms)
*
* Use this to wait till link becomes active or inactive.
*/
-bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
+bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay)
{
int timeout = 1000;
bool ret;
@@ -4612,13 +4627,25 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
timeout -= 10;
}
if (active && ret)
- msleep(100);
+ msleep(delay);
else if (ret != active)
pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n",
active ? "set" : "cleared");
return ret == active;
}
+/**
+ * pcie_wait_for_link - Wait until link is active or inactive
+ * @pdev: Bridge device
+ * @active: waiting for active or inactive?
+ *
+ * Use this to wait till link becomes active or inactive.
+ */
+bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
+{
+ return pcie_wait_for_link_delay(pdev, active, 100);
+}
+
void pci_reset_secondary_bus(struct pci_dev *dev)
{
u16 ctrl;
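
Note: pcie_wait_for_link() keeps its old behavior by delegating to the new pcie_wait_for_link_delay() with the 100 ms settle time. A hedged sketch of the kind of in-tree caller (inside drivers/pci, since the helper is declared in the internal pci.h) that relies on that guarantee; the helper name below is hypothetical:

/* Somewhere in drivers/pci: bring a slot back up, then rescan behind it. */
static int demo_bring_up_slot(struct pci_dev *bridge)
{
	if (!pcie_wait_for_link(bridge, true))
		return -ETIMEDOUT;

	/* Link is active and the mandatory settle delay has elapsed. */
	pci_rescan_bus(bridge->subordinate);
	return 0;
}
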
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9cb99380c61e..7f9fd93270ed 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -82,7 +82,8 @@ int pci_finish_runtime_suspend(struct pci_dev *dev);
void pcie_clear_root_pme_status(struct pci_dev *dev);
int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
void pci_pme_restore(struct pci_dev *dev);
-bool pci_dev_keep_suspended(struct pci_dev *dev);
+bool pci_dev_need_resume(struct pci_dev *dev);
+void pci_dev_adjust_pme(struct pci_dev *dev);
void pci_dev_complete_resume(struct pci_dev *pci_dev);
void pci_config_pm_runtime_get(struct pci_dev *dev);
void pci_config_pm_runtime_put(struct pci_dev *dev);
@@ -493,6 +494,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
u32 service);
+bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay);
bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
#ifdef CONFIG_PCIEASPM
void pcie_aspm_init_link_state(struct pci_dev *pdev);
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 1b330129089f..308c3e0c4a34 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
@@ -378,6 +379,67 @@ static int pm_iter(struct device *dev, void *data)
return 0;
}
+static int get_downstream_delay(struct pci_bus *bus)
+{
+ struct pci_dev *pdev;
+ int min_delay = 100;
+ int max_delay = 0;
+
+ list_for_each_entry(pdev, &bus->devices, bus_list) {
+ if (!pdev->imm_ready)
+ min_delay = 0;
+ else if (pdev->d3cold_delay < min_delay)
+ min_delay = pdev->d3cold_delay;
+ if (pdev->d3cold_delay > max_delay)
+ max_delay = pdev->d3cold_delay;
+ }
+
+ return max(min_delay, max_delay);
+}
+
+/*
+ * wait_for_downstream_link - Wait for downstream link to establish
+ * @pdev: PCIe port whose downstream link is waited
+ *
+ * Handle delays according to PCIe 4.0 section 6.6.1 before configuration
+ * access to the downstream component is permitted.
+ *
+ * This blocks PCI core resume of the hierarchy below this port until the
+ * link is trained. Should be called before resuming port services to
+ * prevent pciehp from starting to tear-down the hierarchy too soon.
+ */
+static void wait_for_downstream_link(struct pci_dev *pdev)
+{
+ int delay;
+
+ if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT &&
+ pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)
+ return;
+
+ if (pci_dev_is_disconnected(pdev))
+ return;
+
+ if (!pdev->subordinate || list_empty(&pdev->subordinate->devices) ||
+ !pdev->bridge_d3)
+ return;
+
+ delay = get_downstream_delay(pdev->subordinate);
+ if (!delay)
+ return;
+
+ dev_dbg(&pdev->dev, "waiting downstream link for %d ms\n", delay);
+
+ /*
+ * If downstream port does not support speeds greater than 5 GT/s
+ * need to wait 100ms. For higher speeds (gen3) we need to wait
+ * first for the data link layer to become active.
+ */
+ if (pcie_get_speed_cap(pdev) <= PCIE_SPEED_5_0GT)
+ msleep(delay);
+ else
+ pcie_wait_for_link_delay(pdev, true, delay);
+}
+
/**
* pcie_port_device_suspend - suspend port services associated with a PCIe port
* @dev: PCI Express port to handle
@@ -391,6 +453,8 @@ int pcie_port_device_suspend(struct device *dev)
int pcie_port_device_resume_noirq(struct device *dev)
{
size_t off = offsetof(struct pcie_port_service_driver, resume_noirq);
+
+ wait_for_downstream_link(to_pci_dev(dev));
return device_for_each_child(dev, &off, pm_iter);
}
@@ -421,6 +485,8 @@ int pcie_port_device_runtime_suspend(struct device *dev)
int pcie_port_device_runtime_resume(struct device *dev)
{
size_t off = offsetof(struct pcie_port_service_driver, runtime_resume);
+
+ wait_for_downstream_link(to_pci_dev(dev));
return device_for_each_child(dev, &off, pm_iter);
}
#endif /* PM */
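
Note: a hedged, userspace-style illustration of how get_downstream_delay() above combines per-device imm_ready and d3cold_delay values into a single wait; the sample numbers are made up and the field names mirror the diff.

#include <stdbool.h>
#include <stdio.h>

struct dev { bool imm_ready; int d3cold_delay; };

static int downstream_delay(const struct dev *devs, int n)
{
	int min_delay = 100, max_delay = 0;

	for (int i = 0; i < n; i++) {
		if (!devs[i].imm_ready)
			min_delay = 0;
		else if (devs[i].d3cold_delay < min_delay)
			min_delay = devs[i].d3cold_delay;
		if (devs[i].d3cold_delay > max_delay)
			max_delay = devs[i].d3cold_delay;
	}
	return max_delay > min_delay ? max_delay : min_delay;
}

int main(void)
{
	/* One device ready immediately, one needing the default 100 ms. */
	struct dev bus[] = { { true, 0 }, { false, 100 } };

	printf("%d ms\n", downstream_delay(bus, 2));	/* prints "100 ms" */
	return 0;
}
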