From 16d79cd4e23b1964d36c041ab027505ceacbbeeb Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 2 Jul 2020 18:26:49 +0200 Subject: PCI: Use 'pci_channel_state_t' instead of 'enum pci_channel_state' The method struct pci_error_handlers.error_detected() is defined and documented as taking an 'enum pci_channel_state' for the second argument, but most drivers use 'pci_channel_state_t' instead. This 'pci_channel_state_t' is not a typedef for the enum but a typedef for a bitwise type in order to have better/stricter typechecking. Consolidate everything by using 'pci_channel_state_t' in the method's definition, in the related helpers and in the drivers. Enforce use of 'pci_channel_state_t' by replacing 'enum pci_channel_state' with an anonymous 'enum'. Note: Currently, from a typechecking point of view this patch changes nothing because only the constants defined by the enum are bitwise, not the enum itself (sparse doesn't have the notion of 'bitwise enum'). This may change in some not too far future, hence the patch. [bhelgaas: squash in https://lore.kernel.org/r/20200702162651.49526-3-luc.vanoostenryck@gmail.com https://lore.kernel.org/r/20200702162651.49526-4-luc.vanoostenryck@gmail.com] Link: https://lore.kernel.org/r/20200702162651.49526-2-luc.vanoostenryck@gmail.com Signed-off-by: Luc Van Oostenryck Signed-off-by: Bjorn Helgaas --- Documentation/PCI/pci-error-recovery.rst | 8 ++++---- arch/powerpc/kernel/eeh_driver.c | 2 +- drivers/block/rsxx/core.c | 2 +- drivers/dma/ioat/init.c | 2 +- drivers/media/pci/ngene/ngene-cards.c | 2 +- drivers/misc/genwqe/card_base.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 4 ++-- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/sfc/falcon/efx.c | 2 +- drivers/pci/pci.h | 2 +- drivers/pci/pcie/err.c | 4 ++-- drivers/pci/pcie/portdrv_pci.c | 2 +- drivers/scsi/aacraid/linit.c | 2 +- drivers/scsi/sym53c8xx_2/sym_glue.c | 2 +- drivers/staging/qlge/qlge_main.c | 2 +- include/linux/pci.h | 4 ++-- 18 files changed, 24 insertions(+), 24 deletions(-) diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 13beee23cb04..ccd713423133 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -79,7 +79,7 @@ This structure has the form:: struct pci_error_handlers { - int (*error_detected)(struct pci_dev *dev, enum pci_channel_state); + int (*error_detected)(struct pci_dev *dev, pci_channel_state_t); int (*mmio_enabled)(struct pci_dev *dev); int (*slot_reset)(struct pci_dev *dev); void (*resume)(struct pci_dev *dev); @@ -87,11 +87,11 @@ This structure has the form:: The possible channel states are:: - enum pci_channel_state { + typedef enum { pci_channel_io_normal, /* I/O channel is in normal state */ pci_channel_io_frozen, /* I/O to channel is blocked */ pci_channel_io_perm_failure, /* PCI card is dead */ - }; + } pci_channel_state_t; Possible return values are:: @@ -348,7 +348,7 @@ STEP 6: Permanent Failure ------------------------- A "permanent failure" has occurred, and the platform cannot recover the device. The platform will call error_detected() with a -pci_channel_state value of pci_channel_io_perm_failure. +pci_channel_state_t value of pci_channel_io_perm_failure. The device driver should, at this point, assume the worst. It should cancel all pending I/O, refuse all new I/O, returning -EIO to diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7b048cee767c..ab8806d2e03e 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -214,7 +214,7 @@ static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata) pci_save_state(pdev); } -static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s) +static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s) { struct eeh_pe *pe; struct eeh_dev *edev, *tmp; diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 10f6368117d8..34e937dd6bca 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -625,7 +625,7 @@ static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) } static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, - enum pci_channel_state error) + pci_channel_state_t error) { int st; diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 58d13564f88b..089893f2bbb8 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1267,7 +1267,7 @@ static void ioat_resume(struct ioatdma_device *ioat_dma) #define DRV_NAME "ioatdma" static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error); diff --git a/drivers/media/pci/ngene/ngene-cards.c b/drivers/media/pci/ngene/ngene-cards.c index 6185806a00e0..8bfb3d8ea610 100644 --- a/drivers/media/pci/ngene/ngene-cards.c +++ b/drivers/media/pci/ngene/ngene-cards.c @@ -1186,7 +1186,7 @@ MODULE_DEVICE_TABLE(pci, ngene_id_tbl); /****************************************************************************/ static pci_ers_result_t ngene_error_detected(struct pci_dev *dev, - enum pci_channel_state state) + pci_channel_state_t state) { dev_err(&dev->dev, "PCI error\n"); if (state == pci_channel_io_perm_failure) diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index 1dc6c7c5cbce..97b8ecc42383 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c @@ -1240,7 +1240,7 @@ static void genwqe_remove(struct pci_dev *pci_dev) * error is detected. */ static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, - enum pci_channel_state state) + pci_channel_state_t state) { struct genwqe_dev *cd; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5d807c8004f8..f0de2d1842b4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -15465,7 +15465,7 @@ unmap: * remediation. **/ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { struct i40e_pf *pf = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 082825e3cb39..4dd9226a12df 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3586,7 +3586,7 @@ static void ice_remove(struct pci_dev *pdev) * is in progress. Allows the driver to gracefully prepare/handle PCI errors. */ static pci_ers_result_t -ice_pci_err_detected(struct pci_dev *pdev, enum pci_channel_state err) +ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) { struct ice_pf *pf = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index b64e91ea3465..00db4b5863b1 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -82,7 +82,7 @@ static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, static void ixgb_restore_vlan(struct ixgb_adapter *adapter); static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev, - enum pci_channel_state state); + pci_channel_state_t state); static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev); static void ixgb_io_resume (struct pci_dev *pdev); @@ -2194,7 +2194,7 @@ ixgb_restore_vlan(struct ixgb_adapter *adapter) * a PCI bus error is detected. */ static pci_ers_result_t ixgb_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgb_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 256807c28ff7..ed627aff7b36 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1519,7 +1519,7 @@ static const struct dev_pm_ops efx_pm_ops = { * Stop the software path and request a slot reset. */ static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; struct efx_nic *efx = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 42bcd34fc508..f8979991970e 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -3118,7 +3118,7 @@ static const struct dev_pm_ops ef4_pm_ops = { * Stop the software path and request a slot reset. */ static pci_ers_result_t ef4_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; struct ef4_nic *efx = pci_get_drvdata(pdev); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6d3f75867106..c6c0c455f59f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -555,7 +555,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) /* PCI error reporting and recovery */ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, - enum pci_channel_state state, + pci_channel_state_t state, pci_ers_result_t (*reset_link)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 14bb8f54723e..467686ee2d8b 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -46,7 +46,7 @@ static pci_ers_result_t merge_result(enum pci_ers_result orig, } static int report_error_detected(struct pci_dev *dev, - enum pci_channel_state state, + pci_channel_state_t state, enum pci_ers_result *result) { pci_ers_result_t vote; @@ -147,7 +147,7 @@ out: } pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, - enum pci_channel_state state, + pci_channel_state_t state, pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) { pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 3acf151ae015..3a3ce40ae1ab 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -146,7 +146,7 @@ static void pcie_portdrv_remove(struct pci_dev *dev) } static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, - enum pci_channel_state error) + pci_channel_state_t error) { /* Root Port has no impact. Always recovers. */ return PCI_ERS_RESULT_CAN_RECOVER; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index a308e86a97f1..37f65602b0ec 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -2002,7 +2002,7 @@ static void aac_remove_one(struct pci_dev *pdev) } static pci_ers_result_t aac_pci_error_detected(struct pci_dev *pdev, - enum pci_channel_state error) + pci_channel_state_t error) { struct Scsi_Host *shost = pci_get_drvdata(pdev); struct aac_dev *aac = shost_priv(shost); diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 2ca018ce796f..f455243bdb9b 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1743,7 +1743,7 @@ static void sym2_remove(struct pci_dev *pdev) * @state: current state of the PCI slot */ static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { /* If slot is permanently frozen, turn everything off */ if (state == pci_channel_io_perm_failure) { diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 402edaeffe12..ac30aefe49a1 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4678,7 +4678,7 @@ static void ql_eeh_close(struct net_device *ndev) * a PCI bus error is detected. */ static pci_ers_result_t qlge_io_error_detected(struct pci_dev *pdev, - enum pci_channel_state state) + pci_channel_state_t state) { struct net_device *ndev = pci_get_drvdata(pdev); struct ql_adapter *qdev = netdev_priv(ndev); diff --git a/include/linux/pci.h b/include/linux/pci.h index c79d83304e52..adcee9e30bfa 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -179,7 +179,7 @@ static inline const char *pci_power_name(pci_power_t state) */ typedef unsigned int __bitwise pci_channel_state_t; -enum pci_channel_state { +enum { /* I/O channel is in normal state */ pci_channel_io_normal = (__force pci_channel_state_t) 1, @@ -785,7 +785,7 @@ enum pci_ers_result { struct pci_error_handlers { /* PCI bus error detected on this device */ pci_ers_result_t (*error_detected)(struct pci_dev *dev, - enum pci_channel_state error); + pci_channel_state_t error); /* MMIO has been re-enabled, but not DMA */ pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev); -- cgit v1.2.3 From 0678e3109a3cf438a8d1c8e3c3e58a45a59df262 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Jul 2020 18:48:17 -0500 Subject: PCI/AER: Simplify __aer_print_error() aer_correctable_error_string[] and aer_uncorrectable_error_string[] have descriptions of AER error status bits. Add NULL entries to these tables so all entries for bits 0-31 are defined. Then we don't have to check for ARRAY_SIZE() when decoding a status word, which simplifies __aer_print_error(). Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 48 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 3acf56683915..9176c8a968b9 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -447,7 +447,7 @@ static const char *aer_error_layer[] = { "Transaction Layer" }; -static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = { +static const char *aer_correctable_error_string[] = { "RxErr", /* Bit Position 0 */ NULL, NULL, @@ -464,9 +464,25 @@ static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = { "NonFatalErr", /* Bit Position 13 */ "CorrIntErr", /* Bit Position 14 */ "HeaderOF", /* Bit Position 15 */ + NULL, /* Bit Position 16 */ + NULL, /* Bit Position 17 */ + NULL, /* Bit Position 18 */ + NULL, /* Bit Position 19 */ + NULL, /* Bit Position 20 */ + NULL, /* Bit Position 21 */ + NULL, /* Bit Position 22 */ + NULL, /* Bit Position 23 */ + NULL, /* Bit Position 24 */ + NULL, /* Bit Position 25 */ + NULL, /* Bit Position 26 */ + NULL, /* Bit Position 27 */ + NULL, /* Bit Position 28 */ + NULL, /* Bit Position 29 */ + NULL, /* Bit Position 30 */ + NULL, /* Bit Position 31 */ }; -static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = { +static const char *aer_uncorrectable_error_string[] = { "Undefined", /* Bit Position 0 */ NULL, NULL, @@ -494,6 +510,11 @@ static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = { "AtomicOpBlocked", /* Bit Position 24 */ "TLPBlockedErr", /* Bit Position 25 */ "PoisonTLPBlocked", /* Bit Position 26 */ + NULL, /* Bit Position 27 */ + NULL, /* Bit Position 28 */ + NULL, /* Bit Position 29 */ + NULL, /* Bit Position 30 */ + NULL, /* Bit Position 31 */ }; static const char *aer_agent_string[] = { @@ -650,24 +671,23 @@ static void __print_tlp_header(struct pci_dev *dev, static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { + const char **strings; unsigned long status = info->status & ~info->mask; - const char *errmsg = NULL; + const char *errmsg; int i; + if (info->severity == AER_CORRECTABLE) + strings = aer_correctable_error_string; + else + strings = aer_uncorrectable_error_string; + for_each_set_bit(i, &status, 32) { - if (info->severity == AER_CORRECTABLE) - errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ? - aer_correctable_error_string[i] : NULL; - else - errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ? - aer_uncorrectable_error_string[i] : NULL; + errmsg = strings[i]; + if (!errmsg) + errmsg = "Unknown Error Bit"; - if (errmsg) - pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, + pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, info->first_error == i ? " (First)" : ""); - else - pci_err(dev, " [%2d] Unknown Error Bit%s\n", - i, info->first_error == i ? " (First)" : ""); } pci_dev_aer_stats_incr(dev, info); } -- cgit v1.2.3 From e83e2ca3c39553a9d0fd497d9c839b341e38c742 Mon Sep 17 00:00:00 2001 From: Matt Jolly Date: Fri, 19 Jun 2020 01:55:11 +1000 Subject: PCI/AER: Log correctable errors as warning, not error PCIe correctable errors are recovered by hardware with no need for software intervention (PCIe r5.0, sec 6.2.2.1). Reduce the log level of correctable errors from KERN_ERR to KERN_WARNING. The bug reports below are for correctable error logging. This doesn't fix the cause of those reports, but it may make the messages less alarming. [bhelgaas: commit log, use pci_printk() to avoid code duplication] Link: https://bugzilla.kernel.org/show_bug.cgi?id=201517 Link: https://bugzilla.kernel.org/show_bug.cgi?id=196183 Link: https://lore.kernel.org/r/20200618155511.16009-1-Kangie@footclan.ninja Signed-off-by: Matt Jolly Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 9176c8a968b9..ca886bf91fd9 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -673,20 +673,23 @@ static void __aer_print_error(struct pci_dev *dev, { const char **strings; unsigned long status = info->status & ~info->mask; - const char *errmsg; + const char *level, *errmsg; int i; - if (info->severity == AER_CORRECTABLE) + if (info->severity == AER_CORRECTABLE) { strings = aer_correctable_error_string; - else + level = KERN_WARNING; + } else { strings = aer_uncorrectable_error_string; + level = KERN_ERR; + } for_each_set_bit(i, &status, 32) { errmsg = strings[i]; if (!errmsg) errmsg = "Unknown Error Bit"; - pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, + pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg, info->first_error == i ? " (First)" : ""); } pci_dev_aer_stats_incr(dev, info); @@ -696,6 +699,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { int layer, agent; int id = ((dev->bus->number << 8) | dev->devfn); + const char *level; if (!info->status) { pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", @@ -706,13 +710,14 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", - aer_error_severity_string[info->severity], - aer_error_layer[layer], aer_agent_string[agent]); + level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; + + pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", + aer_error_severity_string[info->severity], + aer_error_layer[layer], aer_agent_string[agent]); - pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", - dev->vendor, dev->device, - info->status, info->mask); + pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", + dev->vendor, dev->device, info->status, info->mask); __aer_print_error(dev, info); -- cgit v1.2.3 From 600a5b4fc8e8f6dad098cd50e0e727cb2a16be46 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 16 Jul 2020 17:34:30 -0500 Subject: PCI/ERR: Rename pci_aer_clear_device_status() to pcie_clear_device_status() pci_aer_clear_device_status() clears the error bits in the PCIe Device Status Register (PCI_EXP_DEVSTA). Every PCIe device has this register, regardless of whether it supports AER. Rename pci_aer_clear_device_status() to pcie_clear_device_status() to make clear that it is PCIe-specific but not AER-specific. Move it to drivers/pci/pci.c, again since it's not AER-specific. No functional change intended. Link: https://lore.kernel.org/r/20200717195619.766662-1-helgaas@kernel.org Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 8 ++++++++ drivers/pci/pci.h | 3 +-- drivers/pci/pcie/aer.c | 10 +--------- drivers/pci/pcie/err.c | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ce096272f52b..bc37013c7185 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2046,6 +2046,14 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) } EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state); +void pcie_clear_device_status(struct pci_dev *dev) +{ + u16 sta; + + pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); + pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); +} + /** * pcie_clear_root_pme_status - Clear root port PME interrupt status. * @dev: PCIe root port or event collector. diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index c6c0c455f59f..b1d1ad6fca74 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -92,6 +92,7 @@ void pci_refresh_power_state(struct pci_dev *dev); int pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); +void pcie_clear_device_status(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); @@ -658,7 +659,6 @@ void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); extern const struct attribute_group aer_stats_attr_group; void pci_aer_clear_fatal_status(struct pci_dev *dev); -void pci_aer_clear_device_status(struct pci_dev *dev); int pci_aer_clear_status(struct pci_dev *dev); int pci_aer_raw_clear_status(struct pci_dev *dev); #else @@ -666,7 +666,6 @@ static inline void pci_no_aer(void) { } static inline void pci_aer_init(struct pci_dev *d) { } static inline void pci_aer_exit(struct pci_dev *d) { } static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } -static inline void pci_aer_clear_device_status(struct pci_dev *dev) { } static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; } #endif diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index ca886bf91fd9..f6d778308558 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -241,14 +241,6 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); -void pci_aer_clear_device_status(struct pci_dev *dev) -{ - u16 sta; - - pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); - pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); -} - int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { int aer = dev->aer_cap; @@ -947,7 +939,7 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) if (aer) pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, info->status); - pci_aer_clear_device_status(dev); + pcie_clear_device_status(dev); } else if (info->severity == AER_NONFATAL) pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); else if (info->severity == AER_FATAL) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 467686ee2d8b..55755bc493f1 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -197,7 +197,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_dbg(dev, "broadcast resume message\n"); pci_walk_bus(bus, report_resume, &status); - pci_aer_clear_device_status(dev); + pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); pci_info(dev, "device recovery successful\n"); return status; -- cgit v1.2.3 From 068c29a248b6ddbfdf7bb150b547569759620d36 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 22 Jun 2020 19:35:23 +0800 Subject: PCI/ERR: Clear PCIe Device Status errors only if OS owns AER pcie_clear_device_status() resets the error bits in the PCIe Device Status Register (PCI_EXP_DEVSTA). Previously we did this unconditionally, but on ACPI systems, the _OSC AER bit negotiates control of the AER capability. Per sec 4.5.1 of the System Firmware Intermediary _OSC and DPC Updates ECN [1], this bit also covers other error enable/status bits including the following: Correctable Error Reporting Enable Non-Fatal Error Reporting Enable Fatal Error Reporting Enable Unsupported Request Reporting Enable These bits are all in the PCIe Device Control register (the ECN omitted "Reporting", but I think that's a typo), so by implication the _OSC AER bit also applies to the error status bits in the PCIe Device Status register: Correctable Error Detected Non-Fatal Error Detected Fatal Error Detected Unsupported Request Detected Clear the PCIe Device Status error bits only when the OS controls the AER capability and related error enable/status bits. If platform firmware controls the AER capability, firmware is responsible for clearing these bits. One call path leading here is: ghes_do_proc ghes_handle_aer aer_recover_queue schedule_work(&aer_recover_work) ... aer_recover_work_func pcie_do_recovery pcie_clear_device_status [1] System Firmware Intermediary (SFI) _OSC and DPC Updates ECN, Feb 24, 2020, affecting PCI Firmware Specification, Rev. 3.2 https://members.pcisig.com/wg/PCI-SIG/document/14076 [bhelgaas: commit log, move test from pcie_clear_device_status() to callers] Link: https://lore.kernel.org/r/20200622113523.891666-1-Jonathan.Cameron@huawei.com Signed-off-by: Jonathan Cameron Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer.c | 3 ++- drivers/pci/pcie/err.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index f6d778308558..cccd6747e5f6 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -939,7 +939,8 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) if (aer) pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, info->status); - pcie_clear_device_status(dev); + if (pcie_aer_is_native(dev)) + pcie_clear_device_status(dev); } else if (info->severity == AER_NONFATAL) pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); else if (info->severity == AER_FATAL) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 55755bc493f1..c543f419d8f9 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -197,7 +197,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_dbg(dev, "broadcast resume message\n"); pci_walk_bus(bus, report_resume, &status); - pcie_clear_device_status(dev); + if (pcie_aer_is_native(dev)) + pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); pci_info(dev, "device recovery successful\n"); return status; -- cgit v1.2.3