summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/PCI/index.rst1
-rw-r--r--Documentation/PCI/tph.rst132
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--Documentation/driver-api/pci/pci.rst3
-rw-r--r--drivers/pci/Kconfig9
-rw-r--r--drivers/pci/Makefile1
-rw-r--r--drivers/pci/pci.c4
-rw-r--r--drivers/pci/pci.h12
-rw-r--r--drivers/pci/probe.c1
-rw-r--r--drivers/pci/tph.c547
-rw-r--r--include/linux/pci-tph.h44
-rw-r--r--include/linux/pci.h7
-rw-r--r--include/uapi/linux/pci_regs.h37
13 files changed, 794 insertions, 8 deletions
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index e73f84aebde3..5e7c4e6e726b 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -18,3 +18,4 @@ PCI Bus Subsystem
pcieaer-howto
endpoint/index
boot-interrupts
+ tph
diff --git a/Documentation/PCI/tph.rst b/Documentation/PCI/tph.rst
new file mode 100644
index 000000000000..e8993be64fd6
--- /dev/null
+++ b/Documentation/PCI/tph.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+
+===========
+TPH Support
+===========
+
+:Copyright: 2024 Advanced Micro Devices, Inc.
+:Authors: - Eric van Tassell <eric.vantassell@amd.com>
+ - Wei Huang <wei.huang2@amd.com>
+
+
+Overview
+========
+
+TPH (TLP Processing Hints) is a PCIe feature that allows endpoint devices
+to provide optimization hints for requests that target memory space.
+These hints, in a format called Steering Tags (STs), are embedded in the
+requester's TLP headers, enabling the system hardware, such as the Root
+Complex, to better manage platform resources for these requests.
+
+For example, on platforms with TPH-based direct data cache injection
+support, an endpoint device can include appropriate STs in its DMA
+traffic to specify which cache the data should be written to. This allows
+the CPU core to have a higher probability of getting data from cache,
+potentially improving performance and reducing latency in data
+processing.
+
+
+How to Use TPH
+==============
+
+TPH is presented as an optional extended capability in PCIe. The Linux
+kernel handles TPH discovery during boot, but it is up to the device
+driver to request TPH enablement if it is to be utilized. Once enabled,
+the driver uses the provided API to obtain the Steering Tag for the
+target memory and to program the ST into the device's ST table.
+
+Enable TPH support in Linux
+---------------------------
+
+To support TPH, the kernel must be built with the CONFIG_PCIE_TPH option
+enabled.
+
+Manage TPH
+----------
+
+To enable TPH for a device, use the following function::
+
+ int pcie_enable_tph(struct pci_dev *pdev, int mode);
+
+This function enables TPH support for a device with a specific ST mode.
+Currently supported modes include:
+
+ * PCI_TPH_ST_NS_MODE - No ST Mode
+ * PCI_TPH_ST_IV_MODE - Interrupt Vector Mode
+ * PCI_TPH_ST_DS_MODE - Device Specific Mode
+
+`pcie_enable_tph()` checks whether the requested mode is actually
+supported by the device before enabling. The device driver can figure out
+which TPH mode is supported and can be properly enabled based on the
+return value of `pcie_enable_tph()`.
+
+To disable TPH, use the following function::
+
+ void pcie_disable_tph(struct pci_dev *pdev);
+
+Manage ST
+---------
+
+Steering Tags are platform specific. The PCIe spec does not specify where
+STs come from. Instead, the PCI Firmware Specification defines an ACPI _DSM
+method (see the `Revised _DSM for Cache Locality TPH Features ECN
+<https://members.pcisig.com/wg/PCI-SIG/document/15470>`_) for retrieving
+STs for a target memory of various properties. This method is what is
+supported in this implementation.
+
+To retrieve a Steering Tag for a target memory associated with a specific
+CPU, use the following function::
+
+ int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type type,
+ unsigned int cpu_uid, u16 *tag);
+
+The `type` argument is used to specify the memory type, either volatile
+or persistent, of the target memory. The `cpu_uid` argument specifies the
+CPU with which the memory is associated.
+
+After the ST value is retrieved, the device driver can use the following
+function to write the ST into the device::
+
+ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index,
+ u16 tag);
+
+The `index` argument is the ST table entry index the ST tag will be
+written into. `pcie_tph_set_st_entry()` will figure out the proper
+location of ST table, either in the MSI-X table or in the TPH Extended
+Capability space, and write the Steering Tag into the ST entry pointed by
+the `index` argument.
+
+It is completely up to the driver to decide how to use these TPH
+functions. For example, a network device driver can use the TPH APIs above
+to update the Steering Tag when the interrupt affinity of an RX/TX queue
+has been changed. Here is sample code for an IRQ affinity notifier:
+
+.. code-block:: c
+
+ static void irq_affinity_notified(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+ {
+ struct drv_irq *irq;
+ unsigned int cpu_id;
+ u16 tag;
+
+ irq = container_of(notify, struct drv_irq, affinity_notify);
+ cpumask_copy(irq->cpu_mask, mask);
+
+ /* Pick a right CPU as the target - here is just an example */
+ cpu_id = cpumask_first(irq->cpu_mask);
+
+ if (pcie_tph_get_cpu_st(irq->pdev, TPH_MEM_TYPE_VM, cpu_id,
+ &tag))
+ return;
+
+ if (pcie_tph_set_st_entry(irq->pdev, irq->msix_nr, tag))
+ return;
+ }
+
+Disable TPH system-wide
+-----------------------
+
+There is a kernel command line option available to control the TPH feature:
+ * "notph" (given as "pci=notph"): TPH will be disabled for all endpoint devices.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1518343bbe22..178995b07451 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4678,6 +4678,10 @@
nomio [S390] Do not use MIO instructions.
norid [S390] ignore the RID field and force use of
one PCI domain per PCI function
+ notph [PCIE] If the PCIE_TPH kernel config parameter
+ is enabled, this kernel boot option can be used
+ to disable PCIe TLP Processing Hints support
+ system-wide.
pcie_aspm= [PCIE] Forcibly enable or ignore PCIe Active State Power
Management.
diff --git a/Documentation/driver-api/pci/pci.rst b/Documentation/driver-api/pci/pci.rst
index aa40b1cc243b..59d86e827198 100644
--- a/Documentation/driver-api/pci/pci.rst
+++ b/Documentation/driver-api/pci/pci.rst
@@ -46,6 +46,9 @@ PCI Support Library
.. kernel-doc:: drivers/pci/pci-sysfs.c
:internal:
+.. kernel-doc:: drivers/pci/tph.c
+ :export:
+
PCI Hotplug Support Library
---------------------------
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index e1c025698a28..2fbd379923fd 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -173,6 +173,15 @@ config PCI_PASID
If unsure, say N.
+config PCIE_TPH
+ bool "TLP Processing Hints"
+ help
+ This option adds support for PCIe TLP Processing Hints (TPH).
+ TPH allows endpoint devices to provide optimization hints, such as
+ desired caching behavior, for requests that target memory space.
+ These hints, called Steering Tags, can empower the system hardware
+ to optimize the utilization of platform resources.
+
config PCI_P2PDMA
bool "PCI peer-to-peer transfer support"
depends on ZONE_DEVICE
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 39a07890abd1..67647f1880fb 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_VGA_ARB) += vgaarb.o
obj-$(CONFIG_PCI_DOE) += doe.o
obj-$(CONFIG_PCI_DYNAMIC_OF_NODES) += of_property.o
obj-$(CONFIG_PCI_NPEM) += npem.o
+obj-$(CONFIG_PCIE_TPH) += tph.o
# Endpoint library must be initialized before its users
obj-$(CONFIG_PCI_ENDPOINT) += endpoint/
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 880f7565acf3..43c9a0e029c9 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1828,6 +1828,7 @@ int pci_save_state(struct pci_dev *dev)
pci_save_dpc_state(dev);
pci_save_aer_state(dev);
pci_save_ptm_state(dev);
+ pci_save_tph_state(dev);
return pci_save_vc_state(dev);
}
EXPORT_SYMBOL(pci_save_state);
@@ -1933,6 +1934,7 @@ void pci_restore_state(struct pci_dev *dev)
pci_restore_rebar_state(dev);
pci_restore_dpc_state(dev);
pci_restore_ptm_state(dev);
+ pci_restore_tph_state(dev);
pci_aer_clear_status(dev);
pci_restore_aer_state(dev);
@@ -6925,6 +6927,8 @@ static int __init pci_setup(char *str)
pci_no_domains();
} else if (!strncmp(str, "noari", 5)) {
pcie_ari_disabled = true;
+ } else if (!strncmp(str, "notph", 5)) {
+ pci_no_tph();
} else if (!strncmp(str, "cbiosize=", 9)) {
pci_cardbus_io_size = memparse(str + 9, &str);
} else if (!strncmp(str, "cbmemsize=", 10)) {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 7531e4b005c6..2e40fc63ba31 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -638,6 +638,18 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
#endif /* CONFIG_PCI_IOV */
+#ifdef CONFIG_PCIE_TPH
+void pci_restore_tph_state(struct pci_dev *dev);
+void pci_save_tph_state(struct pci_dev *dev);
+void pci_no_tph(void);
+void pci_tph_init(struct pci_dev *dev);
+#else
+static inline void pci_restore_tph_state(struct pci_dev *dev) { }
+static inline void pci_save_tph_state(struct pci_dev *dev) { }
+static inline void pci_no_tph(void) { }
+static inline void pci_tph_init(struct pci_dev *dev) { }
+#endif
+
#ifdef CONFIG_PCIE_PTM
void pci_ptm_init(struct pci_dev *dev);
void pci_save_ptm_state(struct pci_dev *dev);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8a7e58e20839..bf4c76ec8cd4 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2516,6 +2516,7 @@ static void pci_init_capabilities(struct pci_dev *dev)
pci_dpc_init(dev); /* Downstream Port Containment */
pci_rcec_init(dev); /* Root Complex Event Collector */
pci_doe_init(dev); /* Data Object Exchange */
+ pci_tph_init(dev); /* TLP Processing Hints */
pcie_report_downtraining(dev);
pci_init_reset_methods(dev);
diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c
new file mode 100644
index 000000000000..1e604fbbda65
--- /dev/null
+++ b/drivers/pci/tph.c
@@ -0,0 +1,547 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * TPH (TLP Processing Hints) support
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ * Eric Van Tassell <Eric.VanTassell@amd.com>
+ * Wei Huang <wei.huang2@amd.com>
+ */
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/msi.h>
+#include <linux/bitfield.h>
+#include <linux/pci-tph.h>
+
+#include "pci.h"
+
+/* System-wide TPH disabled */
+static bool pci_tph_disabled;
+
+#ifdef CONFIG_ACPI
+/*
+ * The st_info struct defines the Steering Tag (ST) info returned by the
+ * firmware PCI ACPI _DSM method (rev=0x7, func=0xF, "_DSM to Query Cache
+ * Locality TPH Features"), as specified in the approved ECN for PCI Firmware
+ * Spec and available at https://members.pcisig.com/wg/PCI-SIG/document/15470.
+ *
+ * @vm_st_valid: 8-bit ST for volatile memory is valid
+ * @vm_xst_valid: 16-bit extended ST for volatile memory is valid
+ * @vm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied
+ * @vm_st: 8-bit ST for volatile mem
+ * @vm_xst: 16-bit extended ST for volatile mem
+ * @pm_st_valid: 8-bit ST for persistent memory is valid
+ * @pm_xst_valid: 16-bit extended ST for persistent memory is valid
+ * @pm_ph_ignore: 1 => PH was and will be ignored, 0 => PH should be supplied
+ * @pm_st: 8-bit ST for persistent mem
+ * @pm_xst: 16-bit extended ST for persistent mem
+ *
+ * rsvd1 and rsvd2 are 5-bit filler fields that follow each group of three
+ * flag bits. @value overlays the same 64 bits as a single integer so that
+ * the raw _DSM output can be stored with one assignment and then decoded
+ * through the bit-fields.
+ */
+union st_info {
+ struct {
+ u64 vm_st_valid : 1;
+ u64 vm_xst_valid : 1;
+ u64 vm_ph_ignore : 1;
+ u64 rsvd1 : 5;
+ u64 vm_st : 8;
+ u64 vm_xst : 16;
+ u64 pm_st_valid : 1;
+ u64 pm_xst_valid : 1;
+ u64 pm_ph_ignore : 1;
+ u64 rsvd2 : 5;
+ u64 pm_st : 8;
+ u64 pm_xst : 16;
+ };
+ u64 value;
+};
+
+/*
+ * Pick the Steering Tag matching mem_type and req_type out of the decoded
+ * _DSM result: the 8-bit tag for plain TPH requesters, the 16-bit tag for
+ * extended TPH requesters. Return 0 when the request type or memory type is
+ * not recognized, or when firmware did not mark the wanted tag as valid.
+ */
+static u16 tph_extract_tag(enum tph_mem_type mem_type, u8 req_type,
+			   union st_info *info)
+{
+	bool want_vm = (mem_type == TPH_MEM_TYPE_VM);
+	bool want_pm = (mem_type == TPH_MEM_TYPE_PM);
+
+	/* 8-bit tag */
+	if (req_type == PCI_TPH_REQ_TPH_ONLY) {
+		if (want_vm && info->vm_st_valid)
+			return info->vm_st;
+		if (want_pm && info->pm_st_valid)
+			return info->pm_st;
+		return 0;
+	}
+
+	/* 16-bit tag */
+	if (req_type == PCI_TPH_REQ_EXT_TPH) {
+		if (want_vm && info->vm_xst_valid)
+			return info->vm_xst;
+		if (want_pm && info->pm_xst_valid)
+			return info->pm_xst;
+		return 0;
+	}
+
+	return 0;
+}
+
+#define TPH_ST_DSM_FUNC_INDEX 0xF
+/*
+ * Evaluate the PCI _DSM (rev 7, function 0xF) on handle to query the Steering
+ * Tag information for the target identified by cpu_uid.
+ *
+ * @handle: ACPI handle the _DSM is evaluated on
+ * @cpu_uid: value passed to firmware as the "target UID" argument
+ * @st_out: filled with the raw 64-bit result on success
+ *
+ * Return: AE_OK when firmware returned a buffer holding at least 64 bits,
+ * AE_ERROR when the function is not supported, the evaluation failed or the
+ * returned object is not usable.
+ */
+static acpi_status tph_invoke_dsm(acpi_handle handle, u32 cpu_uid,
+				  union st_info *st_out)
+{
+	union acpi_object arg3[3], in_obj, *out_obj;
+
+	if (!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 7,
+			    BIT(TPH_ST_DSM_FUNC_INDEX)))
+		return AE_ERROR;
+
+	/* DWORD: feature ID (0 for processor cache ST query) */
+	arg3[0].integer.type = ACPI_TYPE_INTEGER;
+	arg3[0].integer.value = 0;
+
+	/* DWORD: target UID */
+	arg3[1].integer.type = ACPI_TYPE_INTEGER;
+	arg3[1].integer.value = cpu_uid;
+
+	/* QWORD: properties, all 0's */
+	arg3[2].integer.type = ACPI_TYPE_INTEGER;
+	arg3[2].integer.value = 0;
+
+	in_obj.type = ACPI_TYPE_PACKAGE;
+	in_obj.package.count = ARRAY_SIZE(arg3);
+	in_obj.package.elements = arg3;
+
+	out_obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 7,
+				    TPH_ST_DSM_FUNC_INDEX, &in_obj);
+	if (!out_obj)
+		return AE_ERROR;
+
+	/*
+	 * The reply is produced by firmware: besides being a buffer it must
+	 * really hold the 64 bits read below, otherwise the load would run
+	 * past the end of the returned data.
+	 */
+	if (out_obj->type != ACPI_TYPE_BUFFER ||
+	    out_obj->buffer.length < sizeof(st_out->value)) {
+		ACPI_FREE(out_obj);
+		return AE_ERROR;
+	}
+
+	st_out->value = *((u64 *)(out_obj->buffer.pointer));
+
+	ACPI_FREE(out_obj);
+
+	return AE_OK;
+}
+#endif
+
+/*
+ * Program req_type into the TPH Requester Enable field of the TPH Control
+ * Register; all other bits are written back as they were read.
+ */
+static void set_ctrl_reg_req_en(struct pci_dev *pdev, u8 req_type)
+{
+	int ctrl_pos = pdev->tph_cap + PCI_TPH_CTRL;
+	u32 ctrl;
+
+	pci_read_config_dword(pdev, ctrl_pos, &ctrl);
+
+	ctrl = (ctrl & ~PCI_TPH_CTRL_REQ_EN_MASK) |
+	       FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, req_type);
+
+	pci_write_config_dword(pdev, ctrl_pos, ctrl);
+}
+
+/*
+ * Return the "ST mode supported" bits (No ST, Interrupt Vector, Device
+ * Specific) of the TPH capability register; every other bit is masked off.
+ */
+static u8 get_st_modes(struct pci_dev *pdev)
+{
+	const u32 mode_bits = PCI_TPH_CAP_ST_NS | PCI_TPH_CAP_ST_IV |
+			      PCI_TPH_CAP_ST_DS;
+	u32 cap;
+
+	pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &cap);
+	cap &= mode_bits;
+
+	return cap;
+}
+
+/*
+ * Return the ST Table Location field of the TPH capability register as a
+ * bare field value, i.e. shifted down out of its register position.
+ */
+static u32 get_st_table_loc(struct pci_dev *pdev)
+{
+	int cap_pos = pdev->tph_cap + PCI_TPH_CAP;
+	u32 cap;
+
+	pci_read_config_dword(pdev, cap_pos, &cap);
+
+	return FIELD_GET(PCI_TPH_CAP_LOC_MASK, cap);
+}
+
+/*
+ * Report the number of ST table entries when the table lives in the TPH
+ * Requester Extended Capability space: the ST Table Size field plus one.
+ * For any other table location the result is 0.
+ */
+static u16 get_st_table_size(struct pci_dev *pdev)
+{
+	u32 cap;
+
+	/*
+	 * get_st_table_loc() hands back the bare field value; move it back
+	 * into register position so it compares against the PCI_TPH_LOC_*
+	 * constants defined in pci_regs.h.
+	 */
+	if (FIELD_PREP(PCI_TPH_CAP_LOC_MASK, get_st_table_loc(pdev)) !=
+	    PCI_TPH_LOC_CAP)
+		return 0;
+
+	pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &cap);
+
+	return FIELD_GET(PCI_TPH_CAP_ST_MASK, cap) + 1;
+}
+
+/*
+ * Return the TPH completer support field from Device Capabilities 2 of the
+ * device's Root Port, or 0 when no Root Port is found or the read fails.
+ */
+static u8 get_rp_completer_type(struct pci_dev *pdev)
+{
+	struct pci_dev *root_port = pcie_find_root_port(pdev);
+	u32 devcap2;
+
+	if (!root_port)
+		return 0;
+
+	if (pcie_capability_read_dword(root_port, PCI_EXP_DEVCAP2, &devcap2))
+		return 0;
+
+	return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, devcap2);
+}
+
+/*
+ * Store tag in the Steering Tag bits of the MSI-X Vector Control register of
+ * table entry msix_idx. The MSI descriptor list is held locked for the whole
+ * lookup and update.
+ *
+ * Return 0 if OK, -ENXIO if no associated descriptor has index msix_idx, or
+ * -ENODEV when the kernel is built without CONFIG_PCI_MSI.
+ */
+static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag)
+{
+#ifdef CONFIG_PCI_MSI
+	struct msi_desc *desc = NULL;
+	void __iomem *ctrl_addr;
+	int ret = -ENXIO;
+	u32 ctrl;
+
+	msi_lock_descs(&pdev->dev);
+
+	/* Look for the associated descriptor whose index equals msix_idx */
+	msi_for_each_desc(desc, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		if (desc->msi_index == msix_idx)
+			break;
+	}
+
+	if (desc) {
+		/* Vector control register (offset 0xc) of entry msix_idx */
+		ctrl_addr = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE;
+		ctrl_addr += PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+		ctrl = readl(ctrl_addr);
+		ctrl = (ctrl & ~PCI_MSIX_ENTRY_CTRL_ST) |
+		       FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag);
+		writel(ctrl, ctrl_addr);
+
+		/* Read back to flush the update */
+		ctrl = readl(ctrl_addr);
+
+		ret = 0;
+	}
+
+	msi_unlock_descs(&pdev->dev);
+	return ret;
+#else
+	return -ENODEV;
+#endif
+}
+
+/*
+ * Write tag into entry index of the ST table located in the TPH capability.
+ * Return 0 if OK, -ENXIO if index lies outside the table, otherwise the
+ * result of the config space write.
+ */
+static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag)
+{
+	int nr_entries = get_st_table_size(pdev);
+	int pos;
+
+	if (index >= nr_entries)
+		return -ENXIO;
+
+	/* The 16-bit entries start right after the table-less capability */
+	pos = pdev->tph_cap + PCI_TPH_BASE_SIZEOF + index * sizeof(u16);
+
+	return pci_write_config_word(pdev, pos, tag);
+}
+
+/**
+ * pcie_tph_get_cpu_st() - Retrieve Steering Tag for a target memory associated
+ * with a specific CPU
+ * @pdev: PCI device
+ * @mem_type: target memory type (volatile or persistent RAM)
+ * @cpu_uid: associated CPU id, handed to the firmware _DSM as the target UID
+ * @tag: Steering Tag to be returned
+ *
+ * Return the Steering Tag for a target memory that is associated with a
+ * specific CPU as indicated by cpu_uid. The tag is obtained from the _DSM
+ * evaluated on the ACPI handle of the bridge device of the Root Port's bus.
+ * The 8-bit or the 16-bit tag is selected according to the TPH requester type
+ * currently recorded in @pdev.
+ *
+ * @tag is set to 0 when the _DSM call fails, and also when the call succeeds
+ * but firmware does not mark the matching tag as valid; the latter case still
+ * returns 0.
+ *
+ * Return: 0 if success, -ENODEV if there is no Root Port/bridge or the kernel
+ * is built without ACPI, -EINVAL if the _DSM evaluation fails
+ */
+int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type mem_type,
+			unsigned int cpu_uid, u16 *tag)
+{
+#ifdef CONFIG_ACPI
+	struct pci_dev *rp;
+	acpi_handle rp_acpi_handle;
+	union st_info info;
+
+	rp = pcie_find_root_port(pdev);
+	if (!rp || !rp->bus || !rp->bus->bridge)
+		return -ENODEV;
+
+	rp_acpi_handle = ACPI_HANDLE(rp->bus->bridge);
+
+	if (tph_invoke_dsm(rp_acpi_handle, cpu_uid, &info) != AE_OK) {
+		*tag = 0;
+		return -EINVAL;
+	}
+
+	*tag = tph_extract_tag(mem_type, pdev->tph_req_type, &info);
+
+	/* cpu_uid is unsigned: print it with %u so large UIDs stay positive */
+	pci_dbg(pdev, "get steering tag: mem_type=%s, cpu_uid=%u, tag=%#04x\n",
+		(mem_type == TPH_MEM_TYPE_VM) ? "volatile" : "persistent",
+		cpu_uid, *tag);
+
+	return 0;
+#else
+	return -ENODEV;
+#endif
+}
+EXPORT_SYMBOL(pcie_tph_get_cpu_st);
+
+/**
+ * pcie_tph_set_st_entry() - Set Steering Tag in the ST table entry
+ * @pdev: PCI device
+ * @index: ST table entry index
+ * @tag: Steering Tag to be written
+ *
+ * Figure out the proper location of ST table, either in the MSI-X table or
+ * in the TPH Extended Capability space, and write the Steering Tag into
+ * the ST entry pointed by index.
+ *
+ * TPH requests are switched off while the entry is updated and switched back
+ * on afterwards with the requester type selected by pcie_enable_tph(). If the
+ * write fails, TPH is left disabled for the device. Nothing is written when
+ * the device operates in "No ST Mode".
+ *
+ * Return: 0 if success, otherwise negative value (-errno)
+ */
+int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag)
+{
+	u32 loc;
+	int err = 0;
+
+	if (!pdev->tph_cap)
+		return -EINVAL;
+
+	if (!pdev->tph_enabled)
+		return -EINVAL;
+
+	/* No need to write tag if device is in "No ST Mode" */
+	if (pdev->tph_mode == PCI_TPH_ST_NS_MODE)
+		return 0;
+
+	/*
+	 * Disable TPH before updating ST to avoid potential instability as
+	 * cautioned in PCIe r6.2, sec 6.17.3, "ST Modes of Operation"
+	 */
+	set_ctrl_reg_req_en(pdev, PCI_TPH_REQ_DISABLE);
+
+	loc = get_st_table_loc(pdev);
+	/* Convert loc to match with PCI_TPH_LOC_* */
+	loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc);
+
+	switch (loc) {
+	case PCI_TPH_LOC_MSIX:
+		err = write_tag_to_msix(pdev, index, tag);
+		break;
+	case PCI_TPH_LOC_CAP:
+		err = write_tag_to_st_table(pdev, index, tag);
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	if (err) {
+		pcie_disable_tph(pdev);
+		return err;
+	}
+
+	/*
+	 * Re-enable with the TPH Requester Enable value, not the ST mode:
+	 * the two are different register fields with different encodings
+	 * (e.g. Device Specific Mode is 2, which is not a valid requester
+	 * enable value, and extended TPH is 3).
+	 */
+	set_ctrl_reg_req_en(pdev, pdev->tph_req_type);
+
+	pci_dbg(pdev, "set steering tag: %s table, index=%u, tag=%#04x\n",
+		(loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag);
+
+	return 0;
+}
+EXPORT_SYMBOL(pcie_tph_set_st_entry);
+
+/**
+ * pcie_disable_tph - Turn off TPH support for device
+ * @pdev: PCI device
+ *
+ * Clear the TPH control register and reset the TPH state cached in @pdev.
+ * Does nothing if the device has no TPH capability or TPH is not enabled.
+ *
+ * Return: none
+ */
+void pcie_disable_tph(struct pci_dev *pdev)
+{
+	if (!pdev->tph_cap || !pdev->tph_enabled)
+		return;
+
+	pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, 0);
+
+	pdev->tph_enabled = 0;
+	pdev->tph_req_type = 0;
+	pdev->tph_mode = 0;
+}
+EXPORT_SYMBOL(pcie_disable_tph);
+
+/**
+ * pcie_enable_tph - Enable TPH support for device using a specific ST mode
+ * @pdev: PCI device
+ * @mode: ST mode to enable. Current supported modes include:
+ *
+ *   - PCI_TPH_ST_NS_MODE: NO ST Mode
+ *   - PCI_TPH_ST_IV_MODE: Interrupt Vector Mode
+ *   - PCI_TPH_ST_DS_MODE: Device Specific Mode
+ *
+ * The requested mode is validated against the modes advertised by the device
+ * and rejected if unsupported. The request type (TPH or extended TPH) is the
+ * lesser of what the device can issue as a requester and what its Root Port
+ * accepts as a completer; if that leaves nothing, enabling fails.
+ *
+ * Return: 0 on success, otherwise negative value (-errno)
+ */
+int pcie_enable_tph(struct pci_dev *pdev, int mode)
+{
+	u8 supported_modes, completer_type;
+	u32 cap, ctrl;
+
+	/* "notph" on the command line, or no TPH capability at all */
+	if (pci_tph_disabled || !pdev->tph_cap)
+		return -EINVAL;
+
+	if (pdev->tph_enabled)
+		return -EBUSY;
+
+	/* Keep only the mode-select bits, then test the matching support bit */
+	mode &= PCI_TPH_CTRL_MODE_SEL_MASK;
+	supported_modes = get_st_modes(pdev);
+	if (!(supported_modes & (1 << mode)))
+		return -EINVAL;
+
+	pdev->tph_mode = mode;
+
+	/* What the device itself can issue ... */
+	pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &cap);
+	pdev->tph_req_type = FIELD_GET(PCI_TPH_CAP_EXT_TPH, cap) ?
+			     PCI_TPH_REQ_EXT_TPH : PCI_TPH_REQ_TPH_ONLY;
+
+	/* ... capped by what its Root Port can complete */
+	completer_type = get_rp_completer_type(pdev);
+	pdev->tph_req_type = min(pdev->tph_req_type, completer_type);
+
+	if (pdev->tph_req_type == PCI_TPH_REQ_DISABLE)
+		return -EINVAL;
+
+	/* Program ST mode and requester enable in one control register update */
+	pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, &ctrl);
+
+	ctrl &= ~(PCI_TPH_CTRL_MODE_SEL_MASK | PCI_TPH_CTRL_REQ_EN_MASK);
+	ctrl |= FIELD_PREP(PCI_TPH_CTRL_MODE_SEL_MASK, pdev->tph_mode);
+	ctrl |= FIELD_PREP(PCI_TPH_CTRL_REQ_EN_MASK, pdev->tph_req_type);
+
+	pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, ctrl);
+
+	pdev->tph_enabled = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(pcie_enable_tph);
+
+/*
+ * Write the saved TPH control register and ST table entries back to the
+ * device. Nothing is done unless the device has the capability, TPH is
+ * enabled and a save buffer exists.
+ */
+void pci_restore_tph_state(struct pci_dev *pdev)
+{
+	struct pci_cap_saved_state *state;
+	u32 *saved_ctrl;
+	u16 *saved_st;
+	int nr_entries, pos, i;
+
+	if (!pdev->tph_cap || !pdev->tph_enabled)
+		return;
+
+	state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH);
+	if (!state)
+		return;
+
+	/* Buffer layout: control register dword, then the 16-bit ST entries */
+	saved_ctrl = &state->cap.data[0];
+	saved_st = (u16 *)(saved_ctrl + 1);
+
+	pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, *saved_ctrl);
+
+	nr_entries = get_st_table_size(pdev);
+	pos = pdev->tph_cap + PCI_TPH_BASE_SIZEOF;
+	for (i = 0; i < nr_entries; i++)
+		pci_write_config_word(pdev, pos + i * sizeof(u16),
+				      saved_st[i]);
+}
+
+/*
+ * Copy the TPH control register and the ST table entries held in the
+ * extended capability into the device's save buffer. Nothing is done unless
+ * the device has the capability, TPH is enabled and a save buffer exists.
+ */
+void pci_save_tph_state(struct pci_dev *pdev)
+{
+	struct pci_cap_saved_state *state;
+	u32 *saved_ctrl;
+	u16 *saved_st;
+	int nr_entries, pos, i;
+
+	if (!pdev->tph_cap || !pdev->tph_enabled)
+		return;
+
+	state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH);
+	if (!state)
+		return;
+
+	/* Buffer layout: control register dword, then the 16-bit ST entries */
+	saved_ctrl = &state->cap.data[0];
+	saved_st = (u16 *)(saved_ctrl + 1);
+
+	pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, saved_ctrl);
+
+	nr_entries = get_st_table_size(pdev);
+	pos = pdev->tph_cap + PCI_TPH_BASE_SIZEOF;
+	for (i = 0; i < nr_entries; i++)
+		pci_read_config_word(pdev, pos + i * sizeof(u16),
+				     &saved_st[i]);
+}
+
+/*
+ * Set the system-wide TPH disable flag and log it. Once set,
+ * pcie_enable_tph() refuses every request with -EINVAL.
+ */
+void pci_no_tph(void)
+{
+	pci_tph_disabled = true;
+	pr_info("PCIe TPH is disabled\n");
+}
+
+/*
+ * Locate the TPH extended capability of the device and, if it is present,
+ * register a save buffer sized for the TPH state.
+ */
+void pci_tph_init(struct pci_dev *pdev)
+{
+	u32 buf_size;
+
+	pdev->tph_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_TPH);
+	if (!pdev->tph_cap)
+		return;
+
+	/* One dword for the control register plus one word per ST entry */
+	buf_size = sizeof(u32) + get_st_table_size(pdev) * sizeof(u16);
+	pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_TPH, buf_size);
+}
diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h
new file mode 100644
index 000000000000..c3e806c13d64
--- /dev/null
+++ b/include/linux/pci-tph.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * TPH (TLP Processing Hints)
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ * Eric Van Tassell <Eric.VanTassell@amd.com>
+ * Wei Huang <wei.huang2@amd.com>
+ */
+#ifndef LINUX_PCI_TPH_H
+#define LINUX_PCI_TPH_H
+
+/*
+ * According to the ECN for PCI Firmware Spec, Steering Tag can be different
+ * depending on the memory type: Volatile Memory or Persistent Memory. When a
+ * caller queries a target's Steering Tag, it must provide the target's
+ * tph_mem_type. ECN link: https://members.pcisig.com/wg/PCI-SIG/document/15470.
+ */
+enum tph_mem_type {
+ TPH_MEM_TYPE_VM, /* volatile memory */
+ TPH_MEM_TYPE_PM /* persistent memory */
+};
+
+#ifdef CONFIG_PCIE_TPH
+int pcie_tph_set_st_entry(struct pci_dev *pdev,
+			  unsigned int index, u16 tag);
+int pcie_tph_get_cpu_st(struct pci_dev *dev,
+			enum tph_mem_type mem_type,
+			unsigned int cpu_uid, u16 *tag);
+void pcie_disable_tph(struct pci_dev *pdev);
+int pcie_enable_tph(struct pci_dev *pdev, int mode);
+#else
+/* Without CONFIG_PCIE_TPH every request fails and disabling is a no-op */
+static inline int pcie_tph_set_st_entry(struct pci_dev *pdev,
+					unsigned int index, u16 tag)
+{
+	return -EINVAL;
+}
+
+static inline int pcie_tph_get_cpu_st(struct pci_dev *dev,
+				      enum tph_mem_type mem_type,
+				      unsigned int cpu_uid, u16 *tag)
+{
+	return -EINVAL;
+}
+
+static inline void pcie_disable_tph(struct pci_dev *pdev)
+{
+}
+
+static inline int pcie_enable_tph(struct pci_dev *pdev, int mode)
+{
+	return -EINVAL;
+}
+#endif
+
+#endif /* LINUX_PCI_TPH_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5db9b0b2dca1..19147cb5bd9d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -442,6 +442,7 @@ struct pci_dev {
unsigned int ats_enabled:1; /* Address Translation Svc */
unsigned int pasid_enabled:1; /* Process Address Space ID */
unsigned int pri_enabled:1; /* Page Request Interface */
+ unsigned int tph_enabled:1; /* TLP Processing Hints */
unsigned int is_managed:1; /* Managed via devres */
unsigned int is_msi_managed:1; /* MSI release via devres installed */
unsigned int needs_freset:1; /* Requires fundamental reset */
@@ -544,6 +545,12 @@ struct pci_dev {
/* These methods index pci_reset_fn_methods[] */
u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
+
+#ifdef CONFIG_PCIE_TPH
+ u16 tph_cap; /* TPH capability offset */
+ u8 tph_mode; /* TPH mode */
+ u8 tph_req_type; /* TPH requester type */
+#endif
};
static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f3c9de0a497c..1601c7ed5fab 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -340,7 +340,8 @@
#define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */
#define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */
#define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */
-#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* Mask Bit */
+#define PCI_MSIX_ENTRY_CTRL_ST 0xffff0000 /* Steering Tag */
/* CompactPCI Hotswap Register */
@@ -659,6 +660,7 @@
#define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* 64b AtomicOp completion */
#define PCI_EXP_DEVCAP2_ATOMIC_COMP128 0x00000200 /* 128b AtomicOp completion */
#define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */
+#define PCI_EXP_DEVCAP2_TPH_COMP_MASK 0x00003000 /* TPH completer support */
#define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */
#define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */
#define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */
@@ -1024,15 +1026,34 @@
#define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */
#define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */
+/* TPH Completer Support */
+#define PCI_EXP_DEVCAP2_TPH_COMP_NONE 0x0 /* None */
+#define PCI_EXP_DEVCAP2_TPH_COMP_TPH_ONLY 0x1 /* TPH only */
+#define PCI_EXP_DEVCAP2_TPH_COMP_EXT_TPH 0x3 /* TPH and Extended TPH */
+
/* TPH Requester */
#define PCI_TPH_CAP 4 /* capability register */
-#define PCI_TPH_CAP_LOC_MASK 0x600 /* location mask */
-#define PCI_TPH_LOC_NONE 0x000 /* no location */
-#define PCI_TPH_LOC_CAP 0x200 /* in capability */
-#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */
-#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */
-#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */
-#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */
+#define PCI_TPH_CAP_ST_NS 0x00000001 /* No ST Mode Supported */
+#define PCI_TPH_CAP_ST_IV 0x00000002 /* Interrupt Vector Mode Supported */
+#define PCI_TPH_CAP_ST_DS 0x00000004 /* Device Specific Mode Supported */
+#define PCI_TPH_CAP_EXT_TPH 0x00000100 /* Ext TPH Requester Supported */
+#define PCI_TPH_CAP_LOC_MASK 0x00000600 /* ST Table Location */
+#define PCI_TPH_LOC_NONE 0x00000000 /* Not present */
+#define PCI_TPH_LOC_CAP 0x00000200 /* In capability */
+#define PCI_TPH_LOC_MSIX 0x00000400 /* In MSI-X */
+#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST Table Size */
+#define PCI_TPH_CAP_ST_SHIFT 16 /* ST Table Size shift */
+#define PCI_TPH_BASE_SIZEOF 0xc /* Size with no ST table */
+
+#define PCI_TPH_CTRL 8 /* control register */
+#define PCI_TPH_CTRL_MODE_SEL_MASK 0x00000007 /* ST Mode Select */
+#define PCI_TPH_ST_NS_MODE 0x0 /* No ST Mode */
+#define PCI_TPH_ST_IV_MODE 0x1 /* Interrupt Vector Mode */
+#define PCI_TPH_ST_DS_MODE 0x2 /* Device Specific Mode */
+#define PCI_TPH_CTRL_REQ_EN_MASK 0x00000300 /* TPH Requester Enable */
+#define PCI_TPH_REQ_DISABLE 0x0 /* No TPH requests allowed */
+#define PCI_TPH_REQ_TPH_ONLY 0x1 /* TPH only requests allowed */
+#define PCI_TPH_REQ_EXT_TPH 0x3 /* Extended TPH requests allowed */
/* Downstream Port Containment */
#define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */