diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-30 11:51:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-30 11:51:51 -0700 |
commit | 7acc1372113083fa281ba426021801e2402caca1 (patch) | |
tree | f910476851f61f87ac5f740aa9ad17673aa8d56a | |
parent | 10de638d8ea57ebab4231ea077bed01d9bade775 (diff) | |
parent | fd35fdcbf75b5f31dba6c284886b676bb2145fe6 (diff) |
Merge tag 'cxl-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull compute express link updates from Dan Williams:
"DOE support is promoted from drivers/cxl/ to drivers/pci/ with Bjorn's
blessing, and the CXL core continues to mature its media management
capabilities with support for listing and injecting media errors. Some
late fixes that missed v6.3-final are also included:
- Refactor the DOE infrastructure (Data Object Exchange
PCI-config-cycle mailbox) to be a facility of the PCI core rather
than the CXL core.
This is foundational for upcoming support for PCI
device-attestation and PCIe / CXL link encryption.
- Add support for retrieving and injecting poison for CXL memory
expanders.
This enabling uses trace-events to convey CXL media error records
to user tooling. It includes translation of device-local addresses
(DPA) to system physical addresses (SPA) and their corresponding
CXL region.
- Fixes for decoder enumeration that missed v6.3-final
- Miscellaneous fixups"
* tag 'cxl-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (38 commits)
cxl/test: Add mock test for set_timestamp
cxl/mbox: Update CMD_RC_TABLE
tools/testing/cxl: Require CONFIG_DEBUG_FS
tools/testing/cxl: Add a sysfs attr to test poison inject limits
tools/testing/cxl: Use injected poison for get poison list
tools/testing/cxl: Mock the Clear Poison mailbox command
tools/testing/cxl: Mock the Inject Poison mailbox command
cxl/mem: Add debugfs attributes for poison inject and clear
cxl/memdev: Trace inject and clear poison as cxl_poison events
cxl/memdev: Warn of poison inject or clear to a mapped region
cxl/memdev: Add support for the Clear Poison mailbox command
cxl/memdev: Add support for the Inject Poison mailbox command
tools/testing/cxl: Mock support for Get Poison List
cxl/trace: Add an HPA to cxl_poison trace events
cxl/region: Provide region info to the cxl_poison trace event
cxl/memdev: Add trigger_poison_list sysfs attribute
cxl/trace: Add TRACE support for CXL media-error records
cxl/mbox: Add GET_POISON_LIST mailbox command
cxl/mbox: Initialize the poison state
cxl/mbox: Restrict poison cmds to debugfs cxl_raw_allow_all
...
-rw-r--r-- | .clang-format | 1 | ||||
-rw-r--r-- | Documentation/ABI/testing/debugfs-cxl | 35 | ||||
-rw-r--r-- | Documentation/ABI/testing/sysfs-bus-cxl | 14 | ||||
-rw-r--r-- | drivers/cxl/core/core.h | 11 | ||||
-rw-r--r-- | drivers/cxl/core/hdm.c | 52 | ||||
-rw-r--r-- | drivers/cxl/core/mbox.c | 151 | ||||
-rw-r--r-- | drivers/cxl/core/memdev.c | 227 | ||||
-rw-r--r-- | drivers/cxl/core/pci.c | 137 | ||||
-rw-r--r-- | drivers/cxl/core/port.c | 1 | ||||
-rw-r--r-- | drivers/cxl/core/region.c | 124 | ||||
-rw-r--r-- | drivers/cxl/core/trace.c | 94 | ||||
-rw-r--r-- | drivers/cxl/core/trace.h | 103 | ||||
-rw-r--r-- | drivers/cxl/cxlmem.h | 111 | ||||
-rw-r--r-- | drivers/cxl/mem.c | 71 | ||||
-rw-r--r-- | drivers/cxl/pci.c | 53 | ||||
-rw-r--r-- | drivers/cxl/port.c | 18 | ||||
-rw-r--r-- | drivers/pci/doe.c | 328 | ||||
-rw-r--r-- | drivers/pci/pci.h | 11 | ||||
-rw-r--r-- | drivers/pci/probe.c | 1 | ||||
-rw-r--r-- | drivers/pci/remove.c | 1 | ||||
-rw-r--r-- | include/linux/pci-doe.h | 66 | ||||
-rw-r--r-- | include/linux/pci.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/cxl_mem.h | 35 | ||||
-rw-r--r-- | tools/testing/cxl/config_check.c | 1 | ||||
-rw-r--r-- | tools/testing/cxl/test/mem.c | 247 |
25 files changed, 1583 insertions, 313 deletions
diff --git a/.clang-format b/.clang-format index 2048b0296d76..0d1ed8776733 100644 --- a/.clang-format +++ b/.clang-format @@ -521,7 +521,6 @@ ForEachMacros: - 'of_property_for_each_u32' - 'pci_bus_for_each_resource' - 'pci_dev_for_each_resource' - - 'pci_doe_for_each_off' - 'pcl_for_each_chunk' - 'pcl_for_each_segment' - 'pcm_for_each_format' diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl new file mode 100644 index 000000000000..fe61d372e3fa --- /dev/null +++ b/Documentation/ABI/testing/debugfs-cxl @@ -0,0 +1,35 @@ +What: /sys/kernel/debug/cxl/memX/inject_poison +Date: April, 2023 +KernelVersion: v6.4 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Device Physical Address (DPA) is written to this + attribute, the memdev driver sends an inject poison command to + the device for the specified address. The DPA must be 64-byte + aligned and the length of the injected poison is 64-bytes. If + successful, the device returns poison when the address is + accessed through the CXL.mem bus. Injecting poison adds the + address to the device's Poison List and the error source is set + to Injected. In addition, the device adds a poison creation + event to its internal Informational Event log, updates the + Event Status register, and if configured, interrupts the host. + It is not an error to inject poison into an address that + already has poison present and no error is returned. The + inject_poison attribute is only visible for devices supporting + the capability. + + +What: /sys/kernel/debug/memX/clear_poison +Date: April, 2023 +KernelVersion: v6.4 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Device Physical Address (DPA) is written to this + attribute, the memdev driver sends a clear poison command to + the device for the specified address. Clearing poison removes + the address from the device's Poison List and writes 0 (zero) + for 64 bytes starting at address. It is not an error to clear + poison from an address that does not have poison set. If the + device cannot clear poison from the address, -ENXIO is returned. + The clear_poison attribute is only visible for devices + supporting the capability. diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 3acf2f17a73f..48ac0d911801 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -415,3 +415,17 @@ Description: 1), and checks that the hardware accepts the commit request. Reading this value indicates whether the region is committed or not. + + +What: /sys/bus/cxl/devices/memX/trigger_poison_list +Date: April, 2023 +KernelVersion: v6.4 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a boolean 'true' is written to this attribute the + memdev driver retrieves the poison list from the device. The + list consists of addresses that are poisoned, or would result + in poison if accessed, and the source of the poison. This + attribute is only visible for devices supporting the + capability. The retrieved errors are logged as kernel + events when cxl_poison event tracing is enabled. diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index cde475e13216..27f0968449de 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -25,7 +25,12 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); #define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type) int cxl_region_init(void); void cxl_region_exit(void); +int cxl_get_poison_by_endpoint(struct cxl_port *port); #else +static inline int cxl_get_poison_by_endpoint(struct cxl_port *port) +{ + return 0; +} static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) { } @@ -64,4 +69,10 @@ int cxl_memdev_init(void); void cxl_memdev_exit(void); void cxl_mbox_init(void); +enum cxl_poison_trace_type { + CXL_POISON_TRACE_LIST, + CXL_POISON_TRACE_INJECT, + CXL_POISON_TRACE_CLEAR, +}; + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 02cc2c38b44b..7889ff203a34 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ -#include <linux/io-64-nonatomic-hi-lo.h> #include <linux/seq_file.h> #include <linux/device.h> #include <linux/delay.h> @@ -93,8 +92,9 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, cxl_probe_component_regs(&port->dev, crb, &map.component_map); if (!map.component_map.hdm_decoder.valid) { - dev_err(&port->dev, "HDM decoder registers invalid\n"); - return -ENXIO; + dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); + /* unique error code to indicate no HDM decoder capability */ + return -ENODEV; } return cxl_map_component_regs(&port->dev, regs, &map, @@ -130,6 +130,14 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) */ for (i = 0; i < cxlhdm->decoder_count; i++) { ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + dev_dbg(&info->port->dev, + "decoder%d.%d: committed: %ld base: %#x_%.8x size: %#x_%.8x\n", + info->port->id, i, + FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl), + readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)), + readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)), + readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)), + readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i))); if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl)) return false; } @@ -269,8 +277,11 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, lockdep_assert_held_write(&cxl_dpa_rwsem); - if (!len) - goto success; + if (!len) { + dev_warn(dev, "decoder%d.%d: empty reservation attempted\n", + port->id, cxled->cxld.id); + return -EINVAL; + } if (cxled->dpa_res) { dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n", @@ -323,7 +334,6 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, cxled->mode = CXL_DECODER_MIXED; } -success: port->hdm_end++; get_device(&cxled->cxld.dev); return 0; @@ -783,8 +793,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, int *target_map, void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { + u64 size, base, skip, dpa_size, lo, hi; struct cxl_endpoint_decoder *cxled; - u64 size, base, skip, dpa_size; bool committed; u32 remainder; int i, rc; @@ -799,8 +809,12 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, which, info); ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); - base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); - size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); + lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); + hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which)); + base = (hi << 32) + lo; + lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); + hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which)); + size = (hi << 32) + lo; committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED); cxld->commit = cxl_decoder_commit; cxld->reset = cxl_decoder_reset; @@ -833,6 +847,13 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, port->id, cxld->id); return -ENXIO; } + + if (size == 0) { + dev_warn(&port->dev, + "decoder%d.%d: Committed with zero size\n", + port->id, cxld->id); + return -ENXIO; + } port->commit_end = cxld->id; } else { /* unless / until type-2 drivers arrive, assume type-3 */ @@ -855,9 +876,14 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, if (rc) return rc; + dev_dbg(&port->dev, "decoder%d.%d: range: %#llx-%#llx iw: %d ig: %d\n", + port->id, cxld->id, cxld->hpa_range.start, cxld->hpa_range.end, + cxld->interleave_ways, cxld->interleave_granularity); + if (!info) { - target_list.value = - ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); + lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which)); + hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which)); + target_list.value = (hi << 32) + lo; for (i = 0; i < cxld->interleave_ways; i++) target_map[i] = target_list.target_id[i]; @@ -874,7 +900,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, port->id, cxld->id, size, cxld->interleave_ways); return -ENXIO; } - skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which)); + lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which)); + hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which)); + skip = (hi << 32) + lo; cxled = to_cxl_endpoint_decoder(&cxld->dev); rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip); if (rc) { diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index f2addb457172..23b9ff920d7e 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/security.h> #include <linux/debugfs.h> #include <linux/ktime.h> #include <linux/mutex.h> +#include <asm/unaligned.h> +#include <cxlpci.h> #include <cxlmem.h> #include <cxl.h> @@ -61,12 +62,7 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0), CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0), CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), - CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0), - CXL_CMD(INJECT_POISON, 0x8, 0, 0), - CXL_CMD(CLEAR_POISON, 0x48, 0, 0), CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), - CXL_CMD(SCAN_MEDIA, 0x11, 0, 0), - CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0), }; /* @@ -87,6 +83,9 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { * * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that * is kept up to date with patrol notifications and error management. + * + * CXL_MBOX_OP_[GET_,INJECT_,CLEAR_]POISON: These commands require kernel + * driver orchestration for safety. */ static u16 cxl_disabled_raw_commands[] = { CXL_MBOX_OP_ACTIVATE_FW, @@ -95,6 +94,9 @@ static u16 cxl_disabled_raw_commands[] = { CXL_MBOX_OP_SET_SHUTDOWN_STATE, CXL_MBOX_OP_SCAN_MEDIA, CXL_MBOX_OP_GET_SCAN_MEDIA, + CXL_MBOX_OP_GET_POISON, + CXL_MBOX_OP_INJECT_POISON, + CXL_MBOX_OP_CLEAR_POISON, }; /* @@ -119,6 +121,43 @@ static bool cxl_is_security_command(u16 opcode) return false; } +static bool cxl_is_poison_command(u16 opcode) +{ +#define CXL_MBOX_OP_POISON_CMDS 0x43 + + if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS) + return true; + + return false; +} + +static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison, + u16 opcode) +{ + switch (opcode) { + case CXL_MBOX_OP_GET_POISON: + set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds); + break; + case CXL_MBOX_OP_INJECT_POISON: + set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds); + break; + case CXL_MBOX_OP_CLEAR_POISON: + set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds); + break; + case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS: + set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds); + break; + case CXL_MBOX_OP_SCAN_MEDIA: + set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds); + break; + case CXL_MBOX_OP_GET_SCAN_MEDIA: + set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds); + break; + default: + break; + } +} + static struct cxl_mem_command *cxl_mem_find_command(u16 opcode) { struct cxl_mem_command *c; @@ -634,13 +673,18 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel) u16 opcode = le16_to_cpu(cel_entry[i].opcode); struct cxl_mem_command *cmd = cxl_mem_find_command(opcode); - if (!cmd) { + if (!cmd && !cxl_is_poison_command(opcode)) { dev_dbg(cxlds->dev, "Opcode 0x%04x unsupported by driver\n", opcode); continue; } - set_bit(cmd->info.id, cxlds->enabled_cmds); + if (cmd) + set_bit(cmd->info.id, cxlds->enabled_cmds); + + if (cxl_is_poison_command(opcode)) + cxl_set_poison_cmd_enabled(&cxlds->poison, opcode); + dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode); } } @@ -994,6 +1038,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ struct cxl_mbox_identify id; struct cxl_mbox_cmd mbox_cmd; + u32 val; int rc; mbox_cmd = (struct cxl_mbox_cmd) { @@ -1017,6 +1062,11 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) cxlds->lsa_size = le32_to_cpu(id.lsa_size); memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision)); + if (test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) { + val = get_unaligned_le24(id.poison_list_max_mer); + cxlds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX); + } + return 0; } EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); @@ -1107,6 +1157,91 @@ int cxl_set_timestamp(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL); +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, + struct cxl_region *cxlr) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_mbox_poison_out *po; + struct cxl_mbox_poison_in pi; + struct cxl_mbox_cmd mbox_cmd; + int nr_records = 0; + int rc; + + rc = mutex_lock_interruptible(&cxlds->poison.lock); + if (rc) + return rc; + + po = cxlds->poison.list_out; + pi.offset = cpu_to_le64(offset); + pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT); + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_POISON, + .size_in = sizeof(pi), + .payload_in = &pi, + .size_out = cxlds->payload_size, + .payload_out = po, + .min_out = struct_size(po, record, 0), + }; + + do { + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) + break; + + for (int i = 0; i < le16_to_cpu(po->count); i++) + trace_cxl_poison(cxlmd, cxlr, &po->record[i], + po->flags, po->overflow_ts, + CXL_POISON_TRACE_LIST); + + /* Protect against an uncleared _FLAG_MORE */ + nr_records = nr_records + le16_to_cpu(po->count); + if (nr_records >= cxlds->poison.max_errors) { + dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n", + nr_records); + break; + } + } while (po->flags & CXL_POISON_FLAG_MORE); + + mutex_unlock(&cxlds->poison.lock); + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, CXL); + +static void free_poison_buf(void *buf) +{ + kvfree(buf); +} + +/* Get Poison List output buffer is protected by cxlds->poison.lock */ +static int cxl_poison_alloc_buf(struct cxl_dev_state *cxlds) +{ + cxlds->poison.list_out = kvmalloc(cxlds->payload_size, GFP_KERNEL); + if (!cxlds->poison.list_out) + return -ENOMEM; + + return devm_add_action_or_reset(cxlds->dev, free_poison_buf, + cxlds->poison.list_out); +} + +int cxl_poison_state_init(struct cxl_dev_state *cxlds) +{ + int rc; + + if (!test_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds)) + return 0; + + rc = cxl_poison_alloc_buf(cxlds); + if (rc) { + clear_bit(CXL_POISON_ENABLED_LIST, cxlds->poison.enabled_cmds); + return rc; + } + + mutex_init(&cxlds->poison.lock); + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL); + struct cxl_dev_state *cxl_dev_state_create(struct device *dev) { struct cxl_dev_state *cxlds; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 28a05f2fe32d..057a43267290 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -6,6 +6,7 @@ #include <linux/idr.h> #include <linux/pci.h> #include <cxlmem.h> +#include "trace.h" #include "core.h" static DECLARE_RWSEM(cxl_memdev_rwsem); @@ -106,6 +107,232 @@ static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(numa_node); +static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + u64 offset, length; + int rc = 0; + + /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */ + if (resource_size(&cxlds->pmem_res)) { + offset = cxlds->pmem_res.start; + length = resource_size(&cxlds->pmem_res); + rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); + if (rc) + return rc; + } + if (resource_size(&cxlds->ram_res)) { + offset = cxlds->ram_res.start; + length = resource_size(&cxlds->ram_res); + rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); + /* + * Invalid Physical Address is not an error for + * volatile addresses. Device support is optional. + */ + if (rc == -EFAULT) + rc = 0; + } + return rc; +} + +int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) +{ + struct cxl_port *port; + int rc; + + port = dev_get_drvdata(&cxlmd->dev); + if (!port || !is_cxl_endpoint(port)) + return -EINVAL; + + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) + return rc; + + if (port->commit_end == -1) { + /* No regions mapped to this memdev */ + rc = cxl_get_poison_by_memdev(cxlmd); + } else { + /* Regions mapped, collect poison by endpoint */ + rc = cxl_get_poison_by_endpoint(port); + } + up_read(&cxl_dpa_rwsem); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL); + +struct cxl_dpa_to_region_context { + struct cxl_region *cxlr; + u64 dpa; +}; + +static int __cxl_dpa_to_region(struct device *dev, void *arg) +{ + struct cxl_dpa_to_region_context *ctx = arg; + struct cxl_endpoint_decoder *cxled; + u64 dpa = ctx->dpa; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) + return 0; + + if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) + return 0; + + dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, + dev_name(&cxled->cxld.region->dev)); + + ctx->cxlr = cxled->cxld.region; + + return 1; +} + +static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa) +{ + struct cxl_dpa_to_region_context ctx; + struct cxl_port *port; + + ctx = (struct cxl_dpa_to_region_context) { + .dpa = dpa, + }; + port = dev_get_drvdata(&cxlmd->dev); + if (port && is_cxl_endpoint(port) && port->commit_end != -1) + device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); + + return ctx.cxlr; +} + +static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + if (!resource_size(&cxlds->dpa_res)) { + dev_dbg(cxlds->dev, "device has no dpa resource\n"); + return -EINVAL; + } + if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) { + dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n", + dpa, &cxlds->dpa_res); + return -EINVAL; + } + if (!IS_ALIGNED(dpa, 64)) { + dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa); + return -EINVAL; + } + + return 0; +} + +int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_mbox_inject_poison inject; + struct cxl_poison_record record; + struct cxl_mbox_cmd mbox_cmd; + struct cxl_region *cxlr; + int rc; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) + return rc; + + rc = cxl_validate_poison_dpa(cxlmd, dpa); + if (rc) + goto out; + + inject.address = cpu_to_le64(dpa); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_INJECT_POISON, + .size_in = sizeof(inject), + .payload_in = &inject, + }; + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) + goto out; + + cxlr = cxl_dpa_to_region(cxlmd, dpa); + if (cxlr) + dev_warn_once(cxlds->dev, + "poison inject dpa:%#llx region: %s\n", dpa, + dev_name(&cxlr->dev)); + + record = (struct cxl_poison_record) { + .address = cpu_to_le64(dpa), + .length = cpu_to_le32(1), + }; + trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); +out: + up_read(&cxl_dpa_rwsem); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL); + +int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_mbox_clear_poison clear; + struct cxl_poison_record record; + struct cxl_mbox_cmd mbox_cmd; + struct cxl_region *cxlr; + int rc; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) + return rc; + + rc = cxl_validate_poison_dpa(cxlmd, dpa); + if (rc) + goto out; + + /* + * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command + * is defined to accept 64 bytes of write-data, along with the + * address to clear. This driver uses zeroes as write-data. + */ + clear = (struct cxl_mbox_clear_poison) { + .address = cpu_to_le64(dpa) + }; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_CLEAR_POISON, + .size_in = sizeof(clear), + .payload_in = &clear, + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc) + goto out; + + cxlr = cxl_dpa_to_region(cxlmd, dpa); + if (cxlr) + dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n", + dpa, dev_name(&cxlr->dev)); + + record = (struct cxl_poison_record) { + .address = cpu_to_le64(dpa), + .length = cpu_to_le32(1), + }; + trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); +out: + up_read(&cxl_dpa_rwsem); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL); + static struct attribute *cxl_memdev_attributes[] = { &dev_attr_serial.attr, &dev_attr_firmware_version.attr, diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 523d5b9fd7fc..bdbd907884ce 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -441,27 +441,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL); #define CXL_DOE_TABLE_ACCESS_LAST_ENTRY 0xffff #define CXL_DOE_PROTOCOL_TABLE_ACCESS 2 -static struct pci_doe_mb *find_cdat_doe(struct device *uport) -{ - struct cxl_memdev *cxlmd; - struct cxl_dev_state *cxlds; - unsigned long index; - void *entry; - - cxlmd = to_cxl_memdev(uport); - cxlds = cxlmd->cxlds; - - xa_for_each(&cxlds->doe_mbs, index, entry) { - struct pci_doe_mb *cur = entry; - - if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL, - CXL_DOE_PROTOCOL_TABLE_ACCESS)) - return cur; - } - - return NULL; -} - #define CDAT_DOE_REQ(entry_handle) cpu_to_le32 \ (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \ CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \ @@ -469,51 +448,26 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport) CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \ FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle))) -static void cxl_doe_task_complete(struct pci_doe_task *task) -{ - complete(task->private); -} - -struct cdat_doe_task { - __le32 request_pl; - __le32 response_pl[32]; - struct completion c; - struct pci_doe_task task; -}; - -#define DECLARE_CDAT_DOE_TASK(req, cdt) \ -struct cdat_doe_task cdt = { \ - .c = COMPLETION_INITIALIZER_ONSTACK(cdt.c), \ - .request_pl = req, \ - .task = { \ - .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, \ - .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \ - .request_pl = &cdt.request_pl, \ - .request_pl_sz = sizeof(cdt.request_pl), \ - .response_pl = cdt.response_pl, \ - .response_pl_sz = sizeof(cdt.response_pl), \ - .complete = cxl_doe_task_complete, \ - .private = &cdt.c, \ - } \ -} - static int cxl_cdat_get_length(struct device *dev, struct pci_doe_mb *cdat_doe, size_t *length) { - DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t); + __le32 request = CDAT_DOE_REQ(0); + __le32 response[2]; int rc; - rc = pci_doe_submit_task(cdat_doe, &t.task); + rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL, + CXL_DOE_PROTOCOL_TABLE_ACCESS, + &request, sizeof(request), + &response, sizeof(response)); if (rc < 0) { - dev_err(dev, "DOE submit failed: %d", rc); + dev_err(dev, "DOE failed: %d", rc); return rc; } - wait_for_completion(&t.c); - if (t.task.rv < 2 * sizeof(__le32)) + if (rc < sizeof(response)) return -EIO; - *length = le32_to_cpu(t.response_pl[1]); + *length = le32_to_cpu(response[1]); dev_dbg(dev, "CDAT length %zu\n", *length); return 0; @@ -521,51 +475,55 @@ static int cxl_cdat_get_length(struct device *dev, static int cxl_cdat_read_table(struct device *dev, struct pci_doe_mb *cdat_doe, - struct cxl_cdat *cdat) + void *cdat_table, size_t *cdat_length) { - size_t length = cdat->length; - __le32 *data = cdat->table; + size_t length = *cdat_length + sizeof(__le32); + __le32 *data = cdat_table; int entry_handle = 0; + __le32 saved_dw = 0; do { - DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t); + __le32 request = CDAT_DOE_REQ(entry_handle); struct cdat_entry_header *entry; size_t entry_dw; int rc; - rc = pci_doe_submit_task(cdat_doe, &t.task); + rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL, + CXL_DOE_PROTOCOL_TABLE_ACCESS, + &request, sizeof(request), + data, length); if (rc < 0) { - dev_err(dev, "DOE submit failed: %d", rc); + dev_err(dev, "DOE failed: %d", rc); return rc; } - wait_for_completion(&t.c); /* 1 DW Table Access Response Header + CDAT entry */ - entry = (struct cdat_entry_header *)(t.response_pl + 1); + entry = (struct cdat_entry_header *)(data + 1); if ((entry_handle == 0 && - t.task.rv != sizeof(__le32) + sizeof(struct cdat_header)) || + rc != sizeof(__le32) + sizeof(struct cdat_header)) || (entry_handle > 0 && - (t.task.rv < sizeof(__le32) + sizeof(*entry) || - t.task.rv != sizeof(__le32) + le16_to_cpu(entry->length)))) + (rc < sizeof(__le32) + sizeof(*entry) || + rc != sizeof(__le32) + le16_to_cpu(entry->length)))) return -EIO; /* Get the CXL table access header entry handle */ entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, - le32_to_cpu(t.response_pl[0])); - entry_dw = t.task.rv / sizeof(__le32); + le32_to_cpu(data[0])); + entry_dw = rc / sizeof(__le32); /* Skip Header */ entry_dw -= 1; - entry_dw = min(length / sizeof(__le32), entry_dw); - /* Prevent length < 1 DW from causing a buffer overflow */ - if (entry_dw) { - memcpy(data, entry, entry_dw * sizeof(__le32)); - length -= entry_dw * sizeof(__le32); - data += entry_dw; - } + /* + * Table Access Response Header overwrote the last DW of + * previous entry, so restore that DW + */ + *data = saved_dw; + length -= entry_dw * sizeof(__le32); + data += entry_dw; + saved_dw = *data; } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY); /* Length in CDAT header may exceed concatenation of CDAT entries */ - cdat->length -= length; + *cdat_length -= length - sizeof(__le32); return 0; } @@ -578,13 +536,19 @@ static int cxl_cdat_read_table(struct device *dev, */ void read_cdat_data(struct cxl_port *port) { - struct pci_doe_mb *cdat_doe; + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + struct device *host = cxlmd->dev.parent; struct device *dev = &port->dev; - struct device *uport = port->uport; + struct pci_doe_mb *cdat_doe; size_t cdat_length; + void *cdat_table; int rc; - cdat_doe = find_cdat_doe(uport); + if (!dev_is_pci(host)) + return; + cdat_doe = pci_find_doe_mailbox(to_pci_dev(host), + PCI_DVSEC_VENDOR_ID_CXL, + CXL_DOE_PROTOCOL_TABLE_ACCESS); if (!cdat_doe) { dev_dbg(dev, "No CDAT mailbox\n"); return; @@ -597,19 +561,20 @@ void read_cdat_data(struct cxl_port *port) return; } - port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL); - if (!port->cdat.table) + cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32), + GFP_KERNEL); + if (!cdat_table) return; - port->cdat.length = cdat_length; - rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat); + rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length); if (rc) { /* Don't leave table data allocated on error */ - devm_kfree(dev, port->cdat.table); - port->cdat.table = NULL; - port->cdat.length = 0; + devm_kfree(dev, cdat_table); dev_err(dev, "CDAT data read error\n"); } + + port->cdat.table = cdat_table + sizeof(__le32); + port->cdat.length = cdat_length; } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 72b889af9917..da2068475fa2 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/memregion.h> #include <linux/workqueue.h> #include <linux/debugfs.h> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index b2fd67fcebfb..f822de44bee0 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2238,6 +2238,130 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL); +struct cxl_poison_context { + struct cxl_port *port; + enum cxl_decoder_mode mode; + u64 offset; +}; + +static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd, + struct cxl_poison_context *ctx) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + u64 offset, length; + int rc = 0; + + /* + * Collect poison for the remaining unmapped resources + * after poison is collected by committed endpoints. + * + * Knowing that PMEM must always follow RAM, get poison + * for unmapped resources based on the last decoder's mode: + * ram: scan remains of ram range, then any pmem range + * pmem: scan remains of pmem range + */ + + if (ctx->mode == CXL_DECODER_RAM) { + offset = ctx->offset; + length = resource_size(&cxlds->ram_res) - offset; + rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); + if (rc == -EFAULT) + rc = 0; + if (rc) + return rc; + } + if (ctx->mode == CXL_DECODER_PMEM) { + offset = ctx->offset; + length = resource_size(&cxlds->dpa_res) - offset; + if (!length) + return 0; + } else if (resource_size(&cxlds->pmem_res)) { + offset = cxlds->pmem_res.start; + length = resource_size(&cxlds->pmem_res); + } else { + return 0; + } + + return cxl_mem_get_poison(cxlmd, offset, length, NULL); +} + +static int poison_by_decoder(struct device *dev, void *arg) +{ + struct cxl_poison_context *ctx = arg; + struct cxl_endpoint_decoder *cxled; + struct cxl_memdev *cxlmd; + u64 offset, length; + int rc = 0; + + if (!is_endpoint_decoder(dev)) + return rc; + + cxled = to_cxl_endpoint_decoder(dev); + if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) + return rc; + + /* + * Regions are only created with single mode decoders: pmem or ram. + * Linux does not support mixed mode decoders. This means that + * reading poison per endpoint decoder adheres to the requirement + * that poison reads of pmem and ram must be separated. + * CXL 3.0 Spec 8.2.9.8.4.1 + */ + if (cxled->mode == CXL_DECODER_MIXED) { + dev_dbg(dev, "poison list read unsupported in mixed mode\n"); + return rc; + } + + cxlmd = cxled_to_memdev(cxled); + if (cxled->skip) { + offset = cxled->dpa_res->start - cxled->skip; + length = cxled->skip; + rc = cxl_mem_get_poison(cxlmd, offset, length, NULL); + if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) + rc = 0; + if (rc) + return rc; + } + + offset = cxled->dpa_res->start; + length = cxled->dpa_res->end - offset + 1; + rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region); + if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM) + rc = 0; + if (rc) + return rc; + + /* Iterate until commit_end is reached */ + if (cxled->cxld.id == ctx->port->commit_end) { + ctx->offset = cxled->dpa_res->end + 1; + ctx->mode = cxled->mode; + return 1; + } + + return 0; +} + +int cxl_get_poison_by_endpoint(struct cxl_port *port) +{ + struct cxl_poison_context ctx; + int rc = 0; + + rc = down_read_interruptible(&cxl_region_rwsem); + if (rc) + return rc; + + ctx = (struct cxl_poison_context) { + .port = port + }; + + rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder); + if (rc == 1) + rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport), &ctx); + + up_read(&cxl_region_rwsem); + return rc; +} + static struct lock_class_key cxl_pmem_region_key; static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c index 29ae7ce81dc5..d0403dc3c8ab 100644 --- a/drivers/cxl/core/trace.c +++ b/drivers/cxl/core/trace.c @@ -1,5 +1,99 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include <cxl.h> +#include "core.h" + #define CREATE_TRACE_POINTS #include "trace.h" + +static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos) +{ + struct cxl_region_params *p = &cxlr->params; + int gran = p->interleave_granularity; + int ways = p->interleave_ways; + u64 offset; + + /* Is the hpa within this region at all */ + if (hpa < p->res->start || hpa > p->res->end) { + dev_dbg(&cxlr->dev, + "Addr trans fail: hpa 0x%llx not in region\n", hpa); + return false; + } + + /* Is the hpa in an expected chunk for its pos(-ition) */ + offset = hpa - p->res->start; + offset = do_div(offset, gran * ways); + if ((offset >= pos * gran) && (offset < (pos + 1) * gran)) + return true; + + dev_dbg(&cxlr->dev, + "Addr trans fail: hpa 0x%llx not in expected chunk\n", hpa); + + return false; +} + +static u64 cxl_dpa_to_hpa(u64 dpa, struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) +{ + u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; + struct cxl_region_params *p = &cxlr->params; + int pos = cxled->pos; + u16 eig = 0; + u8 eiw = 0; + + ways_to_eiw(p->interleave_ways, &eiw); + granularity_to_eig(p->interleave_granularity, &eig); + + /* + * The device position in the region interleave set was removed + * from the offset at HPA->DPA translation. To reconstruct the + * HPA, place the 'pos' in the offset. + * + * The placement of 'pos' in the HPA is determined by interleave + * ways and granularity and is defined in the CXL Spec 3.0 Section + * 8.2.4.19.13 Implementation Note: Device Decode Logic + */ + + /* Remove the dpa base */ + dpa_offset = dpa - cxl_dpa_resource_start(cxled); + + mask_upper = GENMASK_ULL(51, eig + 8); + + if (eiw < 8) { + hpa_offset = (dpa_offset & mask_upper) << eiw; + hpa_offset |= pos << (eig + 8); + } else { + bits_upper = (dpa_offset & mask_upper) >> (eig + 8); + bits_upper = bits_upper * 3; + hpa_offset = ((bits_upper << (eiw - 8)) + pos) << (eig + 8); + } + + /* The lower bits remain unchanged */ + hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); + + /* Apply the hpa_offset to the region base address */ + hpa = hpa_offset + p->res->start; + + if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos)) + return ULLONG_MAX; + + return hpa; +} + +u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *cxlmd, + u64 dpa) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled = NULL; + + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxlmd == cxled_to_memdev(cxled)) + break; + } + if (!cxled || cxlmd != cxled_to_memdev(cxled)) + return ULLONG_MAX; + + return cxl_dpa_to_hpa(dpa, cxlr, cxled); +} diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 9b8d3d997834..a0b5819bc70b 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -7,10 +7,12 @@ #define _CXL_EVENTS_H #include <linux/tracepoint.h> +#include <linux/pci.h> #include <asm-generic/unaligned.h> #include <cxl.h> #include <cxlmem.h> +#include "core.h" #define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) #define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) @@ -600,6 +602,107 @@ TRACE_EVENT(cxl_memory_module, ) ); +#define show_poison_trace_type(type) \ + __print_symbolic(type, \ + { CXL_POISON_TRACE_LIST, "List" }, \ + { CXL_POISON_TRACE_INJECT, "Inject" }, \ + { CXL_POISON_TRACE_CLEAR, "Clear" }) + +#define __show_poison_source(source) \ + __print_symbolic(source, \ + { CXL_POISON_SOURCE_UNKNOWN, "Unknown" }, \ + { CXL_POISON_SOURCE_EXTERNAL, "External" }, \ + { CXL_POISON_SOURCE_INTERNAL, "Internal" }, \ + { CXL_POISON_SOURCE_INJECTED, "Injected" }, \ + { CXL_POISON_SOURCE_VENDOR, "Vendor" }) + +#define show_poison_source(source) \ + (((source > CXL_POISON_SOURCE_INJECTED) && \ + (source != CXL_POISON_SOURCE_VENDOR)) ? "Reserved" \ + : __show_poison_source(source)) + +#define show_poison_flags(flags) \ + __print_flags(flags, "|", \ + { CXL_POISON_FLAG_MORE, "More" }, \ + { CXL_POISON_FLAG_OVERFLOW, "Overflow" }, \ + { CXL_POISON_FLAG_SCANNING, "Scanning" }) + +#define __cxl_poison_addr(record) \ + (le64_to_cpu(record->address)) +#define cxl_poison_record_dpa(record) \ + (__cxl_poison_addr(record) & CXL_POISON_START_MASK) +#define cxl_poison_record_source(record) \ + (__cxl_poison_addr(record) & CXL_POISON_SOURCE_MASK) +#define cxl_poison_record_dpa_length(record) \ + (le32_to_cpu(record->length) * CXL_POISON_LEN_MULT) +#define cxl_poison_overflow(flags, time) \ + (flags & CXL_POISON_FLAG_OVERFLOW ? le64_to_cpu(time) : 0) + +u64 cxl_trace_hpa(struct cxl_region *cxlr, struct cxl_memdev *memdev, u64 dpa); + +TRACE_EVENT(cxl_poison, + + TP_PROTO(struct cxl_memdev *cxlmd, struct cxl_region *region, + const struct cxl_poison_record *record, u8 flags, + __le64 overflow_ts, enum cxl_poison_trace_type trace_type), + + TP_ARGS(cxlmd, region, record, flags, overflow_ts, trace_type), + + TP_STRUCT__entry( + __string(memdev, dev_name(&cxlmd->dev)) + __string(host, dev_name(cxlmd->dev.parent)) + __field(u64, serial) + __field(u8, trace_type) + __string(region, region) + __field(u64, overflow_ts) + __field(u64, hpa) + __field(u64, dpa) + __field(u32, dpa_length) + __array(char, uuid, 16) + __field(u8, source) + __field(u8, flags) + ), + + TP_fast_assign( + __assign_str(memdev, dev_name(&cxlmd->dev)); + __assign_str(host, dev_name(cxlmd->dev.parent)); + __entry->serial = cxlmd->cxlds->serial; + __entry->overflow_ts = cxl_poison_overflow(flags, overflow_ts); + __entry->dpa = cxl_poison_record_dpa(record); + __entry->dpa_length = cxl_poison_record_dpa_length(record); + __entry->source = cxl_poison_record_source(record); + __entry->trace_type = trace_type; + __entry->flags = flags; + if (region) { + __assign_str(region, dev_name(®ion->dev)); + memcpy(__entry->uuid, ®ion->params.uuid, 16); + __entry->hpa = cxl_trace_hpa(region, cxlmd, + __entry->dpa); + } else { + __assign_str(region, ""); + memset(__entry->uuid, 0, 16); + __entry->hpa = ULLONG_MAX; + } + ), + + TP_printk("memdev=%s host=%s serial=%lld trace_type=%s region=%s " \ + "region_uuid=%pU hpa=0x%llx dpa=0x%llx dpa_length=0x%x " \ + "source=%s flags=%s overflow_time=%llu", + __get_str(memdev), + __get_str(host), + __entry->serial, + show_poison_trace_type(__entry->trace_type), + __get_str(region), + __entry->uuid, + __entry->hpa, + __entry->dpa, + __entry->dpa_length, + show_poison_source(__entry->source), + show_poison_flags(__entry->flags), + __entry->overflow_ts + ) +); + #endif /* _CXL_EVENTS_H */ #define TRACE_INCLUDE_FILE trace diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 090acebba4fa..db12b6313afb 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -127,7 +127,7 @@ struct cxl_mbox_cmd { }; /* - * Per CXL 2.0 Section 8.2.8.4.5.1 + * Per CXL 3.0 Section 8.2.8.4.5.1 */ #define CMD_CMD_RC_TABLE \ C(SUCCESS, 0, NULL), \ @@ -145,14 +145,22 @@ struct cxl_mbox_cmd { C(FWROLLBACK, -ENXIO, "rolled back to the previous active FW"), \ C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ - C(PADDR, -ENXIO, "physical address specified is invalid"), \ + C(PADDR, -EFAULT, "physical address specified is invalid"), \ C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ C(ABORT, -ENXIO, "background cmd was aborted by device"), \ C(SECURITY, -ENXIO, "not valid in the current security state"), \ C(PASSPHRASE, -ENXIO, "phrase doesn't match current set passphrase"), \ C(MBUNSUPPORTED, -ENXIO, "unsupported on the mailbox it was issued on"),\ - C(PAYLOADLEN, -ENXIO, "invalid payload length") + C(PAYLOADLEN, -ENXIO, "invalid payload length"), \ + C(LOG, -ENXIO, "invalid or unsupported log page"), \ + C(INTERRUPTED, -ENXIO, "asynchronous event occured"), \ + C(FEATUREVERSION, -ENXIO, "unsupported feature version"), \ + C(FEATURESELVALUE, -ENXIO, "unsupported feature selection value"), \ + C(FEATURETRANSFERIP, -ENXIO, "feature transfer in progress"), \ + C(FEATURETRANSFEROOO, -ENXIO, "feature transfer out of order"), \ + C(RESOURCEEXHAUSTED, -ENXIO, "resources are exhausted"), \ + C(EXTLIST, -ENXIO, "invalid Extent List"), \ #undef C #define C(a, b, c) CXL_MBOX_CMD_RC_##a @@ -215,6 +223,37 @@ struct cxl_event_state { struct mutex log_lock; }; +/* Device enabled poison commands */ +enum poison_cmd_enabled_bits { + CXL_POISON_ENABLED_LIST, + CXL_POISON_ENABLED_INJECT, + CXL_POISON_ENABLED_CLEAR, + CXL_POISON_ENABLED_SCAN_CAPS, + CXL_POISON_ENABLED_SCAN_MEDIA, + CXL_POISON_ENABLED_SCAN_RESULTS, + CXL_POISON_ENABLED_MAX +}; + +/** + * struct cxl_poison_state - Driver poison state info + * + * @max_errors: Maximum media error records held in device cache + * @enabled_cmds: All poison commands enabled in the CEL + * @list_out: The poison list payload returned by device + * @lock: Protect reads of the poison list + * + * Reads of the poison list are synchronized to ensure that a reader + * does not get an incomplete list because their request overlapped + * (was interrupted or preceded by) another read request of the same + * DPA range. CXL Spec 3.0 Section 8.2.9.8.4.1 + */ +struct cxl_poison_state { + u32 max_errors; + DECLARE_BITMAP(enabled_cmds, CXL_POISON_ENABLED_MAX); + struct cxl_mbox_poison_out *list_out; + struct mutex lock; /* Protect reads of poison list */ +}; + /** * struct cxl_dev_state - The driver device state * @@ -249,8 +288,8 @@ struct cxl_event_state { * @component_reg_phys: register base of component registers * @info: Cached DVSEC information about the device. * @serial: PCIe Device Serial Number - * @doe_mbs: PCI DOE mailbox array * @event: event log driver state + * @poison: poison driver state info * @mbox_send: @dev specific transport for transmitting mailbox commands * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for @@ -287,9 +326,8 @@ struct cxl_dev_state { resource_size_t component_reg_phys; u64 serial; - struct xarray doe_mbs; - struct cxl_event_state event; + struct cxl_poison_state poison; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); }; @@ -538,6 +576,61 @@ struct cxl_mbox_set_timestamp_in { } __packed; +/* Get Poison List CXL 3.0 Spec 8.2.9.8.4.1 */ +struct cxl_mbox_poison_in { + __le64 offset; + __le64 length; +} __packed; + +struct cxl_mbox_poison_out { + u8 flags; + u8 rsvd1; + __le64 overflow_ts; + __le16 count; + u8 rsvd2[20]; + struct cxl_poison_record { + __le64 address; + __le32 length; + __le32 rsvd; + } __packed record[]; +} __packed; + +/* + * Get Poison List address field encodes the starting + * address of poison, and the source of the poison. + */ +#define CXL_POISON_START_MASK GENMASK_ULL(63, 6) +#define CXL_POISON_SOURCE_MASK GENMASK(2, 0) + +/* Get Poison List record length is in units of 64 bytes */ +#define CXL_POISON_LEN_MULT 64 + +/* Kernel defined maximum for a list of poison errors */ +#define CXL_POISON_LIST_MAX 1024 + +/* Get Poison List: Payload out flags */ +#define CXL_POISON_FLAG_MORE BIT(0) +#define CXL_POISON_FLAG_OVERFLOW BIT(1) +#define CXL_POISON_FLAG_SCANNING BIT(2) + +/* Get Poison List: Poison Source */ +#define CXL_POISON_SOURCE_UNKNOWN 0 +#define CXL_POISON_SOURCE_EXTERNAL 1 +#define CXL_POISON_SOURCE_INTERNAL 2 +#define CXL_POISON_SOURCE_INJECTED 3 +#define CXL_POISON_SOURCE_VENDOR 7 + +/* Inject & Clear Poison CXL 3.0 Spec 8.2.9.8.4.2/3 */ +struct cxl_mbox_inject_poison { + __le64 address; +}; + +/* Clear Poison CXL 3.0 Spec 8.2.9.8.4.3 */ +struct cxl_mbox_clear_poison { + __le64 address; + u8 write_data[CXL_POISON_LEN_MULT]; +} __packed; + /** * struct cxl_mem_command - Driver representation of a memory device command * @info: Command information as it exists for the UAPI @@ -608,6 +701,12 @@ void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status); int cxl_set_timestamp(struct cxl_dev_state *cxlds); +int cxl_poison_state_init(struct cxl_dev_state *cxlds); +int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, + struct cxl_region *cxlr); +int cxl_trigger_poison_list(struct cxl_memdev *cxlmd); +int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa); #ifdef CONFIG_CXL_SUSPEND void cxl_mem_active_inc(void); diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 39c4b54f0715..10caf180b3fa 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -94,6 +94,26 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, return 0; } +static int cxl_debugfs_poison_inject(void *data, u64 dpa) +{ + struct cxl_memdev *cxlmd = data; + + return cxl_inject_poison(cxlmd, dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL, + cxl_debugfs_poison_inject, "%llx\n"); + +static int cxl_debugfs_poison_clear(void *data, u64 dpa) +{ + struct cxl_memdev *cxlmd = data; + + return cxl_clear_poison(cxlmd, dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, + cxl_debugfs_poison_clear, "%llx\n"); + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -117,6 +137,14 @@ static int cxl_mem_probe(struct device *dev) dentry = cxl_debugfs_create_dir(dev_name(dev)); debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show); + + if (test_bit(CXL_POISON_ENABLED_INJECT, cxlds->poison.enabled_cmds)) + debugfs_create_file("inject_poison", 0200, dentry, cxlmd, + &cxl_poison_inject_fops); + if (test_bit(CXL_POISON_ENABLED_CLEAR, cxlds->poison.enabled_cmds)) + debugfs_create_file("clear_poison", 0200, dentry, cxlmd, + &cxl_poison_clear_fops); + rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); if (rc) return rc; @@ -176,10 +204,53 @@ unlock: return devm_add_action_or_reset(dev, enable_suspend, NULL); } +static ssize_t trigger_poison_list_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + bool trigger; + int rc; + + if (kstrtobool(buf, &trigger) || !trigger) + return -EINVAL; + + rc = cxl_trigger_poison_list(to_cxl_memdev(dev)); + + return rc ? rc : len; +} +static DEVICE_ATTR_WO(trigger_poison_list); + +static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + if (a == &dev_attr_trigger_poison_list.attr) { + struct device *dev = kobj_to_dev(kobj); + + if (!test_bit(CXL_POISON_ENABLED_LIST, + to_cxl_memdev(dev)->cxlds->poison.enabled_cmds)) + return 0; + } + return a->mode; +} + +static struct attribute *cxl_mem_attrs[] = { + &dev_attr_trigger_poison_list.attr, + NULL +}; + +static struct attribute_group cxl_mem_group = { + .attrs = cxl_mem_attrs, + .is_visible = cxl_mem_visible, +}; + +__ATTRIBUTE_GROUPS(cxl_mem); + static struct cxl_driver cxl_mem_driver = { .name = "cxl_mem", .probe = cxl_mem_probe, .id = CXL_DEVICE_MEMORY_EXPANDER, + .drv = { + .dev_groups = cxl_mem_groups, + }, }; module_cxl_driver(cxl_mem_driver); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 60b23624d167..f7a5b8e9c102 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -8,7 +8,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/pci.h> -#include <linux/pci-doe.h> #include <linux/aer.h> #include <linux/io.h> #include "cxlmem.h" @@ -357,52 +356,6 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } -static void cxl_pci_destroy_doe(void *mbs) -{ - xa_destroy(mbs); -} - -static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds) -{ - struct device *dev = cxlds->dev; - struct pci_dev *pdev = to_pci_dev(dev); - u16 off = 0; - - xa_init(&cxlds->doe_mbs); - if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) { - dev_err(dev, "Failed to create XArray for DOE's\n"); - return; - } - - /* - * Mailbox creation is best effort. Higher layers must determine if - * the lack of a mailbox for their protocol is a device failure or not. - */ - pci_doe_for_each_off(pdev, off) { - struct pci_doe_mb *doe_mb; - - doe_mb = pcim_doe_create_mb(pdev, off); - if (IS_ERR(doe_mb)) { - dev_err(dev, "Failed to create MB object for MB @ %x\n", - off); - continue; - } - - if (!pci_request_config_region_exclusive(pdev, off, - PCI_DOE_CAP_SIZEOF, - dev_name(dev))) - pci_err(pdev, "Failed to exclude DOE registers\n"); - - if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) { - dev_err(dev, "xa_insert failed to insert MB @ %x\n", - off); - continue; - } - - dev_dbg(dev, "Created DOE mailbox @%x\n", off); - } -} - /* * Assume that any RCIEP that emits the CXL memory expander class code * is an RCD @@ -750,8 +703,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) cxlds->component_reg_phys = map.resource; - devm_cxl_pci_create_doe(cxlds); - rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component, &map, BIT(CXL_CM_CAP_CAP_ID_RAS)); if (rc) @@ -769,6 +720,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = cxl_poison_state_init(cxlds); + if (rc) + return rc; + rc = cxl_dev_state_identify(cxlds); if (rc) return rc; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 22a7ab2bae7c..eb57324c4ad4 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -66,14 +66,22 @@ static int cxl_switch_port_probe(struct cxl_port *port) if (rc < 0) return rc; - if (rc == 1) - return devm_cxl_add_passthrough_decoder(port); - cxlhdm = devm_cxl_setup_hdm(port, NULL); - if (IS_ERR(cxlhdm)) + if (!IS_ERR(cxlhdm)) + return devm_cxl_enumerate_decoders(cxlhdm, NULL); + + if (PTR_ERR(cxlhdm) != -ENODEV) { + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); return PTR_ERR(cxlhdm); + } + + if (rc == 1) { + dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); + return devm_cxl_add_passthrough_decoder(port); + } - return devm_cxl_enumerate_decoders(cxlhdm, NULL); + dev_err(&port->dev, "HDM decoder capability not found\n"); + return -ENXIO; } static int cxl_endpoint_port_probe(struct cxl_port *port) diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index e5e9b287b976..1b97a5ab71a9 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -20,6 +20,8 @@ #include <linux/pci-doe.h> #include <linux/workqueue.h> +#include "pci.h" + #define PCI_DOE_PROTOCOL_DISCOVERY 0 /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */ @@ -37,7 +39,7 @@ * * This state is used to manage a single DOE mailbox capability. All fields * should be considered opaque to the consumers and the structure passed into - * the helpers below after being created by devm_pci_doe_create() + * the helpers below after being created by pci_doe_create_mb(). * * @pdev: PCI device this mailbox belongs to * @cap_offset: Capability offset @@ -56,6 +58,40 @@ struct pci_doe_mb { unsigned long flags; }; +struct pci_doe_protocol { + u16 vid; + u8 type; +}; + +/** + * struct pci_doe_task - represents a single query/response + * + * @prot: DOE Protocol + * @request_pl: The request payload + * @request_pl_sz: Size of the request payload (bytes) + * @response_pl: The response payload + * @response_pl_sz: Size of the response payload (bytes) + * @rv: Return value. Length of received response or error (bytes) + * @complete: Called when task is complete + * @private: Private data for the consumer + * @work: Used internally by the mailbox + * @doe_mb: Used internally by the mailbox + */ +struct pci_doe_task { + struct pci_doe_protocol prot; + const __le32 *request_pl; + size_t request_pl_sz; + __le32 *response_pl; + size_t response_pl_sz; + int rv; + void (*complete)(struct pci_doe_task *task); + void *private; + + /* initialized by pci_doe_submit_task() */ + struct work_struct work; + struct pci_doe_mb *doe_mb; +}; + static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout) { if (wait_event_timeout(doe_mb->wq, @@ -110,7 +146,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, { struct pci_dev *pdev = doe_mb->pdev; int offset = doe_mb->cap_offset; - size_t length; + size_t length, remainder; u32 val; int i; @@ -128,7 +164,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, return -EIO; /* Length is 2 DW of header + length of payload in DW */ - length = 2 + task->request_pl_sz / sizeof(__le32); + length = 2 + DIV_ROUND_UP(task->request_pl_sz, sizeof(__le32)); if (length > PCI_DOE_MAX_LENGTH) return -EIO; if (length == PCI_DOE_MAX_LENGTH) @@ -141,10 +177,21 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, length)); + + /* Write payload */ for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++) pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, le32_to_cpu(task->request_pl[i])); + /* Write last payload dword */ + remainder = task->request_pl_sz % sizeof(__le32); + if (remainder) { + val = 0; + memcpy(&val, &task->request_pl[i], remainder); + le32_to_cpus(&val); + pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val); + } + pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO); return 0; @@ -164,11 +211,11 @@ static bool pci_doe_data_obj_ready(struct pci_doe_mb *doe_mb) static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) { + size_t length, payload_length, remainder, received; struct pci_dev *pdev = doe_mb->pdev; int offset = doe_mb->cap_offset; - size_t length, payload_length; + int i = 0; u32 val; - int i; /* Read the first dword to get the protocol */ pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val); @@ -195,15 +242,38 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas /* First 2 dwords have already been read */ length -= 2; - payload_length = min(length, task->response_pl_sz / sizeof(__le32)); - /* Read the rest of the response payload */ - for (i = 0; i < payload_length; i++) { + received = task->response_pl_sz; + payload_length = DIV_ROUND_UP(task->response_pl_sz, sizeof(__le32)); + remainder = task->response_pl_sz % sizeof(__le32); + + /* remainder signifies number of data bytes in last payload dword */ + if (!remainder) + remainder = sizeof(__le32); + + if (length < payload_length) { + received = length * sizeof(__le32); + payload_length = length; + remainder = sizeof(__le32); + } + + if (payload_length) { + /* Read all payload dwords except the last */ + for (; i < payload_length - 1; i++) { + pci_read_config_dword(pdev, offset + PCI_DOE_READ, + &val); + task->response_pl[i] = cpu_to_le32(val); + pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0); + } + + /* Read last payload dword */ pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val); - task->response_pl[i] = cpu_to_le32(val); + cpu_to_le32s(&val); + memcpy(&task->response_pl[i], &val, remainder); /* Prior to the last ack, ensure Data Object Ready */ - if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb)) + if (!pci_doe_data_obj_ready(doe_mb)) return -EIO; pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0); + i++; } /* Flush excess length */ @@ -217,7 +287,7 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) return -EIO; - return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32); + return received; } static void signal_task_complete(struct pci_doe_task *task, int rv) @@ -321,26 +391,15 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid, __le32 request_pl_le = cpu_to_le32(request_pl); __le32 response_pl_le; u32 response_pl; - DECLARE_COMPLETION_ONSTACK(c); - struct pci_doe_task task = { - .prot.vid = PCI_VENDOR_ID_PCI_SIG, - .prot.type = PCI_DOE_PROTOCOL_DISCOVERY, - .request_pl = &request_pl_le, - .request_pl_sz = sizeof(request_pl), - .response_pl = &response_pl_le, - .response_pl_sz = sizeof(response_pl), - .complete = pci_doe_task_complete, - .private = &c, - }; int rc; - rc = pci_doe_submit_task(doe_mb, &task); + rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_PROTOCOL_DISCOVERY, + &request_pl_le, sizeof(request_pl_le), + &response_pl_le, sizeof(response_pl_le)); if (rc < 0) return rc; - wait_for_completion(&c); - - if (task.rv != sizeof(response_pl)) + if (rc != sizeof(response_pl_le)) return -EIO; response_pl = le32_to_cpu(response_pl_le); @@ -385,37 +444,18 @@ static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb) return 0; } -static void pci_doe_xa_destroy(void *mb) -{ - struct pci_doe_mb *doe_mb = mb; - - xa_destroy(&doe_mb->prots); -} - -static void pci_doe_destroy_workqueue(void *mb) +static void pci_doe_cancel_tasks(struct pci_doe_mb *doe_mb) { - struct pci_doe_mb *doe_mb = mb; - - destroy_workqueue(doe_mb->work_queue); -} - -static void pci_doe_flush_mb(void *mb) -{ - struct pci_doe_mb *doe_mb = mb; - /* Stop all pending work items from starting */ set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags); /* Cancel an in progress work item, if necessary */ set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags); wake_up(&doe_mb->wq); - - /* Flush all work items */ - flush_workqueue(doe_mb->work_queue); } /** - * pcim_doe_create_mb() - Create a DOE mailbox object + * pci_doe_create_mb() - Create a DOE mailbox object * * @pdev: PCI device to create the DOE mailbox for * @cap_offset: Offset of the DOE mailbox @@ -426,64 +466,77 @@ static void pci_doe_flush_mb(void *mb) * RETURNS: created mailbox object on success * ERR_PTR(-errno) on failure */ -struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset) +static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev, + u16 cap_offset) { struct pci_doe_mb *doe_mb; - struct device *dev = &pdev->dev; int rc; - doe_mb = devm_kzalloc(dev, sizeof(*doe_mb), GFP_KERNEL); + doe_mb = kzalloc(sizeof(*doe_mb), GFP_KERNEL); if (!doe_mb) return ERR_PTR(-ENOMEM); doe_mb->pdev = pdev; doe_mb->cap_offset = cap_offset; init_waitqueue_head(&doe_mb->wq); - xa_init(&doe_mb->prots); - rc = devm_add_action(dev, pci_doe_xa_destroy, doe_mb); - if (rc) - return ERR_PTR(rc); doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0, - dev_driver_string(&pdev->dev), + dev_bus_name(&pdev->dev), pci_name(pdev), doe_mb->cap_offset); if (!doe_mb->work_queue) { pci_err(pdev, "[%x] failed to allocate work queue\n", doe_mb->cap_offset); - return ERR_PTR(-ENOMEM); + rc = -ENOMEM; + goto err_free; } - rc = devm_add_action_or_reset(dev, pci_doe_destroy_workqueue, doe_mb); - if (rc) - return ERR_PTR(rc); /* Reset the mailbox by issuing an abort */ rc = pci_doe_abort(doe_mb); if (rc) { pci_err(pdev, "[%x] failed to reset mailbox with abort command : %d\n", doe_mb->cap_offset, rc); - return ERR_PTR(rc); + goto err_destroy_wq; } /* * The state machine and the mailbox should be in sync now; - * Set up mailbox flush prior to using the mailbox to query protocols. + * Use the mailbox to query protocols. */ - rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb); - if (rc) - return ERR_PTR(rc); - rc = pci_doe_cache_protocols(doe_mb); if (rc) { pci_err(pdev, "[%x] failed to cache protocols : %d\n", doe_mb->cap_offset, rc); - return ERR_PTR(rc); + goto err_cancel; } return doe_mb; + +err_cancel: + pci_doe_cancel_tasks(doe_mb); + xa_destroy(&doe_mb->prots); +err_destroy_wq: + destroy_workqueue(doe_mb->work_queue); +err_free: + kfree(doe_mb); + return ERR_PTR(rc); +} + +/** + * pci_doe_destroy_mb() - Destroy a DOE mailbox object + * + * @doe_mb: DOE mailbox + * + * Destroy all internal data structures created for the DOE mailbox. + */ +static void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb) +{ + pci_doe_cancel_tasks(doe_mb); + xa_destroy(&doe_mb->prots); + destroy_workqueue(doe_mb->work_queue); + kfree(doe_mb); } -EXPORT_SYMBOL_GPL(pcim_doe_create_mb); /** * pci_doe_supports_prot() - Return if the DOE instance supports the given @@ -494,7 +547,7 @@ EXPORT_SYMBOL_GPL(pcim_doe_create_mb); * * RETURNS: True if the DOE mailbox supports the protocol specified */ -bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type) +static bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type) { unsigned long index; void *entry; @@ -509,7 +562,6 @@ bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type) return false; } -EXPORT_SYMBOL_GPL(pci_doe_supports_prot); /** * pci_doe_submit_task() - Submit a task to be processed by the state machine @@ -530,19 +582,12 @@ EXPORT_SYMBOL_GPL(pci_doe_supports_prot); * * RETURNS: 0 when task has been successfully queued, -ERRNO on error */ -int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) +static int pci_doe_submit_task(struct pci_doe_mb *doe_mb, + struct pci_doe_task *task) { if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type)) return -EINVAL; - /* - * DOE requests must be a whole number of DW and the response needs to - * be big enough for at least 1 DW - */ - if (task->request_pl_sz % sizeof(__le32) || - task->response_pl_sz < sizeof(__le32)) - return -EINVAL; - if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) return -EIO; @@ -551,4 +596,129 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) queue_work(doe_mb->work_queue, &task->work); return 0; } -EXPORT_SYMBOL_GPL(pci_doe_submit_task); + +/** + * pci_doe() - Perform Data Object Exchange + * + * @doe_mb: DOE Mailbox + * @vendor: Vendor ID + * @type: Data Object Type + * @request: Request payload + * @request_sz: Size of request payload (bytes) + * @response: Response payload + * @response_sz: Size of response payload (bytes) + * + * Submit @request to @doe_mb and store the @response. + * The DOE exchange is performed synchronously and may therefore sleep. + * + * Payloads are treated as opaque byte streams which are transmitted verbatim, + * without byte-swapping. If payloads contain little-endian register values, + * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu(). + * + * For convenience, arbitrary payload sizes are allowed even though PCIe r6.0 + * sec 6.30.1 specifies the Data Object Header 2 "Length" in dwords. The last + * (partial) dword is copied with byte granularity and padded with zeroes if + * necessary. Callers are thus relieved of using dword-sized bounce buffers. + * + * RETURNS: Length of received response or negative errno. + * Received data in excess of @response_sz is discarded. + * The length may be smaller than @response_sz and the caller + * is responsible for checking that. + */ +int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type, + const void *request, size_t request_sz, + void *response, size_t response_sz) +{ + DECLARE_COMPLETION_ONSTACK(c); + struct pci_doe_task task = { + .prot.vid = vendor, + .prot.type = type, + .request_pl = request, + .request_pl_sz = request_sz, + .response_pl = response, + .response_pl_sz = response_sz, + .complete = pci_doe_task_complete, + .private = &c, + }; + int rc; + + rc = pci_doe_submit_task(doe_mb, &task); + if (rc) + return rc; + + wait_for_completion(&c); + + return task.rv; +} +EXPORT_SYMBOL_GPL(pci_doe); + +/** + * pci_find_doe_mailbox() - Find Data Object Exchange mailbox + * + * @pdev: PCI device + * @vendor: Vendor ID + * @type: Data Object Type + * + * Find first DOE mailbox of a PCI device which supports the given protocol. + * + * RETURNS: Pointer to the DOE mailbox or NULL if none was found. + */ +struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor, + u8 type) +{ + struct pci_doe_mb *doe_mb; + unsigned long index; + + xa_for_each(&pdev->doe_mbs, index, doe_mb) + if (pci_doe_supports_prot(doe_mb, vendor, type)) + return doe_mb; + + return NULL; +} +EXPORT_SYMBOL_GPL(pci_find_doe_mailbox); + +void pci_doe_init(struct pci_dev *pdev) +{ + struct pci_doe_mb *doe_mb; + u16 offset = 0; + int rc; + + xa_init(&pdev->doe_mbs); + + while ((offset = pci_find_next_ext_capability(pdev, offset, + PCI_EXT_CAP_ID_DOE))) { + doe_mb = pci_doe_create_mb(pdev, offset); + if (IS_ERR(doe_mb)) { + pci_err(pdev, "[%x] failed to create mailbox: %ld\n", + offset, PTR_ERR(doe_mb)); + continue; + } + + rc = xa_insert(&pdev->doe_mbs, offset, doe_mb, GFP_KERNEL); + if (rc) { + pci_err(pdev, "[%x] failed to insert mailbox: %d\n", + offset, rc); + pci_doe_destroy_mb(doe_mb); + } + } +} + +void pci_doe_destroy(struct pci_dev *pdev) +{ + struct pci_doe_mb *doe_mb; + unsigned long index; + + xa_for_each(&pdev->doe_mbs, index, doe_mb) + pci_doe_destroy_mb(doe_mb); + + xa_destroy(&pdev->doe_mbs); +} + +void pci_doe_disconnected(struct pci_dev *pdev) +{ + struct pci_doe_mb *doe_mb; + unsigned long index; + + xa_for_each(&pdev->doe_mbs, index, doe_mb) + pci_doe_cancel_tasks(doe_mb); +} diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 67005a0ee381..2475098f6518 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -311,6 +311,16 @@ struct pci_sriov { bool drivers_autoprobe; /* Auto probing of VFs by driver */ }; +#ifdef CONFIG_PCI_DOE +void pci_doe_init(struct pci_dev *pdev); +void pci_doe_destroy(struct pci_dev *pdev); +void pci_doe_disconnected(struct pci_dev *pdev); +#else +static inline void pci_doe_init(struct pci_dev *pdev) { } +static inline void pci_doe_destroy(struct pci_dev *pdev) { } +static inline void pci_doe_disconnected(struct pci_dev *pdev) { } +#endif + /** * pci_dev_set_io_state - Set the new error state if possible. * @@ -347,6 +357,7 @@ static inline bool pci_dev_set_io_state(struct pci_dev *dev, static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) { pci_dev_set_io_state(dev, pci_channel_io_perm_failure); + pci_doe_disconnected(dev); return 0; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 9123bafab451..0b2826c4a832 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2479,6 +2479,7 @@ static void pci_init_capabilities(struct pci_dev *dev) pci_aer_init(dev); /* Advanced Error Reporting */ pci_dpc_init(dev); /* Downstream Port Containment */ pci_rcec_init(dev); /* Root Complex Event Collector */ + pci_doe_init(dev); /* Data Object Exchange */ pcie_report_downtraining(dev); pci_init_reset_methods(dev); diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 30a787d45d2e..d68aee29386b 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -38,6 +38,7 @@ static void pci_destroy_dev(struct pci_dev *dev) list_del(&dev->bus_list); up_write(&pci_bus_sem); + pci_doe_destroy(dev); pcie_aspm_exit_link_state(dev); pci_bridge_d3_update(dev); pci_free_resources(dev); diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h index 43765eaf2342..1f14aed4354b 100644 --- a/include/linux/pci-doe.h +++ b/include/linux/pci-doe.h @@ -13,69 +13,13 @@ #ifndef LINUX_PCI_DOE_H #define LINUX_PCI_DOE_H -struct pci_doe_protocol { - u16 vid; - u8 type; -}; - struct pci_doe_mb; -/** - * struct pci_doe_task - represents a single query/response - * - * @prot: DOE Protocol - * @request_pl: The request payload - * @request_pl_sz: Size of the request payload (bytes) - * @response_pl: The response payload - * @response_pl_sz: Size of the response payload (bytes) - * @rv: Return value. Length of received response or error (bytes) - * @complete: Called when task is complete - * @private: Private data for the consumer - * @work: Used internally by the mailbox - * @doe_mb: Used internally by the mailbox - * - * Payloads are treated as opaque byte streams which are transmitted verbatim, - * without byte-swapping. If payloads contain little-endian register values, - * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu(). - * - * The payload sizes and rv are specified in bytes with the following - * restrictions concerning the protocol. - * - * 1) The request_pl_sz must be a multiple of double words (4 bytes) - * 2) The response_pl_sz must be >= a single double word (4 bytes) - * 3) rv is returned as bytes but it will be a multiple of double words - * - * NOTE there is no need for the caller to initialize work or doe_mb. - */ -struct pci_doe_task { - struct pci_doe_protocol prot; - __le32 *request_pl; - size_t request_pl_sz; - __le32 *response_pl; - size_t response_pl_sz; - int rv; - void (*complete)(struct pci_doe_task *task); - void *private; - - /* No need for the user to initialize these fields */ - struct work_struct work; - struct pci_doe_mb *doe_mb; -}; - -/** - * pci_doe_for_each_off - Iterate each DOE capability - * @pdev: struct pci_dev to iterate - * @off: u16 of config space offset of each mailbox capability found - */ -#define pci_doe_for_each_off(pdev, off) \ - for (off = pci_find_next_ext_capability(pdev, off, \ - PCI_EXT_CAP_ID_DOE); \ - off > 0; \ - off = pci_find_next_ext_capability(pdev, off, \ - PCI_EXT_CAP_ID_DOE)) +struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor, + u8 type); -struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset); -bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type); -int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task); +int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type, + const void *request, size_t request_sz, + void *response, size_t response_sz); #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index 0b57e37d8e77..60b8772b5bd4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -512,6 +512,9 @@ struct pci_dev { #ifdef CONFIG_PCI_P2PDMA struct pci_p2pdma __rcu *p2pdma; #endif +#ifdef CONFIG_PCI_DOE + struct xarray doe_mbs; /* Data Object Exchange mailboxes */ +#endif u16 acs_cap; /* ACS Capability offset */ phys_addr_t rom; /* Physical address if not from BAR */ size_t romlen; /* Length if not from BAR */ diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 86bbacf2a315..14bc6e742148 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -40,19 +40,22 @@ ___C(SET_ALERT_CONFIG, "Set Alert Configuration"), \ ___C(GET_SHUTDOWN_STATE, "Get Shutdown State"), \ ___C(SET_SHUTDOWN_STATE, "Set Shutdown State"), \ - ___C(GET_POISON, "Get Poison List"), \ - ___C(INJECT_POISON, "Inject Poison"), \ - ___C(CLEAR_POISON, "Clear Poison"), \ + ___DEPRECATED(GET_POISON, "Get Poison List"), \ + ___DEPRECATED(INJECT_POISON, "Inject Poison"), \ + ___DEPRECATED(CLEAR_POISON, "Clear Poison"), \ ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ - ___C(SCAN_MEDIA, "Scan Media"), \ - ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ + ___DEPRECATED(SCAN_MEDIA, "Scan Media"), \ + ___DEPRECATED(GET_SCAN_MEDIA, "Get Scan Media Results"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a +#define ___DEPRECATED(a, b) CXL_MEM_DEPRECATED_ID_##a enum { CXL_CMDS }; #undef ___C +#undef ___DEPRECATED #define ___C(a, b) { b } +#define ___DEPRECATED(a, b) { "Deprecated " b } static const struct { const char *name; } cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS }; @@ -68,6 +71,28 @@ static const struct { */ #undef ___C +#undef ___DEPRECATED +#define ___C(a, b) (0) +#define ___DEPRECATED(a, b) (1) + +static const __u8 cxl_deprecated_commands[] + __attribute__((__unused__)) = { CXL_CMDS }; + +/* + * Here's how this actually breaks out: + * cxl_deprecated_commands[] = { + * [CXL_MEM_COMMAND_ID_INVALID] = 0, + * [CXL_MEM_COMMAND_ID_IDENTIFY] = 0, + * ... + * [CXL_MEM_DEPRECATED_ID_GET_POISON] = 1, + * [CXL_MEM_DEPRECATED_ID_INJECT_POISON] = 1, + * [CXL_MEM_DEPRECATED_ID_CLEAR_POISON] = 1, + * ... + * }; + */ + +#undef ___C +#undef ___DEPRECATED /** * struct cxl_command_info - Command information returned from a query. diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c index 99b56b5f6edf..0902c5d6e410 100644 --- a/tools/testing/cxl/config_check.c +++ b/tools/testing/cxl/config_check.c @@ -13,4 +13,5 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM)); BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)); BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_DEBUG_FS)); } diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 9263b04d35f7..ba572d03c687 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -7,6 +7,7 @@ #include <linux/delay.h> #include <linux/sizes.h> #include <linux/bits.h> +#include <asm/unaligned.h> #include <cxlmem.h> #include "trace.h" @@ -15,6 +16,11 @@ #define DEV_SIZE SZ_2G #define EFFECT(x) (1U << x) +#define MOCK_INJECT_DEV_MAX 8 +#define MOCK_INJECT_TEST_MAX 128 + +static unsigned int poison_inject_dev_max = MOCK_INJECT_DEV_MAX; + static struct cxl_cel_entry mock_cel[] = { { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_LOGS), @@ -40,6 +46,18 @@ static struct cxl_cel_entry mock_cel[] = { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO), .effect = cpu_to_le16(0), }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON), + .effect = cpu_to_le16(0), + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_INJECT_POISON), + .effect = cpu_to_le16(0), + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_CLEAR_POISON), + .effect = cpu_to_le16(0), + }, }; /* See CXL 2.0 Table 181 Get Health Info Output Payload */ @@ -98,6 +116,7 @@ struct cxl_mockmem_data { int master_limit; struct mock_event_store mes; u8 event_buf[SZ_4K]; + u64 timestamp; }; static struct mock_event_log *event_find_log(struct device *dev, int log_type) @@ -361,6 +380,22 @@ struct cxl_event_mem_module mem_module = { } }; +static int mock_set_timestamp(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_mbox_set_timestamp_in *ts = cmd->payload_in; + + if (cmd->size_in != sizeof(*ts)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + mdata->timestamp = le64_to_cpu(ts->timestamp); + return 0; +} + static void cxl_mock_add_event_logs(struct mock_event_store *mes) { put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, @@ -469,8 +504,11 @@ static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) cpu_to_le64(SZ_256M / CXL_CAPACITY_MULTIPLIER), .total_capacity = cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER), + .inject_poison_limit = cpu_to_le16(MOCK_INJECT_TEST_MAX), }; + put_unaligned_le24(CXL_POISON_LIST_MAX, id.poison_list_max_mer); + if (cmd->size_out < sizeof(id)) return -EINVAL; @@ -888,12 +926,203 @@ static int mock_health_info(struct cxl_dev_state *cxlds, return 0; } +static struct mock_poison { + struct cxl_dev_state *cxlds; + u64 dpa; +} mock_poison_list[MOCK_INJECT_TEST_MAX]; + +static struct cxl_mbox_poison_out * +cxl_get_injected_po(struct cxl_dev_state *cxlds, u64 offset, u64 length) +{ + struct cxl_mbox_poison_out *po; + int nr_records = 0; + u64 dpa; + + po = kzalloc(struct_size(po, record, poison_inject_dev_max), GFP_KERNEL); + if (!po) + return NULL; + + for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { + if (mock_poison_list[i].cxlds != cxlds) + continue; + if (mock_poison_list[i].dpa < offset || + mock_poison_list[i].dpa > offset + length - 1) + continue; + + dpa = mock_poison_list[i].dpa + CXL_POISON_SOURCE_INJECTED; + po->record[nr_records].address = cpu_to_le64(dpa); + po->record[nr_records].length = cpu_to_le32(1); + nr_records++; + if (nr_records == poison_inject_dev_max) + break; + } + + /* Always return count, even when zero */ + po->count = cpu_to_le16(nr_records); + + return po; +} + +static int mock_get_poison(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_poison_in *pi = cmd->payload_in; + struct cxl_mbox_poison_out *po; + u64 offset = le64_to_cpu(pi->offset); + u64 length = le64_to_cpu(pi->length); + int nr_records; + + po = cxl_get_injected_po(cxlds, offset, length); + if (!po) + return -ENOMEM; + nr_records = le16_to_cpu(po->count); + memcpy(cmd->payload_out, po, struct_size(po, record, nr_records)); + cmd->size_out = struct_size(po, record, nr_records); + kfree(po); + + return 0; +} + +static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds) +{ + int count = 0; + + for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { + if (mock_poison_list[i].cxlds == cxlds) + count++; + } + return (count >= poison_inject_dev_max); +} + +static bool mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) +{ + if (mock_poison_dev_max_injected(cxlds)) { + dev_dbg(cxlds->dev, + "Device poison injection limit has been reached: %d\n", + MOCK_INJECT_DEV_MAX); + return false; + } + + for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { + if (!mock_poison_list[i].cxlds) { + mock_poison_list[i].cxlds = cxlds; + mock_poison_list[i].dpa = dpa; + return true; + } + } + dev_dbg(cxlds->dev, + "Mock test poison injection limit has been reached: %d\n", + MOCK_INJECT_TEST_MAX); + + return false; +} + +static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa) +{ + for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { + if (mock_poison_list[i].cxlds == cxlds && + mock_poison_list[i].dpa == dpa) + return true; + } + return false; +} + +static int mock_inject_poison(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_inject_poison *pi = cmd->payload_in; + u64 dpa = le64_to_cpu(pi->address); + + if (mock_poison_found(cxlds, dpa)) { + /* Not an error to inject poison if already poisoned */ + dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa); + return 0; + } + if (!mock_poison_add(cxlds, dpa)) + return -ENXIO; + + return 0; +} + +static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa) +{ + for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { + if (mock_poison_list[i].cxlds == cxlds && + mock_poison_list[i].dpa == dpa) { + mock_poison_list[i].cxlds = NULL; + return true; + } + } + return false; +} + +static int mock_clear_poison(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_clear_poison *pi = cmd->payload_in; + u64 dpa = le64_to_cpu(pi->address); + + /* + * A real CXL device will write pi->write_data to the address + * being cleared. In this mock, just delete this address from + * the mock poison list. + */ + if (!mock_poison_del(cxlds, dpa)) + dev_dbg(cxlds->dev, "DPA: 0x%llx not in poison list\n", dpa); + + return 0; +} + +static bool mock_poison_list_empty(void) +{ + for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { + if (mock_poison_list[i].cxlds) + return false; + } + return true; +} + +static ssize_t poison_inject_max_show(struct device_driver *drv, char *buf) +{ + return sysfs_emit(buf, "%u\n", poison_inject_dev_max); +} + +static ssize_t poison_inject_max_store(struct device_driver *drv, + const char *buf, size_t len) +{ + int val; + + if (kstrtoint(buf, 0, &val) < 0) + return -EINVAL; + + if (!mock_poison_list_empty()) + return -EBUSY; + + if (val <= MOCK_INJECT_TEST_MAX) + poison_inject_dev_max = val; + else + return -EINVAL; + + return len; +} + +static DRIVER_ATTR_RW(poison_inject_max); + +static struct attribute *cxl_mock_mem_core_attrs[] = { + &driver_attr_poison_inject_max.attr, + NULL +}; +ATTRIBUTE_GROUPS(cxl_mock_mem_core); + static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct device *dev = cxlds->dev; int rc = -EIO; switch (cmd->opcode) { + case CXL_MBOX_OP_SET_TIMESTAMP: + rc = mock_set_timestamp(cxlds, cmd); + break; case CXL_MBOX_OP_GET_SUPPORTED_LOGS: rc = mock_gsl(cmd); break; @@ -942,6 +1171,15 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: rc = mock_passphrase_secure_erase(cxlds, cmd); break; + case CXL_MBOX_OP_GET_POISON: + rc = mock_get_poison(cxlds, cmd); + break; + case CXL_MBOX_OP_INJECT_POISON: + rc = mock_inject_poison(cxlds, cmd); + break; + case CXL_MBOX_OP_CLEAR_POISON: + rc = mock_clear_poison(cxlds, cmd); + break; default: break; } @@ -1010,6 +1248,14 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + rc = cxl_poison_state_init(cxlds); + if (rc) + return rc; + + rc = cxl_set_timestamp(cxlds); + if (rc) + return rc; + rc = cxl_dev_state_identify(cxlds); if (rc) return rc; @@ -1083,6 +1329,7 @@ static struct platform_driver cxl_mock_mem_driver = { .driver = { .name = KBUILD_MODNAME, .dev_groups = cxl_mock_mem_groups, + .groups = cxl_mock_mem_core_groups, }, }; |