diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/nfit.c | 775 | ||||
-rw-r--r-- | drivers/acpi/nfit.h | 30 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 131 | ||||
-rw-r--r-- | drivers/nvdimm/core.c | 110 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 6 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 4 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 55 | ||||
-rw-r--r-- | drivers/nvdimm/region.c | 12 |
8 files changed, 835 insertions, 288 deletions
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 269de5f75d98..d0f35e63640b 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -21,6 +21,7 @@ #include <linux/sort.h> #include <linux/pmem.h> #include <linux/io.h> +#include <linux/nd.h> #include <asm/cacheflush.h> #include "nfit.h" @@ -34,6 +35,18 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT; +module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds"); + +/* after three payloads of overflow, it's dead jim */ +static unsigned int scrub_overflow_abort = 3; +module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_overflow_abort, + "Number of times we overflow ARS results before abort"); + +static struct workqueue_struct *nfit_wq; + struct nfit_table_prev { struct list_head spas; struct list_head memdevs; @@ -72,9 +85,90 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } +static int xlat_status(void *buf, unsigned int cmd) +{ + struct nd_cmd_clear_error *clear_err; + struct nd_cmd_ars_status *ars_status; + struct nd_cmd_ars_start *ars_start; + struct nd_cmd_ars_cap *ars_cap; + u16 flags; + + switch (cmd) { + case ND_CMD_ARS_CAP: + ars_cap = buf; + if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + return -ENOTTY; + + /* Command failed */ + if (ars_cap->status & 0xffff) + return -EIO; + + /* No supported scan types for this range */ + flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; + if ((ars_cap->status >> 16 & flags) == 0) + return -ENOTTY; + break; + case ND_CMD_ARS_START: + ars_start = buf; + /* ARS is in progress */ + if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + return -EBUSY; + + /* Command failed */ + if (ars_start->status & 0xffff) + return -EIO; + break; + case ND_CMD_ARS_STATUS: + ars_status = buf; + /* Command failed */ + if (ars_status->status & 0xffff) + return -EIO; + /* Check extended status (Upper two bytes) */ + if (ars_status->status == NFIT_ARS_STATUS_DONE) + return 0; + + /* ARS is in progress */ + if (ars_status->status == NFIT_ARS_STATUS_BUSY) + return -EBUSY; + + /* No ARS performed for the current boot */ + if (ars_status->status == NFIT_ARS_STATUS_NONE) + return -EAGAIN; + + /* + * ARS interrupted, either we overflowed or some other + * agent wants the scan to stop. If we didn't overflow + * then just continue with the returned results. + */ + if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (ars_status->flags & NFIT_ARS_F_OVERFLOW) + return -ENOSPC; + return 0; + } + + /* Unknown status */ + if (ars_status->status >> 16) + return -EIO; + break; + case ND_CMD_CLEAR_ERROR: + clear_err = buf; + if (clear_err->status & 0xffff) + return -EIO; + if (!clear_err->cleared) + return -EIO; + if (clear_err->length > clear_err->cleared) + return clear_err->cleared; + break; + default: + break; + } + + return 0; +} + static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); const struct nd_cmd_desc *desc = NULL; @@ -185,6 +279,8 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, * unfilled in the output buffer */ rc = buf_len - offset - in_buf.buffer.length; + if (cmd_rc) + *cmd_rc = xlat_status(buf, cmd); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -675,12 +771,11 @@ static struct attribute_group acpi_nfit_attribute_group = { .attrs = acpi_nfit_attributes, }; -const struct attribute_group *acpi_nfit_attribute_groups[] = { +static const struct attribute_group *acpi_nfit_attribute_groups[] = { &nvdimm_bus_attribute_group, &acpi_nfit_attribute_group, NULL, }; -EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups); static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev) { @@ -917,7 +1012,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) if (!adev) return; - for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) set_bit(i, &nd_desc->dsm_mask); } @@ -1105,7 +1200,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->addr.base + offset); wmb_blk(nfit_blk); - if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) readq(mmio->addr.base + offset); } @@ -1141,7 +1236,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, memcpy_to_pmem(mmio->addr.aperture + offset, iobuf + copied, c); else { - if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH) + if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) mmio_flush_range((void __force *) mmio->addr.aperture + offset, c); @@ -1328,13 +1423,13 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, memset(&flags, 0, sizeof(flags)); rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, - sizeof(flags)); + sizeof(flags), NULL); if (rc >= 0 && flags.status == 0) nfit_blk->dimm_flags = flags.flags; else if (rc == -ENOTTY) { /* fall back to a conservative default */ - nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH; + nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH; rc = 0; } else rc = -ENXIO; @@ -1473,93 +1568,85 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, /* devm will free nfit_blk */ } -static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_cap *cmd, u64 addr, u64 length) +static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, + struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa) { - cmd->address = addr; - cmd->length = length; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + int cmd_rc, rc; - return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, - sizeof(*cmd)); + cmd->address = spa->address; + cmd->length = spa->length; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, + sizeof(*cmd), &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; } -static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_start *cmd, u64 addr, u64 length) +static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) { int rc; + int cmd_rc; + struct nd_cmd_ars_start ars_start; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - cmd->address = addr; - cmd->length = length; - cmd->type = ND_ARS_PERSISTENT; + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = spa->address; + ars_start.length = spa->length; + if (nfit_spa_type(spa) == NFIT_SPA_PM) + ars_start.type = ND_ARS_PERSISTENT; + else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) + ars_start.type = ND_ARS_VOLATILE; + else + return -ENOTTY; - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd, - sizeof(*cmd)); - if (rc) - return rc; - switch (cmd->status) { - case 0: - return 0; - case 1: - /* ARS unsupported, but we should never get here */ - return 0; - case 6: - /* ARS is in progress */ - msleep(1000); - break; - default: - return -ENXIO; - } - } + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); + + if (rc < 0) + return rc; + return cmd_rc; } -static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_status *cmd, u32 size) +static int ars_continue(struct acpi_nfit_desc *acpi_desc) { - int rc; + int rc, cmd_rc; + struct nd_cmd_ars_start ars_start; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = ars_status->restart_address; + ars_start.length = ars_status->restart_length; + ars_start.type = ars_status->type; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; +} - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd, - size); - if (rc || cmd->status & 0xffff) - return -ENXIO; +static int ars_get_status(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + int rc, cmd_rc; - /* Check extended status (Upper two bytes) */ - switch (cmd->status >> 16) { - case 0: - return 0; - case 1: - /* ARS is in progress */ - msleep(1000); - break; - case 2: - /* No ARS performed for the current boot */ - return 0; - case 3: - /* TODO: error list overflow support */ - default: - return -ENXIO; - } - } + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status, + acpi_desc->ars_status_size, &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; } static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, - struct nd_cmd_ars_status *ars_status, u64 start) + struct nd_cmd_ars_status *ars_status) { int rc; u32 i; - /* - * The address field returned by ars_status should be either - * less than or equal to the address we last started ARS for. - * The (start, length) returned by ars_status should also have - * non-zero overlap with the range we started ARS for. - * If this is not the case, bail. - */ - if (ars_status->address > start || - (ars_status->address + ars_status->length < start)) - return -ENXIO; - for (i = 0; i < ars_status->num_records; i++) { rc = nvdimm_bus_add_poison(nvdimm_bus, ars_status->records[i].err_address, @@ -1613,111 +1700,14 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc, return 0; } -static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, - struct nd_region_desc *ndr_desc) -{ - struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus; - struct nd_cmd_ars_status *ars_status = NULL; - struct nd_cmd_ars_start *ars_start = NULL; - struct nd_cmd_ars_cap *ars_cap = NULL; - u64 start, len, cur, remaining; - u32 ars_status_size; - int rc; - - ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL); - if (!ars_cap) - return -ENOMEM; - - start = ndr_desc->res->start; - len = ndr_desc->res->end - ndr_desc->res->start + 1; - - rc = ars_get_cap(nd_desc, ars_cap, start, len); - if (rc) - goto out; - - /* - * If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in - * extended status is not set, skip this but continue initialization - */ - if ((ars_cap->status & 0xffff) || - !(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) { - dev_warn(acpi_desc->dev, - "ARS unsupported (status: 0x%x), won't create an error list\n", - ars_cap->status); - goto out; - } - - /* - * Check if a full-range ARS has been run. If so, use those results - * without having to start a new ARS. - */ - ars_status_size = ars_cap->max_ars_out; - ars_status = kzalloc(ars_status_size, GFP_KERNEL); - if (!ars_status) { - rc = -ENOMEM; - goto out; - } - - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) - goto out; - - if (ars_status->address <= start && - (ars_status->address + ars_status->length >= start + len)) { - rc = ars_status_process_records(nvdimm_bus, ars_status, start); - goto out; - } - - /* - * ARS_STATUS can overflow if the number of poison entries found is - * greater than the maximum buffer size (ars_cap->max_ars_out) - * To detect overflow, check if the length field of ars_status - * is less than the length we supplied. If so, process the - * error entries we got, adjust the start point, and start again - */ - ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL); - if (!ars_start) - return -ENOMEM; - - cur = start; - remaining = len; - do { - u64 done, end; - - rc = ars_do_start(nd_desc, ars_start, cur, remaining); - if (rc) - goto out; - - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) - goto out; - - rc = ars_status_process_records(nvdimm_bus, ars_status, cur); - if (rc) - goto out; - - end = min(cur + remaining, - ars_status->address + ars_status->length); - done = end - cur; - cur += done; - remaining -= done; - } while (remaining); - - out: - kfree(ars_cap); - kfree(ars_start); - kfree(ars_status); - return rc; -} - static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, struct acpi_nfit_memory_map *memdev, - struct acpi_nfit_system_address *spa) + struct nfit_spa *nfit_spa) { struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, memdev->device_handle); + struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc *ndbr_desc; struct nfit_mem *nfit_mem; int blk_valid = 0; @@ -1753,7 +1743,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, ndbr_desc->enable = acpi_nfit_blk_region_enable; ndbr_desc->disable = acpi_nfit_blk_region_disable; ndbr_desc->do_io = acpi_desc->blk_do_io; - if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc)) + nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) return -ENOMEM; break; } @@ -1773,7 +1765,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct resource res; int count = 0, rc; - if (nfit_spa->is_registered) + if (nfit_spa->nd_region) return 0; if (spa->range_index == 0) { @@ -1810,53 +1802,332 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, } nd_mapping = &nd_mappings[count++]; rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, - memdev, spa); + memdev, nfit_spa); if (rc) - return rc; + goto out; } ndr_desc->nd_mapping = nd_mappings; ndr_desc->num_mappings = count; rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); if (rc) - return rc; + goto out; nvdimm_bus = acpi_desc->nvdimm_bus; if (nfit_spa_type(spa) == NFIT_SPA_PM) { rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc); - if (rc) + if (rc) { dev_warn(acpi_desc->dev, "failed to insert pmem resource to iomem: %d\n", rc); - - rc = acpi_nfit_find_poison(acpi_desc, ndr_desc); - if (rc) { - dev_err(acpi_desc->dev, - "error while performing ARS to find poison: %d\n", - rc); - return rc; + goto out; } - if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + + nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { - if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; + } + + out: + if (rc) + dev_err(acpi_desc->dev, "failed to register spa range %d\n", + nfit_spa->spa->range_index); + return rc; +} + +static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc, + u32 max_ars) +{ + struct device *dev = acpi_desc->dev; + struct nd_cmd_ars_status *ars_status; + + if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) { + memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size); + return 0; } - nfit_spa->is_registered = 1; + if (acpi_desc->ars_status) + devm_kfree(dev, acpi_desc->ars_status); + acpi_desc->ars_status = NULL; + ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL); + if (!ars_status) + return -ENOMEM; + acpi_desc->ars_status = ars_status; + acpi_desc->ars_status_size = max_ars; return 0; } -static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + struct acpi_nfit_system_address *spa = nfit_spa->spa; + int rc; + + if (!nfit_spa->max_ars) { + struct nd_cmd_ars_cap ars_cap; + + memset(&ars_cap, 0, sizeof(ars_cap)); + rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa); + if (rc < 0) + return rc; + nfit_spa->max_ars = ars_cap.max_ars_out; + nfit_spa->clear_err_unit = ars_cap.clear_err_unit; + /* check that the supported scrub types match the spa type */ + if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE && + ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0) + return -ENOTTY; + else if (nfit_spa_type(spa) == NFIT_SPA_PM && + ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0) + return -ENOTTY; + } + + if (ars_status_alloc(acpi_desc, nfit_spa->max_ars)) + return -ENOMEM; + + rc = ars_get_status(acpi_desc); + if (rc < 0 && rc != -ENOSPC) + return rc; + + if (ars_status_process_records(acpi_desc->nvdimm_bus, + acpi_desc->ars_status)) + return -ENOMEM; + + return 0; +} + +static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + struct acpi_nfit_system_address *spa = nfit_spa->spa; + unsigned int overflow_retry = scrub_overflow_abort; + u64 init_ars_start = 0, init_ars_len = 0; + struct device *dev = acpi_desc->dev; + unsigned int tmo = scrub_timeout; + int rc; + + if (nfit_spa->ars_done || !nfit_spa->nd_region) + return; + + rc = ars_start(acpi_desc, nfit_spa); + /* + * If we timed out the initial scan we'll still be busy here, + * and will wait another timeout before giving up permanently. + */ + if (rc < 0 && rc != -EBUSY) + return; + + do { + u64 ars_start, ars_len; + + if (acpi_desc->cancel) + break; + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + if (rc == -ENOTTY) + break; + if (rc == -EBUSY && !tmo) { + dev_warn(dev, "range %d ars timeout, aborting\n", + spa->range_index); + break; + } + + if (rc == -EBUSY) { + /* + * Note, entries may be appended to the list + * while the lock is dropped, but the workqueue + * being active prevents entries being deleted / + * freed. + */ + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + mutex_lock(&acpi_desc->init_mutex); + continue; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + if (!init_ars_len) { + init_ars_len = acpi_desc->ars_status->length; + init_ars_start = acpi_desc->ars_status->address; + } + rc = ars_continue(acpi_desc); + } + + if (rc < 0) { + dev_warn(dev, "range %d ars continuation failed\n", + spa->range_index); + break; + } + + if (init_ars_len) { + ars_start = init_ars_start; + ars_len = init_ars_len; + } else { + ars_start = acpi_desc->ars_status->address; + ars_len = acpi_desc->ars_status->length; + } + dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n", + spa->range_index, ars_start, ars_len); + /* notify the region about new poison entries */ + nvdimm_region_notify(nfit_spa->nd_region, + NVDIMM_REVALIDATE_POISON); + break; + } while (1); +} + +static void acpi_nfit_scrub(struct work_struct *work) { + struct device *dev; + u64 init_scrub_length = 0; struct nfit_spa *nfit_spa; + u64 init_scrub_address = 0; + bool init_ars_done = false; + struct acpi_nfit_desc *acpi_desc; + unsigned int tmo = scrub_timeout; + unsigned int overflow_retry = scrub_overflow_abort; + acpi_desc = container_of(work, typeof(*acpi_desc), work); + dev = acpi_desc->dev; + + /* + * We scrub in 2 phases. The first phase waits for any platform + * firmware initiated scrubs to complete and then we go search for the + * affected spa regions to mark them scanned. In the second phase we + * initiate a directed scrub for every range that was not scrubbed in + * phase 1. + */ + + /* process platform firmware initiated scrubs */ + retry: + mutex_lock(&acpi_desc->init_mutex); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - int rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + struct nd_cmd_ars_status *ars_status; + struct acpi_nfit_system_address *spa; + u64 ars_start, ars_len; + int rc; - if (rc) - return rc; + if (acpi_desc->cancel) + break; + + if (nfit_spa->nd_region) + continue; + + if (init_ars_done) { + /* + * No need to re-query, we're now just + * reconciling all the ranges covered by the + * initial scrub + */ + rc = 0; + } else + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + + if (rc == -ENOTTY) { + /* no ars capability, just register spa and move on */ + acpi_nfit_register_region(acpi_desc, nfit_spa); + continue; + } + + if (rc == -EBUSY && !tmo) { + /* fallthrough to directed scrub in phase 2 */ + dev_warn(dev, "timeout awaiting ars results, continuing...\n"); + break; + } else if (rc == -EBUSY) { + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + goto retry; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + ars_status = acpi_desc->ars_status; + /* + * Record the original scrub range, so that we + * can recall all the ranges impacted by the + * initial scrub. + */ + if (!init_scrub_length) { + init_scrub_length = ars_status->length; + init_scrub_address = ars_status->address; + } + rc = ars_continue(acpi_desc); + if (rc == 0) { + mutex_unlock(&acpi_desc->init_mutex); + goto retry; + } + } + + if (rc < 0) { + /* + * Initial scrub failed, we'll give it one more + * try below... + */ + break; + } + + /* We got some final results, record completed ranges */ + ars_status = acpi_desc->ars_status; + if (init_scrub_length) { + ars_start = init_scrub_address; + ars_len = ars_start + init_scrub_length; + } else { + ars_start = ars_status->address; + ars_len = ars_status->length; + } + spa = nfit_spa->spa; + + if (!init_ars_done) { + init_ars_done = true; + dev_dbg(dev, "init scrub %#llx + %#llx complete\n", + ars_start, ars_len); + } + if (ars_start <= spa->address && ars_start + ars_len + >= spa->address + spa->length) + acpi_nfit_register_region(acpi_desc, nfit_spa); } + + /* + * For all the ranges not covered by an initial scrub we still + * want to see if there are errors, but it's ok to discover them + * asynchronously. + */ + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + /* + * Flag all the ranges that still need scrubbing, but + * register them now to make data available. + */ + if (nfit_spa->nd_region) + nfit_spa->ars_done = 1; + else + acpi_nfit_register_region(acpi_desc, nfit_spa); + } + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + acpi_nfit_async_scrub(acpi_desc, nfit_spa); + mutex_unlock(&acpi_desc->init_mutex); +} + +static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + int rc; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) { + /* BLK regions don't need to wait for ars results */ + rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + if (rc) + return rc; + } + + queue_work(nfit_wq, &acpi_desc->work); return 0; } @@ -1942,15 +2213,64 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); -static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) +struct acpi_nfit_flush_work { + struct work_struct work; + struct completion cmp; +}; + +static void flush_probe(struct work_struct *work) { - struct nvdimm_bus_descriptor *nd_desc; - struct acpi_nfit_desc *acpi_desc; - struct device *dev = &adev->dev; + struct acpi_nfit_flush_work *flush; - acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); - if (!acpi_desc) - return ERR_PTR(-ENOMEM); + flush = container_of(work, typeof(*flush), work); + complete(&flush->cmp); +} + +static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct device *dev = acpi_desc->dev; + struct acpi_nfit_flush_work flush; + + /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ + device_lock(dev); + device_unlock(dev); + + /* + * Scrub work could take 10s of seconds, userspace may give up so we + * need to be interruptible while waiting. + */ + INIT_WORK_ONSTACK(&flush.work, flush_probe); + COMPLETION_INITIALIZER_ONSTACK(flush.cmp); + queue_work(nfit_wq, &flush.work); + return wait_for_completion_interruptible(&flush.cmp); +} + +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + + if (nvdimm) + return 0; + if (cmd != ND_CMD_ARS_START) + return 0; + + /* + * The kernel and userspace may race to initiate a scrub, but + * the scrub thread is prepared to lose that initial race. It + * just needs guarantees that any ars it initiates are not + * interrupted by any intervening start reqeusts from userspace. + */ + if (work_busy(&acpi_desc->work)) + return -EBUSY; + + return 0; +} + +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc; dev_set_drvdata(dev, acpi_desc); acpi_desc->dev = dev; @@ -1958,14 +2278,10 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->flush_probe = acpi_nfit_flush_probe; + nd_desc->clear_to_send = acpi_nfit_clear_to_send; nd_desc->attr_groups = acpi_nfit_attribute_groups; - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); - if (!acpi_desc->nvdimm_bus) { - devm_kfree(dev, acpi_desc); - return ERR_PTR(-ENXIO); - } - INIT_LIST_HEAD(&acpi_desc->spa_maps); INIT_LIST_HEAD(&acpi_desc->spas); INIT_LIST_HEAD(&acpi_desc->dcrs); @@ -1976,9 +2292,9 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); mutex_init(&acpi_desc->init_mutex); - - return acpi_desc; + INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); } +EXPORT_SYMBOL_GPL(acpi_nfit_desc_init); static int acpi_nfit_add(struct acpi_device *adev) { @@ -1997,12 +2313,13 @@ static int acpi_nfit_add(struct acpi_device *adev) return 0; } - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); - return PTR_ERR(acpi_desc); - } + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + return -ENOMEM; + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENOMEM; /* * Save the acpi header for later and then skip it, @@ -2041,6 +2358,8 @@ static int acpi_nfit_remove(struct acpi_device *adev) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + acpi_desc->cancel = 1; + flush_workqueue(nfit_wq); nvdimm_bus_unregister(acpi_desc->nvdimm_bus); return 0; } @@ -2065,12 +2384,19 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } if (!acpi_desc) { - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) goto out_unlock; - } + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) + goto out_unlock; + } else { + /* + * Finish previous registration before considering new + * regions. + */ + flush_workqueue(nfit_wq); } /* Evaluate _FIT */ @@ -2138,12 +2464,17 @@ static __init int nfit_init(void) acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + nfit_wq = create_singlethread_workqueue("nfit"); + if (!nfit_wq) + return -ENOMEM; + return acpi_bus_register_driver(&acpi_nfit_driver); } static __exit void nfit_exit(void) { acpi_bus_unregister_driver(&acpi_nfit_driver); + destroy_workqueue(nfit_wq); } module_init(nfit_init); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 3d549a383659..c75576b2d50e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -14,6 +14,7 @@ */ #ifndef __NFIT_H__ #define __NFIT_H__ +#include <linux/workqueue.h> #include <linux/libnvdimm.h> #include <linux/types.h> #include <linux/uuid.h> @@ -40,15 +41,32 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum nfit_fic { + NFIT_FIC_BYTE = 0x101, /* byte-addressable energy backed */ + NFIT_FIC_BLK = 0x201, /* block-addressable non-energy backed */ + NFIT_FIC_BYTEN = 0x301, /* byte-addressable non-energy backed */ +}; + enum { - ND_BLK_READ_FLUSH = 1, - ND_BLK_DCR_LATCH = 2, + NFIT_BLK_READ_FLUSH = 1, + NFIT_BLK_DCR_LATCH = 2, + NFIT_ARS_STATUS_DONE = 0, + NFIT_ARS_STATUS_BUSY = 1 << 16, + NFIT_ARS_STATUS_NONE = 2 << 16, + NFIT_ARS_STATUS_INTR = 3 << 16, + NFIT_ARS_START_BUSY = 6, + NFIT_ARS_CAP_NONE = 1, + NFIT_ARS_F_OVERFLOW = 1, + NFIT_ARS_TIMEOUT = 90, }; struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; - int is_registered; + struct nd_region *nd_region; + unsigned int ars_done:1; + u32 clear_err_unit; + u32 max_ars; }; struct nfit_dcr { @@ -110,6 +128,10 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + struct nd_cmd_ars_status *ars_status; + size_t ars_status_size; + struct work_struct work; + unsigned int cancel:1; unsigned long dimm_dsm_force_en; unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -182,5 +204,5 @@ static inline struct acpi_nfit_desc *to_acpi_desc( const u8 *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); -extern const struct attribute_group *acpi_nfit_attribute_groups[]; +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); #endif /* __NFIT_H__ */ diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5d28e9405f32..33557481d452 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -133,6 +133,78 @@ static int nvdimm_bus_remove(struct device *dev) return rc; } +void nd_device_notify(struct device *dev, enum nvdimm_event event) +{ + device_lock(dev); + if (dev->driver) { + struct nd_device_driver *nd_drv; + + nd_drv = to_nd_device_driver(dev->driver); + if (nd_drv->notify) + nd_drv->notify(dev, event); + } + device_unlock(dev); +} +EXPORT_SYMBOL(nd_device_notify); + +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + + if (!nvdimm_bus) + return; + + /* caller is responsible for holding a reference on the device */ + nd_device_notify(&nd_region->dev, event); +} +EXPORT_SYMBOL_GPL(nvdimm_region_notify); + +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_cap ars_cap; + u32 clear_err_unit, mask; + int cmd_rc, rc; + + if (!nvdimm_bus) + return -ENXIO; + + nd_desc = nvdimm_bus->nd_desc; + if (!nd_desc->ndctl) + return -ENXIO; + + memset(&ars_cap, 0, sizeof(ars_cap)); + ars_cap.address = phys; + ars_cap.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, + sizeof(ars_cap), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + clear_err_unit = ars_cap.clear_err_unit; + if (!clear_err_unit || !is_power_of_2(clear_err_unit)) + return -ENXIO; + + mask = clear_err_unit - 1; + if ((phys | len) & mask) + return -ENXIO; + memset(&clear_err, 0, sizeof(clear_err)); + clear_err.address = phys; + clear_err.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, + sizeof(clear_err), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return clear_err.cleared; +} +EXPORT_SYMBOL_GPL(nvdimm_clear_poison); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, @@ -395,6 +467,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { .out_num = 3, .out_sizes = { 4, 4, UINT_MAX, }, }, + [ND_CMD_CLEAR_ERROR] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 3, + .out_sizes = { 4, 4, 8, }, + }, }; const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) @@ -463,17 +541,37 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } while (true); } +static int pmem_active(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) && dev->driver) + return -EBUSY; + return 0; +} + /* set_config requires an idle interleave set */ -static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, + struct nvdimm *nvdimm, unsigned int cmd) { - struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + /* ask the bus provider if it would like to block this request */ + if (nd_desc->clear_to_send) { + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + + if (rc) + return rc; + } + + /* require clear error to go through the pmem driver */ + if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR) + return device_for_each_child(&nvdimm_bus->dev, NULL, + pmem_active); if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; - nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + /* prevent label manipulation while the kernel owns label updates */ wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); - if (atomic_read(&nvdimm->busy)) return -EBUSY; return 0; @@ -513,10 +611,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, /* fail write commands (when read-only) */ if (read_only) - switch (ioctl_cmd) { - case ND_IOCTL_VENDOR: - case ND_IOCTL_SET_CONFIG_DATA: - case ND_IOCTL_ARS_START: + switch (cmd) { + case ND_CMD_VENDOR: + case ND_CMD_SET_CONFIG_DATA: + case ND_CMD_ARS_START: + case ND_CMD_CLEAR_ERROR: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); @@ -583,11 +682,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } nvdimm_bus_lock(&nvdimm_bus->dev); - rc = nd_cmd_clear_to_send(nvdimm, cmd); + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd); if (rc) goto out_unlock; - rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); if (rc < 0) goto out_unlock; if (copy_to_user(p, buf, buf_len)) @@ -602,14 +701,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long id = (long) file->private_data; - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { if (nvdimm_bus->id == id) { - rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg); break; } } @@ -633,10 +732,10 @@ static int match_dimm(struct device *dev, void *data) static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { struct device *dev = device_find_child(&nvdimm_bus->dev, @@ -647,7 +746,7 @@ static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) continue; nvdimm = to_nvdimm(dev); - rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg); put_device(dev); break; } diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 2e2832b83c93..79646d0c3277 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -298,6 +298,15 @@ static int flush_regions_dimms(struct device *dev, void *data) static ssize_t wait_probe_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + int rc; + + if (nd_desc->flush_probe) { + rc = nd_desc->flush_probe(nd_desc); + if (rc) + return rc; + } nd_synchronize(); device_for_each_child(dev, NULL, flush_regions_dimms); return sprintf(buf, "1\n"); @@ -408,33 +417,11 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) set_badblock(bb, start_sector, num_sectors); } -/** - * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks - * @ndns: the namespace containing poison ranges - * @bb: badblocks instance to populate - * @offset: offset at the start of the namespace before 'sector 0' - * - * The poison list generated during NFIT initialization may contain multiple, - * possibly overlapping ranges in the SPA (System Physical Address) space. - * Compare each of these ranges to the namespace currently being initialized, - * and add badblocks to the gendisk for all matching sub-ranges - */ -void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, - struct badblocks *bb, resource_size_t offset) +static void namespace_add_poison(struct list_head *poison_list, + struct badblocks *bb, struct resource *res) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - struct nvdimm_bus *nvdimm_bus; - struct list_head *poison_list; - u64 ns_start, ns_end, ns_size; struct nd_poison *pl; - ns_size = nvdimm_namespace_capacity(ndns) - offset; - ns_start = nsio->res.start + offset; - ns_end = nsio->res.end; - - nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); - poison_list = &nvdimm_bus->poison_list; if (list_empty(poison_list)) return; @@ -442,37 +429,69 @@ void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, u64 pl_end = pl->start + pl->length - 1; /* Discard intervals with no intersection */ - if (pl_end < ns_start) + if (pl_end < res->start) continue; - if (pl->start > ns_end) + if (pl->start > res->end) continue; /* Deal with any overlap after start of the namespace */ - if (pl->start >= ns_start) { + if (pl->start >= res->start) { u64 start = pl->start; u64 len; - if (pl_end <= ns_end) + if (pl_end <= res->end) len = pl->length; else - len = ns_start + ns_size - pl->start; - __add_badblock_range(bb, start - ns_start, len); + len = res->start + resource_size(res) + - pl->start; + __add_badblock_range(bb, start - res->start, len); continue; } /* Deal with overlap for poison starting before the namespace */ - if (pl->start < ns_start) { + if (pl->start < res->start) { u64 len; - if (pl_end < ns_end) - len = pl->start + pl->length - ns_start; + if (pl_end < res->end) + len = pl->start + pl->length - res->start; else - len = ns_size; + len = resource_size(res); __add_badblock_range(bb, 0, len); } } } + +/** + * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks + * @ndns: the namespace containing poison ranges + * @bb: badblocks instance to populate + * @offset: offset at the start of the namespace before 'sector 0' + * + * The poison list generated during NFIT initialization may contain multiple, + * possibly overlapping ranges in the SPA (System Physical Address) space. + * Compare each of these ranges to the namespace currently being initialized, + * and add badblocks to the gendisk for all matching sub-ranges + */ +void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, + struct badblocks *bb, resource_size_t offset) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + struct nvdimm_bus *nvdimm_bus; + struct list_head *poison_list; + struct resource res = { + .start = nsio->res.start + offset, + .end = nsio->res.end, + }; + + nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); + poison_list = &nvdimm_bus->poison_list; + + nvdimm_bus_lock(&nvdimm_bus->dev); + namespace_add_poison(poison_list, bb, &res); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison); -static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; @@ -487,12 +506,12 @@ static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) return 0; } -int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; if (list_empty(&nvdimm_bus->poison_list)) - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); /* * There is a chance this is a duplicate, check for those first. @@ -512,7 +531,18 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) * as any overlapping ranges will get resolved when the list is consumed * and converted to badblocks */ - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); +} + +int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +{ + int rc; + + nvdimm_bus_lock(&nvdimm_bus->dev); + rc = bus_add_poison(nvdimm_bus, addr, length); + nvdimm_bus_unlock(&nvdimm_bus->dev); + + return rc; } EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); @@ -553,7 +583,11 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + + nvdimm_bus_lock(&nvdimm_bus->dev); free_poison_list(&nvdimm_bus->poison_list); + nvdimm_bus_unlock(&nvdimm_bus->dev); + nvdimm_bus_destroy_ndctl(nvdimm_bus); device_unregister(&nvdimm_bus->dev); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 651b8d19d324..c56f88217924 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -75,7 +75,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) memset(cmd, 0, sizeof(*cmd)); nd_desc = nvdimm_bus->nd_desc; return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL); } int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) @@ -120,7 +120,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) cmd->in_offset = offset; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd)); + cmd->in_length + sizeof(*cmd), NULL); if (rc || cmd->status) { rc = -ENXIO; break; @@ -171,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, status = ((void *) cmd) + cmd_size - sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); if (rc || *status) { rc = rc ? rc : -ENXIO; break; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ba1633b9da31..1799bd97a9ce 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -18,6 +18,7 @@ #include <linux/mutex.h> #include <linux/ndctl.h> #include <linux/types.h> +#include <linux/nd.h> #include "label.h" enum { @@ -168,6 +169,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len); ssize_t nd_sector_size_show(unsigned long current_lbasize, @@ -184,6 +186,8 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len); struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ba8d5b6bfad0..ca5721c306bb 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -63,26 +63,56 @@ static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) return false; } +static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) +{ + struct device *dev = disk_to_dev(pmem->pmem_disk); + sector_t sector; + long cleared; + + sector = (offset - pmem->data_offset) / 512; + cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); + + if (cleared > 0 && cleared / 512) { + dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + __func__, (unsigned long long) sector, + cleared / 512, cleared / 512 > 1 ? "s" : ""); + badblocks_clear(&pmem->bb, sector, cleared / 512); + } + invalidate_pmem(pmem->virt_addr + offset, len); +} + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { + int rc = 0; + bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + bad_pmem = true; + if (rw == READ) { - if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) - return -EIO; - memcpy_from_pmem(mem + off, pmem_addr, len); - flush_dcache_page(page); + if (unlikely(bad_pmem)) + rc = -EIO; + else { + memcpy_from_pmem(mem + off, pmem_addr, len); + flush_dcache_page(page); + } } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); + if (unlikely(bad_pmem)) { + pmem_clear_poison(pmem, pmem_off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); + } } kunmap_atomic(mem); - return 0; + return rc; } static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) @@ -561,12 +591,27 @@ static int nd_pmem_remove(struct device *dev) return 0; } +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_namespace_common *ndns = pmem->ndns; + + if (event != NVDIMM_REVALIDATE_POISON) + return; + + if (is_nd_btt(dev)) + nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); + else + nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, + .notify = nd_pmem_notify, .drv = { .name = "nd_pmem", }, diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 7da63eac78ee..4b7715e29cff 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -93,9 +93,21 @@ static int nd_region_remove(struct device *dev) return 0; } +static int child_notify(struct device *dev, void *data) +{ + nd_device_notify(dev, *(enum nvdimm_event *) data); + return 0; +} + +static void nd_region_notify(struct device *dev, enum nvdimm_event event) +{ + device_for_each_child(dev, &event, child_notify); +} + static struct nd_device_driver nd_region_driver = { .probe = nd_region_probe, .remove = nd_region_remove, + .notify = nd_region_notify, .drv = { .name = "nd_region", }, |