summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-18 14:48:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-18 14:48:11 -0700
commitb2fc97c18614f99179700be263ecbc667c91a4e8 (patch)
tree13914a5bb2fcef7691c84796ab5a36459b949873
parent68b59730459e5d1fe4e0bbeb04ceb9df0f002270 (diff)
parent9364a7e40d54e6858479f0a96e1a04aa1204be16 (diff)
Merge tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock
Pull memblock updates from Mike Rapoport: - 'reserve_mem' command line parameter to allow creation of named memory reservation at boot time. The driving use-case is to improve the ability of pstore to retain ramoops data across reboots. - cleanups and small improvements in memblock and mm_init - new tests cases in memblock test suite * tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock: memblock tests: fix implicit declaration of function 'numa_valid_node' memblock: Move late alloc warning down to phys alloc pstore/ramoops: Add ramoops.mem_name= command line option mm/memblock: Add "reserve_mem" to reserved named memory at boot up mm/mm_init.c: don't initialize page->lru again mm/mm_init.c: not always search next deferred_init_pfn from very beginning mm/mm_init.c: use deferred_init_mem_pfn_range_in_zone() to decide loop condition mm/mm_init.c: get the highest zone directly mm/mm_init.c: move nr_initialised reset down a bit mm/memblock: fix a typo in description of for_each_mem_region() mm/mm_init.c: use memblock_region_memory_base_pfn() to get startpfn mm/memblock: use PAGE_ALIGN_DOWN to get pgend in free_memmap mm/memblock: return true directly on finding overlap region memblock tests: add memblock_overlaps_region_checks mm/memblock: fix comment for memblock_isolate_range() memblock tests: add memblock_reserve_many_may_conflict_check() memblock tests: add memblock_reserve_all_locations_check() mm/memblock: remove empty dummy entry
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt22
-rw-r--r--Documentation/admin-guide/ramoops.rst13
-rw-r--r--fs/pstore/ram.c14
-rw-r--r--include/linux/memblock.h21
-rw-r--r--include/linux/mm.h2
-rw-r--r--mm/memblock.c151
-rw-r--r--mm/mm_init.c69
-rw-r--r--tools/include/linux/mm.h1
-rw-r--r--tools/include/linux/numa.h5
-rw-r--r--tools/testing/memblock/tests/basic_api.c314
-rw-r--r--tools/testing/memblock/tests/common.c8
-rw-r--r--tools/testing/memblock/tests/common.h4
12 files changed, 542 insertions, 82 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 740ca2bc2822..0ca8452ea1ad 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5674,6 +5674,28 @@
them. If <base> is less than 0x10000, the region
is assumed to be I/O ports; otherwise it is memory.
+ reserve_mem= [RAM]
+ Format: nn[KNG]:<align>:<label>
+ Reserve physical memory and label it with a name that
+ other subsystems can use to access it. This is typically
+ used for systems that do not wipe the RAM, and this command
+ line will try to reserve the same physical memory on
+ soft reboots. Note, it is not guaranteed to be the same
+ location. For example, if anything about the system changes
+ or if booting a different kernel. It can also fail if KASLR
+ places the kernel at the location of where the RAM reservation
+ was from a previous boot, the new reservation will be at a
+ different location.
+ Any subsystem using this feature must add a way to verify
+ that the contents of the physical memory is from a previous
+ boot, as there may be cases where the memory will not be
+ located at the same location.
+
+ The format is size:align:label for example, to request
+ 12 megabytes of 4096 alignment for ramoops:
+
+ reserve_mem=12M:4096:oops ramoops.mem_name=oops
+
reservetop= [X86-32,EARLY]
Format: nn[KMG]
Reserves a hole at the top of the kernel virtual
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index e9f85142182d..6f534a707b2a 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -23,6 +23,8 @@ and type of the memory area are set using three variables:
* ``mem_size`` for the size. The memory size will be rounded down to a
power of two.
* ``mem_type`` to specify if the memory type (default is pgprot_writecombine).
+ * ``mem_name`` to specify a memory region defined by ``reserve_mem`` command
+ line parameter.
Typically the default value of ``mem_type=0`` should be used as that sets the pstore
mapping to pgprot_writecombine. Setting ``mem_type=1`` attempts to use
@@ -118,6 +120,17 @@ Setting the ramoops parameters can be done in several different manners:
return ret;
}
+ D. Using a region of memory reserved via ``reserve_mem`` command line
+ parameter. The address and size will be defined by the ``reserve_mem``
+ parameter. Note, that ``reserve_mem`` may not always allocate memory
+ in the same location, and cannot be relied upon. Testing will need
+ to be done, and it may not work on every machine, nor every kernel.
+ Consider this a "best effort" approach. The ``reserve_mem`` option
+ takes a size, alignment and name as arguments. The name is used
+ to map the memory to a label that can be retrieved by ramoops.
+
+ reserver_mem=2M:4096:oops ramoops.mem_name=oops
+
You can specify either RAM memory or peripheral devices' memory. However, when
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
very early in the architecture code, e.g.::
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index b1a455f42e93..4311fcbc84f2 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -50,6 +50,10 @@ module_param_hw(mem_address, ullong, other, 0400);
MODULE_PARM_DESC(mem_address,
"start of reserved RAM used to store oops/panic logs");
+static char *mem_name;
+module_param_named(mem_name, mem_name, charp, 0400);
+MODULE_PARM_DESC(mem_name, "name of kernel param that holds addr");
+
static ulong mem_size;
module_param(mem_size, ulong, 0400);
MODULE_PARM_DESC(mem_size,
@@ -914,6 +918,16 @@ static void __init ramoops_register_dummy(void)
{
struct ramoops_platform_data pdata;
+ if (mem_name) {
+ phys_addr_t start;
+ phys_addr_t size;
+
+ if (reserve_mem_find_by_name(mem_name, &start, &size)) {
+ mem_address = start;
+ mem_size = size;
+ }
+ }
+
/*
* Prepare a dummy platform data structure to carry the module
* parameters. If mem_size isn't set, then there are no module
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e2082240586d..45cac33334c8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
unsigned long *out_spfn,
unsigned long *out_epfn);
-/**
- * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
- * memblock areas
- * @i: u64 used as loop variable
- * @zone: zone in which all of the memory blocks reside
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- *
- * Walks over free (memory && !reserved) areas of memblock in a specific
- * zone. Available once memblock and an empty zone is initialized. The main
- * assumption is that the zone start, end, and pgdat have been associated.
- * This way we can use the zone to determine NUMA node, and if a given part
- * of the memblock is valid for the zone.
- */
-#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \
- for (i = 0, \
- __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \
- i != U64_MAX; \
- __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
/**
* for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
@@ -565,7 +546,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
}
/**
- * for_each_mem_region - itereate over memory regions
+ * for_each_mem_region - iterate over memory regions
* @region: loop variable
*/
#define for_each_mem_region(region) \
diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb7c96d24ac0..fe49d432700e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4261,4 +4261,6 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
void vma_pgtable_walk_begin(struct vm_area_struct *vma);
void vma_pgtable_walk_end(struct vm_area_struct *vma);
+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
+
#endif /* _LINUX_MM_H */
diff --git a/mm/memblock.c b/mm/memblock.c
index e81fb68f7f88..3b9dc2d89b8a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -114,12 +114,10 @@ static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
- .memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
- .reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
.reserved.name = "reserved",
@@ -130,7 +128,6 @@ struct memblock memblock __initdata_memblock = {
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
.regions = memblock_physmem_init_regions,
- .cnt = 1, /* empty dummy entry */
.max = INIT_PHYSMEM_REGIONS,
.name = "physmem",
};
@@ -197,8 +194,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
for (i = 0; i < type->cnt; i++)
if (memblock_addrs_overlap(base, size, type->regions[i].base,
type->regions[i].size))
- break;
- return i < type->cnt;
+ return true;
+ return false;
}
/**
@@ -356,7 +353,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
/* Special case for empty arrays */
if (type->cnt == 0) {
WARN_ON(type->total_size != 0);
- type->cnt = 1;
type->regions[0].base = 0;
type->regions[0].size = 0;
type->regions[0].flags = 0;
@@ -600,12 +596,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
/* special case for empty array */
if (type->regions[0].size == 0) {
- WARN_ON(type->cnt != 1 || type->total_size);
+ WARN_ON(type->cnt != 0 || type->total_size);
type->regions[0].base = base;
type->regions[0].size = size;
type->regions[0].flags = flags;
memblock_set_region_node(&type->regions[0], nid);
type->total_size = size;
+ type->cnt = 1;
return 0;
}
@@ -780,7 +777,8 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
* Walk @type and ensure that regions don't cross the boundaries defined by
* [@base, @base + @size). Crossing regions are split at the boundaries,
* which may create at most two more regions. The index of the first
- * region inside the range is returned in *@start_rgn and end in *@end_rgn.
+ * region inside the range is returned in *@start_rgn and the index of the
+ * first region after the range is returned in *@end_rgn.
*
* Return:
* 0 on success, -errno on failure.
@@ -1441,6 +1439,17 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
+ /*
+ * Detect any accidental use of these APIs after slab is ready, as at
+ * this moment memblock may be deinitialized already and its
+ * internal data may be destroyed (after execution of memblock_free_all)
+ */
+ if (WARN_ON_ONCE(slab_is_available())) {
+ void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);
+
+ return vaddr ? virt_to_phys(vaddr) : 0;
+ }
+
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
@@ -1566,13 +1575,6 @@ static void * __init memblock_alloc_internal(
{
phys_addr_t alloc;
- /*
- * Detect any accidental use of these APIs after slab is ready, as at
- * this moment memblock may be deinitialized already and its
- * internal data may be destroyed (after execution of memblock_free_all)
- */
- if (WARN_ON_ONCE(slab_is_available()))
- return kzalloc_node(size, GFP_NOWAIT, nid);
if (max_addr > memblock.current_limit)
max_addr = memblock.current_limit;
@@ -2031,7 +2033,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
* downwards.
*/
pg = PAGE_ALIGN(__pa(start_pg));
- pgend = __pa(end_pg) & PAGE_MASK;
+ pgend = PAGE_ALIGN_DOWN(__pa(end_pg));
/*
* If there are free pages between these, free the section of the
@@ -2234,6 +2236,123 @@ void __init memblock_free_all(void)
totalram_pages_add(pages);
}
+/* Keep a table to reserve named memory */
+#define RESERVE_MEM_MAX_ENTRIES 8
+#define RESERVE_MEM_NAME_SIZE 16
+struct reserve_mem_table {
+ char name[RESERVE_MEM_NAME_SIZE];
+ phys_addr_t start;
+ phys_addr_t size;
+};
+static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
+static int reserved_mem_count;
+
+/* Add wildcard region with a lookup name */
+static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
+ const char *name)
+{
+ struct reserve_mem_table *map;
+
+ map = &reserved_mem_table[reserved_mem_count++];
+ map->start = start;
+ map->size = size;
+ strscpy(map->name, name);
+}
+
+/**
+ * reserve_mem_find_by_name - Find reserved memory region with a given name
+ * @name: The name that is attached to a reserved memory region
+ * @start: If found, holds the start address
+ * @size: If found, holds the size of the address.
+ *
+ * @start and @size are only updated if @name is found.
+ *
+ * Returns: 1 if found or 0 if not found.
+ */
+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
+{
+ struct reserve_mem_table *map;
+ int i;
+
+ for (i = 0; i < reserved_mem_count; i++) {
+ map = &reserved_mem_table[i];
+ if (!map->size)
+ continue;
+ if (strcmp(name, map->name) == 0) {
+ *start = map->start;
+ *size = map->size;
+ return 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
+
+/*
+ * Parse reserve_mem=nn:align:name
+ */
+static int __init reserve_mem(char *p)
+{
+ phys_addr_t start, size, align, tmp;
+ char *name;
+ char *oldp;
+ int len;
+
+ if (!p)
+ return -EINVAL;
+
+ /* Check if there's room for more reserved memory */
+ if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
+ return -EBUSY;
+
+ oldp = p;
+ size = memparse(p, &p);
+ if (!size || p == oldp)
+ return -EINVAL;
+
+ if (*p != ':')
+ return -EINVAL;
+
+ align = memparse(p+1, &p);
+ if (*p != ':')
+ return -EINVAL;
+
+ /*
+ * memblock_phys_alloc() doesn't like a zero size align,
+ * but it is OK for this command to have it.
+ */
+ if (align < SMP_CACHE_BYTES)
+ align = SMP_CACHE_BYTES;
+
+ name = p + 1;
+ len = strlen(name);
+
+ /* name needs to have length but not too big */
+ if (!len || len >= RESERVE_MEM_NAME_SIZE)
+ return -EINVAL;
+
+ /* Make sure that name has text */
+ for (p = name; *p; p++) {
+ if (!isspace(*p))
+ break;
+ }
+ if (!*p)
+ return -EINVAL;
+
+ /* Make sure the name is not already used */
+ if (reserve_mem_find_by_name(name, &start, &tmp))
+ return -EBUSY;
+
+ start = memblock_phys_alloc(size, align);
+ if (!start)
+ return -ENOMEM;
+
+ reserved_mem_add(start, size, name);
+
+ return 1;
+}
+__setup("reserve_mem=", reserve_mem);
+
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
static const char * const flagname[] = {
[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3ec04933f7fd..804df0309257 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -363,7 +363,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
nid = memblock_get_region_node(r);
- usable_startpfn = PFN_DOWN(r->base);
+ usable_startpfn = memblock_region_memory_base_pfn(r);
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
min(usable_startpfn, zone_movable_pfn[nid]) :
usable_startpfn;
@@ -676,6 +676,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
if (early_page_ext_enabled())
return false;
+
+ /* Always populate low zones for address-constrained allocations */
+ if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
+ return false;
+
+ if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+ return true;
+
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
@@ -685,12 +693,6 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
nr_initialised = 0;
}
- /* Always populate low zones for address-constrained allocations */
- if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
- return false;
-
- if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
- return true;
/*
* We start only with one section of pages, more pages are added as
* needed until the rest of deferred pages are initialized.
@@ -758,9 +760,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
init_reserved_page(start_pfn, nid);
- /* Avoid false-positive PageTail() */
- INIT_LIST_HEAD(&page->lru);
-
/*
* no need for atomic set_bit because the struct
* page is not visible yet so nobody should
@@ -2019,24 +2018,29 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
}
/*
- * This function is meant to pre-load the iterator for the zone init.
- * Specifically it walks through the ranges until we are caught up to the
- * first_init_pfn value and exits there. If we never encounter the value we
- * return false indicating there are no valid ranges left.
+ * This function is meant to pre-load the iterator for the zone init from
+ * a given point.
+ * Specifically it walks through the ranges starting with initial index
+ * passed to it until we are caught up to the first_init_pfn value and
+ * exits there. If we never encounter the value we return false indicating
+ * there are no valid ranges left.
*/
static bool __init
deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
unsigned long *spfn, unsigned long *epfn,
unsigned long first_init_pfn)
{
- u64 j;
+ u64 j = *i;
+
+ if (j == 0)
+ __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
/*
* Start out by walking through the ranges in this zone that have
* already been initialized. We don't need to do anything with them
* so we just need to flush them out of the system.
*/
- for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
+ for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
if (*epfn <= first_init_pfn)
continue;
if (*spfn < first_init_pfn)
@@ -2108,7 +2112,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
{
unsigned long spfn, epfn;
struct zone *zone = arg;
- u64 i;
+ u64 i = 0;
deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
@@ -2138,8 +2142,8 @@ static int __init deferred_init_memmap(void *data)
unsigned long first_init_pfn, flags;
unsigned long start = jiffies;
struct zone *zone;
- int zid, max_threads;
- u64 i;
+ int max_threads;
+ u64 i = 0;
/* Bind memory initialisation thread to a local node if possible */
if (!cpumask_empty(cpumask))
@@ -2165,27 +2169,18 @@ static int __init deferred_init_memmap(void *data)
*/
pgdat_resize_unlock(pgdat, &flags);
- /* Only the highest zone is deferred so find it */
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- zone = pgdat->node_zones + zid;
- if (first_init_pfn < zone_end_pfn(zone))
- break;
- }
-
- /* If the zone is empty somebody else may have cleared out the zone */
- if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- first_init_pfn))
- goto zone_empty;
+ /* Only the highest zone is deferred */
+ zone = pgdat->node_zones + pgdat->nr_zones - 1;
max_threads = deferred_page_init_max_threads(cpumask);
- while (spfn < epfn) {
- unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
+ while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
+ first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
struct padata_mt_job job = {
.thread_fn = deferred_init_memmap_chunk,
.fn_arg = zone,
.start = spfn,
- .size = epfn_align - spfn,
+ .size = first_init_pfn - spfn,
.align = PAGES_PER_SECTION,
.min_chunk = PAGES_PER_SECTION,
.max_threads = max_threads,
@@ -2193,12 +2188,10 @@ static int __init deferred_init_memmap(void *data)
};
padata_do_multithreaded(&job);
- deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- epfn_align);
}
-zone_empty:
+
/* Sanity check that the next zone really is unpopulated */
- WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+ WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
pr_info("node %d deferred pages initialised in %ums\n",
pgdat->node_id, jiffies_to_msecs(jiffies - start));
@@ -2225,7 +2218,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
unsigned long spfn, epfn, flags;
unsigned long nr_pages = 0;
- u64 i;
+ u64 i = 0;
/* Only the last zone may have deferred pages */
if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h
index dc0fc7125bc3..cad4f2927983 100644
--- a/tools/include/linux/mm.h
+++ b/tools/include/linux/mm.h
@@ -12,6 +12,7 @@
#define PHYS_ADDR_MAX (~(phys_addr_t)0)
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE)
#define __va(x) ((void *)((unsigned long)(x)))
#define __pa(x) ((unsigned long)(x))
diff --git a/tools/include/linux/numa.h b/tools/include/linux/numa.h
index 110b0e5d0fb0..c8b9369335e0 100644
--- a/tools/include/linux/numa.h
+++ b/tools/include/linux/numa.h
@@ -13,4 +13,9 @@
#define NUMA_NO_NODE (-1)
+static inline bool numa_valid_node(int nid)
+{
+ return nid >= 0 && nid < MAX_NUMNODES;
+}
+
#endif /* _LINUX_NUMA_H */
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 57bf2688edfd..67503089e6a0 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -15,12 +15,12 @@ static int memblock_initialization_check(void)
PREFIX_PUSH();
ASSERT_NE(memblock.memory.regions, NULL);
- ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.cnt, 0);
ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
ASSERT_EQ(strcmp(memblock.memory.name, "memory"), 0);
ASSERT_NE(memblock.reserved.regions, NULL);
- ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.cnt, 0);
ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
ASSERT_EQ(strcmp(memblock.reserved.name, "reserved"), 0);
@@ -982,6 +982,262 @@ static int memblock_reserve_many_check(void)
return 0;
}
+
+/*
+ * A test that trying to reserve the 129th memory block at all locations.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as reserved.regions.
+ *
+ * 0 1 2 128
+ * +-------+ +-------+ +-------+ +-------+
+ * | 32K | | 32K | | 32K | ... | 32K |
+ * +-------+-------+-------+-------+-------+ +-------+
+ * |<-32K->| |<-32K->|
+ *
+ */
+/* Keep the gap so these memory region will not be merged. */
+#define MEMORY_BASE(idx) (SZ_128K + (MEM_SIZE * 2) * (idx))
+static int memblock_reserve_all_locations_check(void)
+{
+ int i, skip;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t new_reserved_regions_size;
+
+ PREFIX_PUSH();
+
+ /* Reserve the 129th memory block for all possible positions*/
+ for (skip = 0; skip < INIT_MEMBLOCK_REGIONS + 1; skip++) {
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ /* Add a valid memory region used by double_array(). */
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+
+ for (i = 0; i < INIT_MEMBLOCK_REGIONS + 1; i++) {
+ if (i == skip)
+ continue;
+
+ /* Reserve some fakes memory region to fulfill the memblock. */
+ memblock_reserve(MEMORY_BASE(i), MEM_SIZE);
+
+ if (i < skip) {
+ ASSERT_EQ(memblock.reserved.cnt, i + 1);
+ ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE);
+ } else {
+ ASSERT_EQ(memblock.reserved.cnt, i);
+ ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
+ }
+ }
+
+ orig_region = memblock.reserved.regions;
+
+ /* This reserve the 129 memory_region, and makes it double array. */
+ memblock_reserve(MEMORY_BASE(skip), MEM_SIZE);
+
+ /*
+ * This is the memory region size used by the doubled reserved.regions,
+ * and it has been reserved due to it has been used. The size is used to
+ * calculate the total_size that the memblock.reserved have now.
+ */
+ new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+ sizeof(struct memblock_region));
+ /*
+ * The double_array() will find a free memory region as the new
+ * reserved.regions, and the used memory region will be reserved, so
+ * there will be one more region exist in the reserved memblock. And the
+ * one more reserved region's size is new_reserved_regions_size.
+ */
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_reserve() still works as normal.
+ */
+ memblock_reserve(r.base, r.size);
+ ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+ ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+ }
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that trying to reserve the 129th memory block at all locations.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as reserved.regions. And make
+ * sure it doesn't conflict with the range we want to reserve.
+ *
+ * For example, we have 128 regions in reserved and now want to reserve
+ * the skipped one. Since reserved is full, memblock_double_array() would find
+ * an available range in memory for the new array. We intended to put two
+ * ranges in memory with one is the exact range of the skipped one. Before
+ * commit 48c3b583bbdd ("mm/memblock: fix overlapping allocation when doubling
+ * reserved array"), the new array would sits in the skipped range which is a
+ * conflict. The expected new array should be allocated from memory.regions[0].
+ *
+ * 0 1
+ * memory +-------+ +-------+
+ * | 32K | | 32K |
+ * +-------+ ------+-------+-------+-------+
+ * |<-32K->|<-32K->|<-32K->|
+ *
+ * 0 skipped 127
+ * reserved +-------+ ......... +-------+
+ * | 32K | . 32K . ... | 32K |
+ * +-------+-------+-------+ +-------+
+ * |<-32K->|
+ * ^
+ * |
+ * |
+ * skipped one
+ */
+/* Keep the gap so these memory region will not be merged. */
+#define MEMORY_BASE_OFFSET(idx, offset) ((offset) + (MEM_SIZE * 2) * (idx))
+static int memblock_reserve_many_may_conflict_check(void)
+{
+ int i, skip;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t new_reserved_regions_size;
+
+ /*
+ * 0 1 129
+ * +---+ +---+ +---+
+ * |32K| |32K| .. |32K|
+ * +---+ +---+ +---+
+ *
+ * Pre-allocate the range for 129 memory block + one range for double
+ * memblock.reserved.regions at idx 0.
+ */
+ dummy_physical_memory_init();
+ phys_addr_t memory_base = dummy_physical_memory_base();
+ phys_addr_t offset = PAGE_ALIGN(memory_base);
+
+ PREFIX_PUSH();
+
+ /* Reserve the 129th memory block for all possible positions*/
+ for (skip = 1; skip <= INIT_MEMBLOCK_REGIONS + 1; skip++) {
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ reset_memblock_attributes();
+ /* Add a valid memory region used by double_array(). */
+ memblock_add(MEMORY_BASE_OFFSET(0, offset), MEM_SIZE);
+ /*
+ * Add a memory region which will be reserved as 129th memory
+ * region. This is not expected to be used by double_array().
+ */
+ memblock_add(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);
+
+ for (i = 1; i <= INIT_MEMBLOCK_REGIONS + 1; i++) {
+ if (i == skip)
+ continue;
+
+ /* Reserve some fakes memory region to fulfill the memblock. */
+ memblock_reserve(MEMORY_BASE_OFFSET(i, offset), MEM_SIZE);
+
+ if (i < skip) {
+ ASSERT_EQ(memblock.reserved.cnt, i);
+ ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
+ } else {
+ ASSERT_EQ(memblock.reserved.cnt, i - 1);
+ ASSERT_EQ(memblock.reserved.total_size, (i - 1) * MEM_SIZE);
+ }
+ }
+
+ orig_region = memblock.reserved.regions;
+
+ /* This reserve the 129 memory_region, and makes it double array. */
+ memblock_reserve(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);
+
+ /*
+ * This is the memory region size used by the doubled reserved.regions,
+ * and it has been reserved due to it has been used. The size is used to
+ * calculate the total_size that the memblock.reserved have now.
+ */
+ new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+ sizeof(struct memblock_region));
+ /*
+ * The double_array() will find a free memory region as the new
+ * reserved.regions, and the used memory region will be reserved, so
+ * there will be one more region exist in the reserved memblock. And the
+ * one more reserved region's size is new_reserved_regions_size.
+ */
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * The first reserved region is allocated for double array
+ * with the size of new_reserved_regions_size and the base to be
+ * MEMORY_BASE_OFFSET(0, offset) + SZ_32K - new_reserved_regions_size
+ */
+ ASSERT_EQ(memblock.reserved.regions[0].base + memblock.reserved.regions[0].size,
+ MEMORY_BASE_OFFSET(0, offset) + SZ_32K);
+ ASSERT_EQ(memblock.reserved.regions[0].size, new_reserved_regions_size);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_reserve() still works as normal.
+ */
+ memblock_reserve(r.base, r.size);
+ ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+ ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+ }
+
+ dummy_physical_memory_cleanup();
+
+ test_pass_pop();
+
+ return 0;
+}
+
static int memblock_reserve_checks(void)
{
prefix_reset();
@@ -997,6 +1253,8 @@ static int memblock_reserve_checks(void)
memblock_reserve_between_check();
memblock_reserve_near_max_check();
memblock_reserve_many_check();
+ memblock_reserve_all_locations_check();
+ memblock_reserve_many_may_conflict_check();
prefix_pop();
@@ -1295,7 +1553,7 @@ static int memblock_remove_only_region_check(void)
ASSERT_EQ(rgn->base, 0);
ASSERT_EQ(rgn->size, 0);
- ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.cnt, 0);
ASSERT_EQ(memblock.memory.total_size, 0);
test_pass_pop();
@@ -1723,7 +1981,7 @@ static int memblock_free_only_region_check(void)
ASSERT_EQ(rgn->base, 0);
ASSERT_EQ(rgn->size, 0);
- ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.cnt, 0);
ASSERT_EQ(memblock.reserved.total_size, 0);
test_pass_pop();
@@ -2129,6 +2387,53 @@ static int memblock_trim_memory_checks(void)
return 0;
}
+static int memblock_overlaps_region_check(void)
+{
+ struct region r = {
+ .base = SZ_1G,
+ .size = SZ_4M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r.base, r.size);
+
+ /* Far Away */
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1M, SZ_1M));
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_2G, SZ_1M));
+
+ /* Neighbor */
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_1M));
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_4M, SZ_1M));
+
+ /* Partial Overlap */
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_2M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_2M, SZ_2M));
+
+ /* Totally Overlap */
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G, SZ_4M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_2M, SZ_8M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_1M, SZ_1M));
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_overlaps_region_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_overlaps_region");
+ test_print("Running memblock_overlaps_region tests...\n");
+
+ memblock_overlaps_region_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
int memblock_basic_checks(void)
{
memblock_initialization_check();
@@ -2138,6 +2443,7 @@ int memblock_basic_checks(void)
memblock_free_checks();
memblock_bottom_up_checks();
memblock_trim_memory_checks();
+ memblock_overlaps_region_checks();
return 0;
}
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
index f43b6f414983..3250c8e5124b 100644
--- a/tools/testing/memblock/tests/common.c
+++ b/tools/testing/memblock/tests/common.c
@@ -40,13 +40,13 @@ void reset_memblock_regions(void)
{
memset(memblock.memory.regions, 0,
memblock.memory.cnt * sizeof(struct memblock_region));
- memblock.memory.cnt = 1;
+ memblock.memory.cnt = 0;
memblock.memory.max = INIT_MEMBLOCK_REGIONS;
memblock.memory.total_size = 0;
memset(memblock.reserved.regions, 0,
memblock.reserved.cnt * sizeof(struct memblock_region));
- memblock.reserved.cnt = 1;
+ memblock.reserved.cnt = 0;
memblock.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS;
memblock.reserved.total_size = 0;
}
@@ -61,7 +61,7 @@ void reset_memblock_attributes(void)
static inline void fill_memblock(void)
{
- memset(memory_block.base, 1, MEM_SIZE);
+ memset(memory_block.base, 1, PHYS_MEM_SIZE);
}
void setup_memblock(void)
@@ -103,7 +103,7 @@ void setup_numa_memblock(const unsigned int node_fracs[])
void dummy_physical_memory_init(void)
{
- memory_block.base = malloc(MEM_SIZE);
+ memory_block.base = malloc(PHYS_MEM_SIZE);
assert(memory_block.base);
fill_memblock();
}
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
index b5ec59aa62d7..e1138e06c903 100644
--- a/tools/testing/memblock/tests/common.h
+++ b/tools/testing/memblock/tests/common.h
@@ -12,6 +12,7 @@
#include <../selftests/kselftest.h>
#define MEM_SIZE SZ_32K
+#define PHYS_MEM_SIZE SZ_16M
#define NUMA_NODES 8
#define INIT_MEMBLOCK_REGIONS 128
@@ -39,6 +40,9 @@ enum test_flags {
assert((_expected) == (_seen)); \
} while (0)
+#define ASSERT_TRUE(_seen) ASSERT_EQ(true, _seen)
+#define ASSERT_FALSE(_seen) ASSERT_EQ(false, _seen)
+
/**
* ASSERT_NE():
* Check the condition