-rw-r--r--	mm/page_alloc.c	40
1 file changed, 18 insertions(+), 22 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b1e3483249c3..0c47af9e97c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1344,7 +1344,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
{
int migratetype = 0;
int batch_free = 0;
- int prefetch_nr = 0;
+ int prefetch_nr = READ_ONCE(pcp->batch);
bool isolated_pageblocks;
struct page *page, *tmp;
LIST_HEAD(head);
@@ -1395,8 +1395,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
* avoid excessive prefetching due to large count, only
* prefetch buddy for the first pcp->batch nr of pages.
*/
- if (prefetch_nr++ < pcp->batch)
+ if (prefetch_nr) {
prefetch_buddy(page);
+ prefetch_nr--;
+ }
} while (--count && --batch_free && !list_empty(list));
}
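
Note: the hunk above replaces a per-iteration "prefetch_nr++ < pcp->batch" comparison with a single lockless snapshot of ->batch that is then counted down. Below is a minimal standalone sketch of that countdown pattern; it uses a volatile-cast stand-in for the kernel's READ_ONCE() and illustrative stub names (pcp_stub, drain_pages_stub), not the real kernel structures or helpers.

#include <stdio.h>

/* crude userspace stand-in for the kernel's READ_ONCE() */
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct pcp_stub {
        unsigned long batch;    /* may be updated asynchronously */
};

/*
 * Free 'count' pages, prefetching buddies only for the first snapshotted
 * ->batch of them, mirroring the countdown in free_pcppages_bulk().
 */
static void drain_pages_stub(struct pcp_stub *pcp, int count)
{
        /* one lockless snapshot; later changes to ->batch do not matter here */
        unsigned long prefetch_nr = READ_ONCE(pcp->batch);

        while (count--) {
                /* ... detach and free one page ... */
                if (prefetch_nr) {
                        printf("prefetch buddy of page %d\n", count);
                        prefetch_nr--;
                }
        }
}

int main(void)
{
        struct pcp_stub pcp = { .batch = 3 };

        drain_pages_stub(&pcp, 8);      /* prefetches only for the first 3 */
        return 0;
}

Taking the snapshot once also means a concurrent update to pcp->batch cannot change how many buddies get prefetched in the middle of a drain.
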
@@ -3197,10 +3199,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list_add(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
- if (pcp->count >= pcp->high) {
- unsigned long batch = READ_ONCE(pcp->batch);
- free_pcppages_bulk(zone, batch, pcp);
- }
+ if (pcp->count >= READ_ONCE(pcp->high))
+ free_pcppages_bulk(zone, READ_ONCE(pcp->batch), pcp);
}
/*
@@ -3385,7 +3385,7 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
do {
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
- pcp->batch, list,
+ READ_ONCE(pcp->batch), list,
migratetype, alloc_flags);
if (unlikely(list_empty(list)))
return NULL;
@@ -6270,13 +6270,16 @@ static int zone_batchsize(struct zone *zone)
}
/*
- * pcp->high and pcp->batch values are related and dependent on one another:
- * ->batch must never be higher then ->high.
- * The following function updates them in a safe manner without read side
- * locking.
+ * pcp->high and pcp->batch values are related and generally batch is lower
+ * than high. They are also related to pcp->count such that count is lower
+ * than high, and as soon as it reaches high, the pcplist is flushed.
*
- * Any new users of pcp->batch and pcp->high should ensure they can cope with
- * those fields changing asynchronously (acording to the above rule).
+ * However, guaranteeing these relations at all times would require e.g. write
+ * barriers here but also careful usage of read barriers at the read side, and
+ * thus be prone to error and bad for performance. Thus the update only prevents
+ * store tearing. Any new users of pcp->batch and pcp->high should ensure they
+ * can cope with those fields changing asynchronously, and fully trust only the
+ * pcp->count field on the local CPU with interrupts disabled.
*
* mutex_is_locked(&pcp_batch_high_lock) required when calling this function
* outside of boot time (or some other assurance that no concurrent updaters
@@ -6285,15 +6288,8 @@ static int zone_batchsize(struct zone *zone)
static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
unsigned long batch)
{
- /* start with a fail safe value for batch */
- pcp->batch = 1;
- smp_wmb();
-
- /* Update high, then batch, in order */
- pcp->high = high;
- smp_wmb();
-
- pcp->batch = batch;
+ WRITE_ONCE(pcp->batch, batch);
+ WRITE_ONCE(pcp->high, high);
}
static void pageset_init(struct per_cpu_pageset *p)
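
The pageset_update() hunk drops the smp_wmb()-ordered three-step update in favour of plain WRITE_ONCE() stores, matching the READ_ONCE() loads added on the allocation and free paths above: the only guarantee kept is freedom from load/store tearing, readers must tolerate ->high and ->batch changing asynchronously, and only pcp->count on the local CPU with interrupts disabled is fully trusted. Below is a minimal userspace sketch of that pattern, with volatile-cast stand-ins for READ_ONCE()/WRITE_ONCE() and illustrative stub names (pcp_stub, pageset_update_stub, maybe_flush) rather than the kernel's.

#include <stdio.h>

/* crude userspace stand-ins for the kernel's READ_ONCE()/WRITE_ONCE() */
#define READ_ONCE(x)            (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v)        (*(volatile __typeof__(x) *)&(x) = (v))

/* simplified stand-in for struct per_cpu_pages */
struct pcp_stub {
        unsigned long high;     /* flush threshold, read locklessly */
        unsigned long batch;    /* flush/refill chunk, read locklessly */
        unsigned long count;    /* trusted only on the local CPU */
};

/*
 * Update side: plain once-stores, no smp_wmb() and no ordering between the
 * two fields; the only guarantee is freedom from store tearing.
 */
static void pageset_update_stub(struct pcp_stub *pcp, unsigned long high,
                                unsigned long batch)
{
        WRITE_ONCE(pcp->batch, batch);
        WRITE_ONCE(pcp->high, high);
}

/*
 * Read side: each use takes its own READ_ONCE() snapshot and must cope with
 * ->high and ->batch changing at any time relative to each other.
 */
static void maybe_flush(struct pcp_stub *pcp)
{
        if (pcp->count >= READ_ONCE(pcp->high))
                printf("flush %lu pages\n", READ_ONCE(pcp->batch));
}

int main(void)
{
        struct pcp_stub pcp = { .count = 200 };

        pageset_update_stub(&pcp, 186, 31);
        maybe_flush(&pcp);              /* 200 >= 186 -> flush 31 pages */
        return 0;
}

The design choice is that none of the readers need a precise or mutually consistent pair of ->high/->batch values; an untorn, possibly stale value is enough, so the barrier pairing can be dropped.
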