-rw-r--r-- | include/linux/mmzone.h |   8
-rw-r--r-- | mm/vmscan.c            | 112
2 files changed, 42 insertions, 78 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 337956748976..a4889c9d4055 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -443,18 +443,14 @@ enum {
 struct lru_gen_mm_state {
 	/* set to max_seq after each iteration */
 	unsigned long seq;
-	/* where the current iteration continues (inclusive) */
+	/* where the current iteration continues after */
 	struct list_head *head;
-	/* where the last iteration ended (exclusive) */
+	/* where the last iteration ended before */
 	struct list_head *tail;
-	/* to wait for the last page table walker to finish */
-	struct wait_queue_head wait;
 	/* Bloom filters flip after each iteration */
 	unsigned long *filters[NR_BLOOM_FILTERS];
 	/* the mm stats for debugging */
 	unsigned long stats[NR_HIST_GENS][NR_MM_STATS];
-	/* the number of concurrent page table walkers */
-	int nr_walkers;
 };
 
 struct lru_gen_mm_walk {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2eb5862e99ec..8c05f4c288ce 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3450,18 +3450,13 @@ void lru_gen_del_mm(struct mm_struct *mm)
 	for_each_node(nid) {
 		struct lruvec *lruvec = get_lruvec(memcg, nid);
 
-		/* where the last iteration ended (exclusive) */
+		/* where the current iteration continues after */
+		if (lruvec->mm_state.head == &mm->lru_gen.list)
+			lruvec->mm_state.head = lruvec->mm_state.head->prev;
+
+		/* where the last iteration ended before */
 		if (lruvec->mm_state.tail == &mm->lru_gen.list)
 			lruvec->mm_state.tail = lruvec->mm_state.tail->next;
-
-		/* where the current iteration continues (inclusive) */
-		if (lruvec->mm_state.head != &mm->lru_gen.list)
-			continue;
-
-		lruvec->mm_state.head = lruvec->mm_state.head->next;
-		/* the deletion ends the current iteration */
-		if (lruvec->mm_state.head == &mm_list->fifo)
-			WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1);
 	}
 
 	list_del_init(&mm->lru_gen.list);
@@ -3557,68 +3552,54 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
 			    struct mm_struct **iter)
 {
 	bool first = false;
-	bool last = true;
+	bool last = false;
 	struct mm_struct *mm = NULL;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
 	struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
 
 	/*
-	 * There are four interesting cases for this page table walker:
-	 * 1. It tries to start a new iteration of mm_list with a stale max_seq;
-	 *    there is nothing left to do.
-	 * 2. It's the first of the current generation, and it needs to reset
-	 *    the Bloom filter for the next generation.
-	 * 3. It reaches the end of mm_list, and it needs to increment
-	 *    mm_state->seq; the iteration is done.
-	 * 4. It's the last of the current generation, and it needs to reset the
-	 *    mm stats counters for the next generation.
+	 * mm_state->seq is incremented after each iteration of mm_list. There
+	 * are three interesting cases for this page table walker:
+	 * 1. It tries to start a new iteration with a stale max_seq: there is
+	 *    nothing left to do.
+	 * 2. It started the next iteration: it needs to reset the Bloom filter
+	 *    so that a fresh set of PTE tables can be recorded.
+	 * 3. It ended the current iteration: it needs to reset the mm stats
+	 *    counters and tell its caller to increment max_seq.
 	 */
 	spin_lock(&mm_list->lock);
 
 	VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq);
-	VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq);
-	VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers);
 
-	if (walk->max_seq <= mm_state->seq) {
-		if (!*iter)
-			last = false;
+	if (walk->max_seq <= mm_state->seq)
 		goto done;
-	}
 
-	if (!mm_state->nr_walkers) {
-		VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
+	if (!mm_state->head)
+		mm_state->head = &mm_list->fifo;
 
-		mm_state->head = mm_list->fifo.next;
+	if (mm_state->head == &mm_list->fifo)
 		first = true;
-	}
-
-	while (!mm && mm_state->head != &mm_list->fifo) {
-		mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
 
+	do {
 		mm_state->head = mm_state->head->next;
+		if (mm_state->head == &mm_list->fifo) {
+			WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
+			last = true;
+			break;
+		}
 
 		/* force scan for those added after the last iteration */
-		if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) {
-			mm_state->tail = mm_state->head;
+		if (!mm_state->tail || mm_state->tail == mm_state->head) {
+			mm_state->tail = mm_state->head->next;
 			walk->force_scan = true;
 		}
 
+		mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
 		if (should_skip_mm(mm, walk))
 			mm = NULL;
-	}
-
-	if (mm_state->head == &mm_list->fifo)
-		WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
+	} while (!mm);
 done:
-	if (*iter && !mm)
-		mm_state->nr_walkers--;
-	if (!*iter && mm)
-		mm_state->nr_walkers++;
-
-	if (mm_state->nr_walkers)
-		last = false;
-
 	if (*iter || last)
 		reset_mm_stats(lruvec, walk, last);
@@ -3646,9 +3627,9 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
 
 	VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
 
-	if (max_seq > mm_state->seq && !mm_state->nr_walkers) {
-		VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo);
-
+	if (max_seq > mm_state->seq) {
+		mm_state->head = NULL;
+		mm_state->tail = NULL;
 		WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
 		reset_mm_stats(lruvec, NULL, true);
 		success = true;
@@ -4248,10 +4229,6 @@ restart:
 		walk_pmd_range(&val, addr, next, args);
 
-		/* a racy check to curtail the waiting time */
-		if (wq_has_sleeper(&walk->lruvec->mm_state.wait))
-			return 1;
-
 		if (need_resched() || walk->batched >= MAX_LRU_BATCH) {
 			end = (addr | ~PUD_MASK) + 1;
 			goto done;
 		}
@@ -4284,8 +4261,14 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk)
 	walk->next_addr = FIRST_USER_ADDRESS;
 
 	do {
+		DEFINE_MAX_SEQ(lruvec);
+
 		err = -EBUSY;
 
+		/* another thread might have called inc_max_seq() */
+		if (walk->max_seq != max_seq)
+			break;
+
 		/* folio_update_gen() requires stable folio_memcg() */
 		if (!mem_cgroup_trylock_pages(memcg))
 			break;
@@ -4518,25 +4501,12 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 		success = iterate_mm_list(lruvec, walk, &mm);
 		if (mm)
 			walk_mm(lruvec, mm, walk);
-
-		cond_resched();
 	} while (mm);
 done:
-	if (!success) {
-		if (sc->priority <= DEF_PRIORITY - 2)
-			wait_event_killable(lruvec->mm_state.wait,
-					    max_seq < READ_ONCE(lrugen->max_seq));
-		return false;
-	}
+	if (success)
+		inc_max_seq(lruvec, can_swap, force_scan);
 
-	VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
-
-	inc_max_seq(lruvec, can_swap, force_scan);
-	/* either this sees any waiters or they will see updated max_seq */
-	if (wq_has_sleeper(&lruvec->mm_state.wait))
-		wake_up_all(&lruvec->mm_state.wait);
-
-	return true;
+	return success;
 }
 
 /******************************************************************************
@@ -6173,7 +6143,6 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
 			INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
 
 	lruvec->mm_state.seq = MIN_NR_GENS;
-	init_waitqueue_head(&lruvec->mm_state.wait);
 }
 
 #ifdef CONFIG_MEMCG
@@ -6206,7 +6175,6 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
 	for_each_node(nid) {
 		struct lruvec *lruvec = get_lruvec(memcg, nid);
 
-		VM_WARN_ON_ONCE(lruvec->mm_state.nr_walkers);
 		VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
 					   sizeof(lruvec->lrugen.nr_pages)));
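The comment changes in mmzone.h encode the core idea of this patch: mm_state->head used to point at the mm being walked (inclusive) and now points at the mm the iteration continues after (exclusive), with &mm_list->fifo doubling as the sentinel for both "not started yet" and "wrapped around". What follows is a minimal user-space sketch of that pattern, not kernel code: node, push_tail, iterate and del are hypothetical stand-ins for mm_struct.lru_gen.list, lru_gen_add_mm(), iterate_mm_list() and lru_gen_del_mm().

	#include <stdio.h>

	struct node {
		struct node *prev, *next;
		int id;
	};

	/* sentinel, like &mm_list->fifo: part of the ring but never an element */
	static struct node fifo = { &fifo, &fifo, -1 };

	/* names the element already walked past, like mm_state->head */
	static struct node *cursor = &fifo;

	static void push_tail(struct node *n)	/* like lru_gen_add_mm() */
	{
		n->prev = fifo.prev;
		n->next = &fifo;
		fifo.prev->next = n;
		fifo.prev = n;
	}

	static struct node *iterate(void)	/* like iterate_mm_list() */
	{
		cursor = cursor->next;
		if (cursor == &fifo)
			return NULL;	/* wrapped around: iteration done */
		return cursor;
	}

	static void del(struct node *n)		/* like lru_gen_del_mm() */
	{
		/* exclusive cursor: deleting the current node steps it back */
		if (cursor == n)
			cursor = cursor->prev;
		n->prev->next = n->next;
		n->next->prev = n->prev;
	}

	int main(void)
	{
		struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
		struct node *n;

		push_tail(&a);
		push_tail(&b);
		push_tail(&c);

		n = iterate();		/* visits 1 */
		printf("visited %d\n", n->id);
		del(&a);		/* delete the node under the cursor */

		while ((n = iterate()))	/* resumes at 2, then 3, then stops */
			printf("visited %d\n", n->id);
		return 0;
	}

With the exclusive cursor, deleting the element just visited only steps the cursor back one node. Under the old inclusive semantics, lru_gen_del_mm() had to advance head past the dying mm and, on reaching &mm_list->fifo, complete the iteration itself, which is exactly the branch this patch deletes.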
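Dropping nr_walkers and the wait queue also drops the guarantee that exactly one walker finishes each iteration. The replacement is optimistic: walk_mm() re-reads max_seq at the top of every batch (the added DEFINE_MAX_SEQ() above) and gives up with -EBUSY once a peer has completed the iteration and bumped the sequence, instead of sleeping in wait_event_killable(). Below is a rough user-space sketch of that check, assuming C11 atomics in place of READ_ONCE() and the hypothetical names live_seq and walk_one_batch.

	#include <errno.h>
	#include <stdatomic.h>
	#include <stdio.h>

	/* stand-in for lrugen->max_seq; bumped by whichever thread
	 * ends an iteration of mm_list */
	static atomic_ulong live_seq;

	/*
	 * Stand-in for the recheck at the top of walk_mm()'s loop: a walker
	 * captures the sequence number it set out to serve and stops as soon
	 * as the global one moves on, rather than sleeping on a wait queue.
	 */
	static int walk_one_batch(unsigned long walker_seq)
	{
		/* another thread might have finished the iteration already */
		if (walker_seq != atomic_load(&live_seq))
			return -EBUSY;

		/* ... walk one batch of page tables here ... */
		return 0;
	}

	int main(void)
	{
		unsigned long seq = atomic_load(&live_seq);

		printf("fresh batch: %d\n", walk_one_batch(seq));	/* 0 */
		atomic_fetch_add(&live_seq, 1);	/* a peer finished first */
		printf("stale batch: %d\n", walk_one_batch(seq));	/* -EBUSY */
		return 0;
	}

A stale walker now notices at the next batch boundary and simply returns, so no wakeup protocol is needed; that is why the racy wq_has_sleeper() check in walk_pmd_range() and the wake_up_all() in try_to_inc_max_seq() could go as well.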