From 4e717f5c1083995c334ced639cc77a75e9972567 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 28 Aug 2013 10:18:03 +1000 Subject: list_lru: remove special case function list_lru_dispose_all. The list_lru implementation has one function, list_lru_dispose_all, with only one user (the dentry code). At first, such function appears to make sense because we are really not interested in the result of isolating each dentry separately - all of them are going away anyway. However, it's implementation is buggy in the following way: When we call list_lru_dispose_all in fs/dcache.c, we scan all dentries marking them with DCACHE_SHRINK_LIST. However, this is done without the nlru->lock taken. The imediate result of that is that someone else may add or remove the dentry from the LRU at the same time. When list_lru_del happens in that scenario we will see an element that is not yet marked with DCACHE_SHRINK_LIST (even though it will be in the future) and obviously remove it from an lru where the element no longer is. Since list_lru_dispose_all will in effect count down nlru's nr_items and list_lru_del will do the same, this will lead to an imbalance. The solution for this would not be so simple: we can obviously just keep the lru_lock taken, but then we have no guarantees that we will be able to acquire the dentry lock (dentry->d_lock). To properly solve this, we need a communication mechanism between the lru and dentry code, so they can coordinate this with each other. Such mechanism already exists in the form of the list_lru_walk_cb callback. So it is possible to construct a dcache-side prune function that does the right thing only by calling list_lru_walk in a loop until no more dentries are available. With only one user, plus the fact that a sane solution for the problem would involve boucing between dcache and list_lru anyway, I see little justification to keep the special case list_lru_dispose_all in tree. Signed-off-by: Glauber Costa Cc: Michal Hocko Acked-by: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/dcache.c | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 38a4a03499a2..d74b5bdff7f9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -956,27 +956,29 @@ long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan) return freed; } -/* - * Mark all the dentries as on being the dispose list so we don't think they are - * still on the LRU if we try to kill them from ascending the parent chain in - * try_prune_one_dentry() rather than directly from the dispose list. - */ -static void -shrink_dcache_list( - struct list_head *dispose) +static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, + spinlock_t *lru_lock, void *arg) { - struct dentry *dentry; + struct list_head *freeable = arg; + struct dentry *dentry = container_of(item, struct dentry, d_lru); - rcu_read_lock(); - list_for_each_entry_rcu(dentry, dispose, d_lru) { - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_SHRINK_LIST; - spin_unlock(&dentry->d_lock); - } - rcu_read_unlock(); - shrink_dentry_list(dispose); + /* + * we are inverting the lru lock/dentry->d_lock here, + * so use a trylock. If we fail to get the lock, just skip + * it + */ + if (!spin_trylock(&dentry->d_lock)) + return LRU_SKIP; + + dentry->d_flags |= DCACHE_SHRINK_LIST; + list_move_tail(&dentry->d_lru, freeable); + this_cpu_dec(nr_dentry_unused); + spin_unlock(&dentry->d_lock); + + return LRU_REMOVED; } + /** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock @@ -986,10 +988,17 @@ shrink_dcache_list( */ void shrink_dcache_sb(struct super_block *sb) { - long disposed; + long freed; + + do { + LIST_HEAD(dispose); + + freed = list_lru_walk(&sb->s_dentry_lru, + dentry_lru_isolate_shrink, &dispose, UINT_MAX); - disposed = list_lru_dispose_all(&sb->s_dentry_lru, shrink_dcache_list); - this_cpu_sub(nr_dentry_unused, disposed); + this_cpu_sub(nr_dentry_unused, freed); + shrink_dentry_list(&dispose); + } while (freed > 0); } EXPORT_SYMBOL(shrink_dcache_sb); -- cgit v1.2.3