dm cache: significant rework to leverage dm-bio-prison-v2

The cache policy interfaces have been updated to work well with the new bio-prison v2 interface's ability to queue work immediately (promotion, demotion, etc) -- overriding benefit being reduced latency on processing IO through the cache. Previously such work would be left for the DM cache core to queue on various lists and then process in batches later -- this caused a serious delay in latency for IO driven by the cache. The background tracker code was factored out so that all cache policies can make use of it. Also, the "cleaner" policy has been removed and is now a variant of the smq policy that simply disallows migrations. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
author: Joe Thornber <ejt@redhat.com> 2016-12-15 04:57:31 -0500
committer: Mike Snitzer <snitzer@redhat.com> 2017-03-07 13:28:31 -0500
commit: b29d4986d0da1a27cd35917cdb433672f5c95d7f (patch)
tree: a5d94b86cf1eb759bfef5761015135d747e80561 /drivers/md/dm-cache-policy-smq.c
parent: 742c8fdc31e820503f9267070311d894978d1349 (diff)
1 files changed, 461 insertions, 360 deletions
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index f19c6930a67c..74436dc2122f 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -4,8 +4,9 @@
  * This file is released under the GPL.
  */
 
-#include "dm-cache-policy.h"
+#include "dm-cache-background-tracker.h"
 #include "dm-cache-policy-internal.h"
+#include "dm-cache-policy.h"
 #include "dm.h"
 
 #include <linux/hash.h>
@@ -38,10 +39,11 @@ struct entry {
 	unsigned hash_next:28;
 	unsigned prev:28;
 	unsigned next:28;
-	unsigned level:7;
+	unsigned level:6;
 	bool dirty:1;
 	bool allocated:1;
 	bool sentinel:1;
+	bool pending_work:1;
 
 	dm_oblock_t oblock;
 };
@@ -279,14 +281,28 @@ static unsigned q_size(struct queue *q)
  */
 static void q_push(struct queue *q, struct entry *e)
 {
+	BUG_ON(e->pending_work);
+
 	if (!e->sentinel)
 		q->nr_elts++;
 
 	l_add_tail(q->es, q->qs + e->level, e);
 }
 
+static void q_push_front(struct queue *q, struct entry *e)
+{
+	BUG_ON(e->pending_work);
+
+	if (!e->sentinel)
+		q->nr_elts++;
+
+	l_add_head(q->es, q->qs + e->level, e);
+}
+
 static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
 {
+	BUG_ON(e->pending_work);
+
 	if (!e->sentinel)
 		q->nr_elts++;
 
@@ -336,19 +352,6 @@ static struct entry *q_pop(struct queue *q)
 }
 
 /*
- * Pops an entry from a level that is not past a sentinel.
- */
-static struct entry *q_pop_old(struct queue *q, unsigned max_level)
-{
-	struct entry *e = q_peek(q, max_level, false);
-
-	if (e)
-		q_del(q, e);
-
-	return e;
-}
-
-/*
  * This function assumes there is a non-sentinel entry to pop.  It's only
  * used by redistribute, so we know this is true.  It also doesn't adjust
  * the q->nr_elts count.
@@ -446,45 +449,49 @@ static void q_redistribute(struct queue *q)
 				break;
 
 			e->level = level + 1u;
-			l_add_head(q->es, l_above, e);
+			l_add_tail(q->es, l_above, e);
 		}
 	}
 }
 
-static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels)
+static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels,
+		      struct entry *s1, struct entry *s2)
 {
 	struct entry *de;
-	unsigned new_level;
-
-	q_del(q, e);
+	unsigned sentinels_passed = 0;
+	unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels);
 
+	/* try and find an entry to swap with */
 	if (extra_levels && (e->level < q->nr_levels - 1u)) {
-		new_level = min(q->nr_levels - 1u, e->level + extra_levels);
-		for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) {
-			if (de->sentinel)
-				continue;
+		for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
+			sentinels_passed++;
 
+		if (de) {
 			q_del(q, de);
 			de->level = e->level;
+			if (s1) {
+				switch (sentinels_passed) {
+				case 0:
+					q_push_before(q, s1, de);
+					break;
+
+				case 1:
+					q_push_before(q, s2, de);
+					break;
 
-			if (dest)
-				q_push_before(q, dest, de);
-			else
+				default:
+					q_push(q, de);
+				}
+			} else
 				q_push(q, de);
-			break;
 		}
-
-		e->level = new_level;
 	}
 
+	q_del(q, e);
+	e->level = new_level;
 	q_push(q, e);
 }
 
-static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels)
-{
-	q_requeue_before(q, NULL, e, extra_levels);
-}
-
 /*----------------------------------------------------------------*/
 
 #define FP_SHIFT 8
@@ -550,7 +557,7 @@ static enum performance stats_assess(struct stats *s)
 
 /*----------------------------------------------------------------*/
 
-struct hash_table {
+struct smq_hash_table {
 	struct entry_space *es;
 	unsigned long long hash_bits;
 	unsigned *buckets;
@@ -560,7 +567,7 @@ struct hash_table {
  * All cache entries are stored in a chained hash table.  To save space we
  * use indexing again, and only store indexes to the next entry.
  */
-static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries)
+static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries)
 {
 	unsigned i, nr_buckets;
 
@@ -578,34 +585,34 @@ static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_ent
 	return 0;
 }
 
-static void h_exit(struct hash_table *ht)
+static void h_exit(struct smq_hash_table *ht)
 {
 	vfree(ht->buckets);
 }
 
-static struct entry *h_head(struct hash_table *ht, unsigned bucket)
+static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket)
 {
 	return to_entry(ht->es, ht->buckets[bucket]);
 }
 
-static struct entry *h_next(struct hash_table *ht, struct entry *e)
+static struct entry *h_next(struct smq_hash_table *ht, struct entry *e)
 {
 	return to_entry(ht->es, e->hash_next);
 }
 
-static void __h_insert(struct hash_table *ht, unsigned bucket, struct entry *e)
+static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e)
 {
 	e->hash_next = ht->buckets[bucket];
 	ht->buckets[bucket] = to_index(ht->es, e);
 }
 
-static void h_insert(struct hash_table *ht, struct entry *e)
+static void h_insert(struct smq_hash_table *ht, struct entry *e)
 {
 	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
 	__h_insert(ht, h, e);
 }
 
-static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t oblock,
+static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock,
 				struct entry **prev)
 {
 	struct entry *e;
@@ -621,7 +628,7 @@ static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t o
 	return NULL;
 }
 
-static void __h_unlink(struct hash_table *ht, unsigned h,
+static void __h_unlink(struct smq_hash_table *ht, unsigned h,
 		       struct entry *e, struct entry *prev)
 {
 	if (prev)
@@ -633,7 +640,7 @@ static void __h_unlink(struct hash_table *ht, unsigned h,
 /*
  * Also moves each entry to the front of the bucket.
  */
-static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
+static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock)
 {
 	struct entry *e, *prev;
 	unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);
@@ -651,7 +658,7 @@ static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
 	return e;
 }
 
-static void h_remove(struct hash_table *ht, struct entry *e)
+static void h_remove(struct smq_hash_table *ht, struct entry *e)
 {
 	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
 	struct entry *prev;
@@ -699,7 +706,10 @@ static void init_entry(struct entry *e)
 	e->next = INDEXER_NULL;
 	e->prev = INDEXER_NULL;
 	e->level = 0u;
+	e->dirty = true;	/* FIXME: audit */
 	e->allocated = true;
+	e->sentinel = false;
+	e->pending_work = false;
 }
 
 static struct entry *alloc_entry(struct entry_alloc *ea)
@@ -762,11 +772,11 @@ static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
 #define NR_HOTSPOT_LEVELS 64u
 #define NR_CACHE_LEVELS 64u
 
-#define WRITEBACK_PERIOD (10 * HZ)
-#define DEMOTE_PERIOD (60 * HZ)
+#define WRITEBACK_PERIOD (10ul * HZ)
+#define DEMOTE_PERIOD (60ul * HZ)
 
 #define HOTSPOT_UPDATE_PERIOD (HZ)
-#define CACHE_UPDATE_PERIOD (10u * HZ)
+#define CACHE_UPDATE_PERIOD (60ul * HZ)
 
 struct smq_policy {
 	struct dm_cache_policy policy;
@@ -814,8 +824,8 @@ struct smq_policy {
 	 * The hash tables allows us to quickly find an entry by origin
 	 * block.
 	 */
-	struct hash_table table;
-	struct hash_table hotspot_table;
+	struct smq_hash_table table;
+	struct smq_hash_table hotspot_table;
 
 	bool current_writeback_sentinels;
 	unsigned long next_writeback_period;
@@ -828,6 +838,10 @@ struct smq_policy {
 
 	unsigned long next_hotspot_period;
 	unsigned long next_cache_period;
+
+	struct background_tracker *bg_work;
+
+	bool migrations_allowed;
 };
 
 /*----------------------------------------------------------------*/
@@ -876,15 +890,15 @@ static void __update_demote_sentinels(struct smq_policy *mq)
 static void update_sentinels(struct smq_policy *mq)
 {
 	if (time_after(jiffies, mq->next_writeback_period)) {
-		__update_writeback_sentinels(mq);
 		mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
 		mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
+		__update_writeback_sentinels(mq);
 	}
 
 	if (time_after(jiffies, mq->next_demote_period)) {
-		__update_demote_sentinels(mq);
 		mq->next_demote_period = jiffies + DEMOTE_PERIOD;
 		mq->current_demote_sentinels = !mq->current_demote_sentinels;
+		__update_demote_sentinels(mq);
 	}
 }
 
@@ -920,55 +934,40 @@ static void sentinels_init(struct smq_policy *mq)
 
 /*----------------------------------------------------------------*/
 
-/*
- * These methods tie together the dirty queue, clean queue and hash table.
- */
-static void push_new(struct smq_policy *mq, struct entry *e)
+static void del_queue(struct smq_policy *mq, struct entry *e)
 {
-	struct queue *q = e->dirty ? &mq->dirty : &mq->clean;
-	h_insert(&mq->table, e);
-	q_push(q, e);
+	q_del(e->dirty ? &mq->dirty : &mq->clean, e);
 }
 
-static void push(struct smq_policy *mq, struct entry *e)
+static void push_queue(struct smq_policy *mq, struct entry *e)
 {
-	struct entry *sentinel;
-
-	h_insert(&mq->table, e);
-
-	/*
-	 * Punch this into the queue just in front of the sentinel, to
-	 * ensure it's cleaned straight away.
-	 */
-	if (e->dirty) {
-		sentinel = writeback_sentinel(mq, e->level);
-		q_push_before(&mq->dirty, sentinel, e);
-	} else {
-		sentinel = demote_sentinel(mq, e->level);
-		q_push_before(&mq->clean, sentinel, e);
-	}
+	if (e->dirty)
+		q_push(&mq->dirty, e);
+	else
+		q_push(&mq->clean, e);
 }
 
-/*
- * Removes an entry from cache.  Removes from the hash table.
- */
-static void __del(struct smq_policy *mq, struct queue *q, struct entry *e)
+// !h, !q, a -> h, q, a
+static void push(struct smq_policy *mq, struct entry *e)
 {
-	q_del(q, e);
-	h_remove(&mq->table, e);
+	h_insert(&mq->table, e);
+	if (!e->pending_work)
+		push_queue(mq, e);
 }
 
-static void del(struct smq_policy *mq, struct entry *e)
+static void push_queue_front(struct smq_policy *mq, struct entry *e)
 {
-	__del(mq, e->dirty ? &mq->dirty : &mq->clean, e);
+	if (e->dirty)
+		q_push_front(&mq->dirty, e);
+	else
+		q_push_front(&mq->clean, e);
 }
 
-static struct entry *pop_old(struct smq_policy *mq, struct queue *q, unsigned max_level)
+static void push_front(struct smq_policy *mq, struct entry *e)
 {
-	struct entry *e = q_pop_old(q, max_level);
-	if (e)
-		h_remove(&mq->table, e);
-	return e;
+	h_insert(&mq->table, e);
+	if (!e->pending_work)
+		push_queue_front(mq, e);
 }
 
 static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
@@ -978,16 +977,21 @@ static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
 
 static void requeue(struct smq_policy *mq, struct entry *e)
 {
-	struct entry *sentinel;
+	/*
+	 * Pending work has temporarily been taken out of the queues.
+	 */
+	if (e->pending_work)
+		return;
 
 	if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
-		if (e->dirty) {
-			sentinel = writeback_sentinel(mq, e->level);
-			q_requeue_before(&mq->dirty, sentinel, e, 1u);
-		} else {
-			sentinel = demote_sentinel(mq, e->level);
-			q_requeue_before(&mq->clean, sentinel, e, 1u);
+		if (!e->dirty) {
+			q_requeue(&mq->clean, e, 1u, NULL, NULL);
+			return;
 		}
+
+		q_requeue(&mq->dirty, e, 1u,
+			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels),
+			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels));
 	}
 }
 
@@ -1026,6 +1030,8 @@ static void update_promote_levels(struct smq_policy *mq)
 	unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
 		default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);
 
+	threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS);
+
 	/*
 	 * If the hotspot queue is performing badly then we have little
 	 * confidence that we know which blocks to promote.  So we cut down
@@ -1045,7 +1051,7 @@ static void update_promote_levels(struct smq_policy *mq)
 	}
 
 	mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
-	mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level) + 2u;
+	mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level);
 }
 
 /*
@@ -1095,34 +1101,142 @@ static void end_cache_period(struct smq_policy *mq)
 	}
 }
 
-static int demote_cblock(struct smq_policy *mq,
-			 struct policy_locker *locker,
-			 dm_oblock_t *oblock)
+/*----------------------------------------------------------------*/
+
+/*
+ * Targets are given as a percentage.
+ */
+#define CLEAN_TARGET 25u
+#define FREE_TARGET 25u
+
+static unsigned percent_to_target(struct smq_policy *mq, unsigned p)
 {
-	struct entry *demoted = q_peek(&mq->clean, mq->clean.nr_levels, false);
-	if (!demoted)
-		/*
-		 * We could get a block from mq->dirty, but that
-		 * would add extra latency to the triggering bio as it
-		 * waits for the writeback.  Better to not promote this
-		 * time and hope there's a clean block next time this block
-		 * is hit.
-		 */
-		return -ENOSPC;
+	return from_cblock(mq->cache_size) * p / 100u;
+}
+
+static bool clean_target_met(struct smq_policy *mq, bool idle)
+{
+	/*
+	 * Cache entries may not be populated.  So we cannot rely on the
+	 * size of the clean queue.
+	 */
+	unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
 
-	if (locker->fn(locker, demoted->oblock))
+	if (idle)
 		/*
-		 * We couldn't lock this block.
+		 * We'd like to clean everything.
 		 */
-		return -EBUSY;
+		return q_size(&mq->dirty) == 0u;
+	else
+		return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
+		       percent_to_target(mq, CLEAN_TARGET);
+}
 
-	del(mq, demoted);
-	*oblock = demoted->oblock;
-	free_entry(&mq->cache_alloc, demoted);
+static bool free_target_met(struct smq_policy *mq, bool idle)
+{
+	unsigned nr_free = from_cblock(mq->cache_size) -
+			   mq->cache_alloc.nr_allocated;
 
-	return 0;
+	if (idle)
+		return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
+		       percent_to_target(mq, FREE_TARGET);
+	else
+		return true;
 }
 
+/*----------------------------------------------------------------*/
+
+static void mark_pending(struct smq_policy *mq, struct entry *e)
+{
+	BUG_ON(e->sentinel);
+	BUG_ON(!e->allocated);
+	BUG_ON(e->pending_work);
+	e->pending_work = true;
+}
+
+static void clear_pending(struct smq_policy *mq, struct entry *e)
+{
+	BUG_ON(!e->pending_work);
+	e->pending_work = false;
+}
+
+static void queue_writeback(struct smq_policy *mq)
+{
+	int r;
+	struct policy_work work;
+	struct entry *e;
+
+	e = q_peek(&mq->dirty, mq->dirty.nr_levels, false);
+	if (e) {
+		mark_pending(mq, e);
+		q_del(&mq->dirty, e);
+
+		work.op = POLICY_WRITEBACK;
+		work.oblock = e->oblock;
+		work.cblock = infer_cblock(mq, e);
+
+		r = btracker_queue(mq->bg_work, &work, NULL);
+		WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race.
+	}
+}
+
+static void queue_demotion(struct smq_policy *mq)
+{
+	struct policy_work work;
+	struct entry *e;
+
+	if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
+		return;
+
+	e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+	if (!e) {
+		if (!clean_target_met(mq, false))
+			queue_writeback(mq);
+		return;
+	}
+
+	mark_pending(mq, e);
+	q_del(&mq->clean, e);
+
+	work.op = POLICY_DEMOTE;
+	work.oblock = e->oblock;
+	work.cblock = infer_cblock(mq, e);
+	btracker_queue(mq->bg_work, &work, NULL);
+}
+
+static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
+			    struct policy_work **workp)
+{
+	struct entry *e;
+	struct policy_work work;
+
+	if (!mq->migrations_allowed)
+		return;
+
+	if (allocator_empty(&mq->cache_alloc)) {
+		if (!free_target_met(mq, false))
+			queue_demotion(mq);
+		return;
+	}
+
+	if (btracker_promotion_already_present(mq->bg_work, oblock))
+		return;
+
+	/*
+	 * We allocate the entry now to reserve the cblock.  If the
+	 * background work is aborted we must remember to free it.
+	 */
+	e = alloc_entry(&mq->cache_alloc);
+	BUG_ON(!e);
+	e->pending_work = true;
+	work.op = POLICY_PROMOTE;
+	work.oblock = oblock;
+	work.cblock = infer_cblock(mq, e);
+	btracker_queue(mq->bg_work, &work, workp);
+}
+
+/*----------------------------------------------------------------*/
+
 enum promote_result {
 	PROMOTE_NOT,
 	PROMOTE_TEMPORARY,
@@ -1137,49 +1251,18 @@ static enum promote_result maybe_promote(bool promote)
 	return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
 }
 
-static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, struct bio *bio,
-					  bool fast_promote)
+static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e,
+					  int data_dir, bool fast_promote)
 {
-	if (bio_data_dir(bio) == WRITE) {
+	if (data_dir == WRITE) {
 		if (!allocator_empty(&mq->cache_alloc) && fast_promote)
 			return PROMOTE_TEMPORARY;
 
-		else
-			return maybe_promote(hs_e->level >= mq->write_promote_level);
+		return maybe_promote(hs_e->level >= mq->write_promote_level);
 	} else
 		return maybe_promote(hs_e->level >= mq->read_promote_level);
 }
 
-static void insert_in_cache(struct smq_policy *mq, dm_oblock_t oblock,
-			    struct policy_locker *locker,
-			    struct policy_result *result, enum promote_result pr)
-{
-	int r;
-	struct entry *e;
-
-	if (allocator_empty(&mq->cache_alloc)) {
-		result->op = POLICY_REPLACE;
-		r = demote_cblock(mq, locker, &result->old_oblock);
-		if (r) {
-			result->op = POLICY_MISS;
-			return;
-		}
-
-	} else
-		result->op = POLICY_NEW;
-
-	e = alloc_entry(&mq->cache_alloc);
-	BUG_ON(!e);
-	e->oblock = oblock;
-
-	if (pr == PROMOTE_TEMPORARY)
-		push(mq, e);
-	else
-		push_new(mq, e);
-
-	result->cblock = infer_cblock(mq, e);
-}
-
 static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
 {
 	sector_t r = from_oblock(b);
@@ -1187,7 +1270,7 @@ static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
 	return to_oblock(r);
 }
 
-static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, struct bio *bio)
+static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b)
 {
 	unsigned hi;
 	dm_oblock_t hb = to_hblock(mq, b);
@@ -1199,7 +1282,8 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b,
 		hi = get_index(&mq->hotspot_alloc, e);
 		q_requeue(&mq->hotspot, e,
 			  test_and_set_bit(hi, mq->hotspot_hit_bits) ?
-			  0u : mq->hotspot_level_jump);
+			  0u : mq->hotspot_level_jump,
+			  NULL, NULL);
 
 	} else {
 		stats_miss(&mq->hotspot_stats);
@@ -1225,47 +1309,6 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b,
 	return e;
 }
 
-/*
- * Looks the oblock up in the hash table, then decides whether to put in
- * pre_cache, or cache etc.
- */
-static int map(struct smq_policy *mq, struct bio *bio, dm_oblock_t oblock,
-	       bool can_migrate, bool fast_promote,
-	       struct policy_locker *locker, struct policy_result *result)
-{
-	struct entry *e, *hs_e;
-	enum promote_result pr;
-
-	hs_e = update_hotspot_queue(mq, oblock, bio);
-
-	e = h_lookup(&mq->table, oblock);
-	if (e) {
-		stats_level_accessed(&mq->cache_stats, e->level);
-
-		requeue(mq, e);
-		result->op = POLICY_HIT;
-		result->cblock = infer_cblock(mq, e);
-
-	} else {
-		stats_miss(&mq->cache_stats);
-
-		pr = should_promote(mq, hs_e, bio, fast_promote);
-		if (pr == PROMOTE_NOT)
-			result->op = POLICY_MISS;
-
-		else {
-			if (!can_migrate) {
-				result->op = POLICY_MISS;
-				return -EWOULDBLOCK;
-			}
-
-			insert_in_cache(mq, oblock, locker, result, pr);
-		}
-	}
-
-	return 0;
-}
-
 /*----------------------------------------------------------------*/
 
 /*
@@ -1282,6 +1325,7 @@ static void smq_destroy(struct dm_cache_policy *p)
 {
 	struct smq_policy *mq = to_smq_policy(p);
 
+	btracker_destroy(mq->bg_work);
 	h_exit(&mq->hotspot_table);
 	h_exit(&mq->table);
 	free_bitset(mq->hotspot_hit_bits);
@@ -1290,234 +1334,247 @@ static void smq_destroy(struct dm_cache_policy *p)
 	kfree(mq);
 }
 
-static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
-		   bool can_block, bool can_migrate, bool fast_promote,
-		   struct bio *bio, struct policy_locker *locker,
-		   struct policy_result *result)
-{
-	int r;
-	unsigned long flags;
-	struct smq_policy *mq = to_smq_policy(p);
-
-	result->op = POLICY_MISS;
-
-	spin_lock_irqsave(&mq->lock, flags);
-	r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
-	spin_unlock_irqrestore(&mq->lock, flags);
-
-	return r;
-}
+/*----------------------------------------------------------------*/
 
-static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
+static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock,
+		    int data_dir, bool fast_copy,
+		    struct policy_work **work, bool *background_work)
 {
-	int r;
-	unsigned long flags;
-	struct smq_policy *mq = to_smq_policy(p);
-	struct entry *e;
+	struct entry *e, *hs_e;
+	enum promote_result pr;
+
+	*background_work = false;
 
-	spin_lock_irqsave(&mq->lock, flags);
 	e = h_lookup(&mq->table, oblock);
 	if (e) {
+		stats_level_accessed(&mq->cache_stats, e->level);
+
+		requeue(mq, e);
 		*cblock = infer_cblock(mq, e);
-		r = 0;
-	} else
-		r = -ENOENT;
-	spin_unlock_irqrestore(&mq->lock, flags);
+		return 0;
 
-	return r;
-}
+	} else {
+		stats_miss(&mq->cache_stats);
 
-static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, bool set)
-{
-	struct entry *e;
+		/*
+		 * The hotspot queue only gets updated with misses.
+		 */
+		hs_e = update_hotspot_queue(mq, oblock);
 
-	e = h_lookup(&mq->table, oblock);
-	BUG_ON(!e);
+		pr = should_promote(mq, hs_e, data_dir, fast_copy);
+		if (pr != PROMOTE_NOT) {
+			queue_promotion(mq, oblock, work);
+			*background_work = true;
+		}
 
-	del(mq, e);
-	e->dirty = set;
-	push(mq, e);
+		return -ENOENT;
+	}
 }
 
-static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
+		      int data_dir, bool fast_copy,
+		      bool *background_work)
 {
+	int r;
 	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
 	spin_lock_irqsave(&mq->lock, flags);
-	__smq_set_clear_dirty(mq, oblock, true);
+	r = __lookup(mq, oblock, cblock,
+		     data_dir, fast_copy,
+		     NULL, background_work);
 	spin_unlock_irqrestore(&mq->lock, flags);
+
+	return r;
 }
 
-static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+static int smq_lookup_with_work(struct dm_cache_policy *p,
+				dm_oblock_t oblock, dm_cblock_t *cblock,
+				int data_dir, bool fast_copy,
+				struct policy_work **work)
 {
-	struct smq_policy *mq = to_smq_policy(p);
+	int r;
+	bool background_queued;
 	unsigned long flags;
+	struct smq_policy *mq = to_smq_policy(p);
 
 	spin_lock_irqsave(&mq->lock, flags);
-	__smq_set_clear_dirty(mq, oblock, false);
+	r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued);
 	spin_unlock_irqrestore(&mq->lock, flags);
-}
 
-static unsigned random_level(dm_cblock_t cblock)
-{
-	return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
+	return r;
 }
 
-static int smq_load_mapping(struct dm_cache_policy *p,
-			    dm_oblock_t oblock, dm_cblock_t cblock,
-			    uint32_t hint, bool hint_valid)
+static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
+				   struct policy_work **result)
 {
+	int r;
+	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
-	struct entry *e;
 
-	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
-	e->oblock = oblock;
-	e->dirty = false;	/* this gets corrected in a minute */
-	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
-	push(mq, e);
-
-	return 0;
-}
+	spin_lock_irqsave(&mq->lock, flags);
+	r = btracker_issue(mq->bg_work, result);
+	if (r == -ENODATA) {
+		/* find some writeback work to do */
+		if (mq->migrations_allowed && !free_target_met(mq, idle))
+			queue_demotion(mq);
 
-static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
-{
-	struct smq_policy *mq = to_smq_policy(p);
-	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
+		else if (!clean_target_met(mq, idle))
+			queue_writeback(mq);
 
-	if (!e->allocated)
-		return 0;
+		r = btracker_issue(mq->bg_work, result);
+	}
+	spin_unlock_irqrestore(&mq->lock, flags);
 
-	return e->level;
+	return r;
 }
 
-static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
-{
-	struct entry *e;
+/*
+ * We need to clear any pending work flags that have been set, and in the
+ * case of promotion free the entry for the destination cblock.
+ */
+static void __complete_background_work(struct smq_policy *mq,
+				       struct policy_work *work,
+				       bool success)
+{
+	struct entry *e = get_entry(&mq->cache_alloc,
+				    from_cblock(work->cblock));
+
+	switch (work->op) {
+	case POLICY_PROMOTE:
+		// !h, !q, a
+		clear_pending(mq, e);
+		if (success) {
+			e->oblock = work->oblock;
+			push(mq, e);
+			// h, q, a
+		} else {
+			free_entry(&mq->cache_alloc, e);
+			// !h, !q, !a
+		}
+		break;
 
-	e = h_lookup(&mq->table, oblock);
-	BUG_ON(!e);
+	case POLICY_DEMOTE:
+		// h, !q, a
+		if (success) {
+			h_remove(&mq->table, e);
+			free_entry(&mq->cache_alloc, e);
+			// !h, !q, !a
+		} else {
+			clear_pending(mq, e);
+			push_queue(mq, e);
+			// h, q, a
+		}
+		break;
 
-	del(mq, e);
-	free_entry(&mq->cache_alloc, e);
+	case POLICY_WRITEBACK:
+		// h, !q, a
+		clear_pending(mq, e);
+		push_queue(mq, e);
+		// h, q, a
+		break;
+	}
+
+	btracker_complete(mq->bg_work, work);
 }
 
-static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+static void smq_complete_background_work(struct dm_cache_policy *p,
+					 struct policy_work *work,
+					 bool success)
 {
-	struct smq_policy *mq = to_smq_policy(p);
 	unsigned long flags;
+	struct smq_policy *mq = to_smq_policy(p);
 
 	spin_lock_irqsave(&mq->lock, flags);
-	__remove_mapping(mq, oblock);
+	__complete_background_work(mq, work, success);
 	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
-static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
+// in_hash(oblock) -> in_hash(oblock)
+static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set)
 {
 	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
 
-	if (!e || !e->allocated)
-		return -ENODATA;
-
-	del(mq, e);
-	free_entry(&mq->cache_alloc, e);
-
-	return 0;
+	if (e->pending_work)
+		e->dirty = set;
+	else {
+		del_queue(mq, e);
+		e->dirty = set;
+		push_queue(mq, e);
+	}
 }
 
-static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
+static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
 {
-	int r;
 	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
 
 	spin_lock_irqsave(&mq->lock, flags);
-	r = __remove_cblock(mq, cblock);
+	__smq_set_clear_dirty(mq, cblock, true);
 	spin_unlock_irqrestore(&mq->lock, flags);
-
-	return r;
 }
 
-
-#define CLEAN_TARGET_CRITICAL 5u /* percent */
-
-static bool clean_target_met(struct smq_policy *mq, bool critical)
+static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
 {
-	if (critical) {
-		/*
-		 * Cache entries may not be populated.  So we're cannot rely on the
-		 * size of the clean queue.
-		 */
-		unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
-		unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_CRITICAL / 100u;
+	struct smq_policy *mq = to_smq_policy(p);
+	unsigned long flags;
 
-		return nr_clean >= target;
-	} else
-		return !q_size(&mq->dirty);
+	spin_lock_irqsave(&mq->lock, flags);
+	__smq_set_clear_dirty(mq, cblock, false);
+	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
-static int __smq_writeback_work(struct smq_policy *mq, dm_oblock_t *oblock,
-				dm_cblock_t *cblock, bool critical_only)
+static unsigned random_level(dm_cblock_t cblock)
 {
-	struct entry *e = NULL;
-	bool target_met = clean_target_met(mq, critical_only);
-
-	if (critical_only)
-		/*
-		 * Always try and keep the bottom level clean.
-		 */
-		e = pop_old(mq, &mq->dirty, target_met ? 1u : mq->dirty.nr_levels);
+	return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
+}
 
-	else
-		e = pop_old(mq, &mq->dirty, mq->dirty.nr_levels);
+static int smq_load_mapping(struct dm_cache_policy *p,
+			    dm_oblock_t oblock, dm_cblock_t cblock,
+			    bool dirty, uint32_t hint, bool hint_valid)
+{
+	struct smq_policy *mq = to_smq_policy(p);
+	struct entry *e;
 
-	if (!e)
-		return -ENODATA;
+	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
+	e->oblock = oblock;
+	e->dirty = dirty;
+	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
+	e->pending_work = false;
 
-	*oblock = e->oblock;
-	*cblock = infer_cblock(mq, e);
-	e->dirty = false;
-	push_new(mq, e);
+	/*
+	 * When we load mappings we push ahead of both sentinels in order to
+	 * allow demotions and cleaning to occur immediately.
+	 */
+	push_front(mq, e);
 
 	return 0;
 }
 
-static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
-			      dm_cblock_t *cblock, bool critical_only)
+static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
 {
-	int r;
-	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
+	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
 
-	spin_lock_irqsave(&mq->lock, flags);
-	r = __smq_writeback_work(mq, oblock, cblock, critical_only);
-	spin_unlock_irqrestore(&mq->lock, flags);
-
-	return r;
-}
-
-static void __force_mapping(struct smq_policy *mq,
-			    dm_oblock_t current_oblock, dm_oblock_t new_oblock)
-{
-	struct entry *e = h_lookup(&mq->table, current_oblock);
+	if (!e->allocated)
+		return -ENODATA;
 
-	if (e) {
-		del(mq, e);
-		e->oblock = new_oblock;
-		e->dirty = true;
-		push(mq, e);
-	}
+	// FIXME: what if this block has pending background work?
+	del_queue(mq, e);
+	h_remove(&mq->table, e);
+	free_entry(&mq->cache_alloc, e);
+	return 0;
 }
 
-static void smq_force_mapping(struct dm_cache_policy *p,
-			      dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
 {
-	unsigned long flags;
 	struct smq_policy *mq = to_smq_policy(p);
+	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
 
-	spin_lock_irqsave(&mq->lock, flags);
-	__force_mapping(mq, current_oblock, new_oblock);
-	spin_unlock_irqrestore(&mq->lock, flags);
+	if (!e->allocated)
+		return 0;
+
+	return e->level;
 }
 
 static dm_cblock_t smq_residency(struct dm_cache_policy *p)
@@ -1546,6 +1603,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block)
 	spin_unlock_irqrestore(&mq->lock, flags);
 }
 
+static void smq_allow_migrations(struct dm_cache_policy *p, bool allow)
+{
+	struct smq_policy *mq = to_smq_policy(p);
+	mq->migrations_allowed = allow;
+}
+
 /*
  * smq has no config values, but the old mq policy did.  To avoid breaking
  * software we continue to accept these configurables for the mq policy,
@@ -1590,18 +1653,18 @@ static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
 static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
 {
 	mq->policy.destroy = smq_destroy;
-	mq->policy.map = smq_map;
 	mq->policy.lookup = smq_lookup;
+	mq->policy.lookup_with_work = smq_lookup_with_work;
+	mq->policy.get_background_work = smq_get_background_work;
+	mq->policy.complete_background_work = smq_complete_background_work;
 	mq->policy.set_dirty = smq_set_dirty;
 	mq->policy.clear_dirty = smq_clear_dirty;
 	mq->policy.load_mapping = smq_load_mapping;
+	mq->policy.invalidate_mapping = smq_invalidate_mapping;
 	mq->policy.get_hint = smq_get_hint;
-	mq->policy.remove_mapping = smq_remove_mapping;
-	mq->policy.remove_cblock = smq_remove_cblock;
-	mq->policy.writeback_work = smq_writeback_work;
-	mq->policy.force_mapping = smq_force_mapping;
 	mq->policy.residency = smq_residency;
 	mq->policy.tick = smq_tick;
+	mq->policy.allow_migrations = smq_allow_migrations;
 
 	if (mimic_mq) {
 		mq->policy.set_config_value = mq_set_config_value;
@@ -1633,7 +1696,8 @@ static void calc_hotspot_params(sector_t origin_size,
 static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
 					    sector_t origin_size,
 					    sector_t cache_block_size,
-					    bool mimic_mq)
+					    bool mimic_mq,
+					    bool migrations_allowed)
 {
 	unsigned i;
 	unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
@@ -1658,11 +1722,11 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
 	}
 
 	init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
-        for (i = 0; i < nr_sentinels_per_queue; i++)
+	for (i = 0; i < nr_sentinels_per_queue; i++)
 		get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;
 
 	init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
-        for (i = 0; i < nr_sentinels_per_queue; i++)
+	for (i = 0; i < nr_sentinels_per_queue; i++)
 		get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;
 
 	init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
@@ -1715,8 +1779,16 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
 	mq->next_hotspot_period = jiffies;
 	mq->next_cache_period = jiffies;
 
+	mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */
+	if (!mq->bg_work)
+		goto bad_btracker;
+
+	mq->migrations_allowed = migrations_allowed;
+
 	return &mq->policy;
 
+bad_btracker:
+	h_exit(&mq->hotspot_table);
 bad_alloc_hotspot_table:
 	h_exit(&mq->table);
 bad_alloc_table:
@@ -1735,21 +1807,28 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
 					  sector_t origin_size,
 					  sector_t cache_block_size)
 {
-	return __smq_create(cache_size, origin_size, cache_block_size, false);
+	return __smq_create(cache_size, origin_size, cache_block_size, false, true);
 }
 
 static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 					 sector_t origin_size,
 					 sector_t cache_block_size)
 {
-	return __smq_create(cache_size, origin_size, cache_block_size, true);
+	return __smq_create(cache_size, origin_size, cache_block_size, true, true);
+}
+
+static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
+					      sector_t origin_size,
+					      sector_t cache_block_size)
+{
+	return __smq_create(cache_size, origin_size, cache_block_size, false, false);
 }
 
 /*----------------------------------------------------------------*/
 
 static struct dm_cache_policy_type smq_policy_type = {
 	.name = "smq",
-	.version = {1, 5, 0},
+	.version = {2, 0, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = smq_create
@@ -1757,15 +1836,23 @@ static struct dm_cache_policy_type smq_policy_type = {
 
 static struct dm_cache_policy_type mq_policy_type = {
 	.name = "mq",
-	.version = {1, 5, 0},
+	.version = {2, 0, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create,
 };
 
+static struct dm_cache_policy_type cleaner_policy_type = {
+	.name = "cleaner",
+	.version = {2, 0, 0},
+	.hint_size = 4,
+	.owner = THIS_MODULE,
+	.create = cleaner_create,
+};
+
 static struct dm_cache_policy_type default_policy_type = {
 	.name = "default",
-	.version = {1, 5, 0},
+	.version = {2, 0, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = smq_create,
@@ -1785,23 +1872,36 @@ static int __init smq_init(void)
 	r = dm_cache_policy_register(&mq_policy_type);
 	if (r) {
 		DMERR("register failed (as mq) %d", r);
-		dm_cache_policy_unregister(&smq_policy_type);
-		return -ENOMEM;
+		goto out_mq;
+	}
+
+	r = dm_cache_policy_register(&cleaner_policy_type);
+	if (r) {
+		DMERR("register failed (as cleaner) %d", r);
+		goto out_cleaner;
 	}
 
 	r = dm_cache_policy_register(&default_policy_type);
 	if (r) {
 		DMERR("register failed (as default) %d", r);
-		dm_cache_policy_unregister(&mq_policy_type);
-		dm_cache_policy_unregister(&smq_policy_type);
-		return -ENOMEM;
+		goto out_default;
 	}
 
 	return 0;
+
+out_default:
+	dm_cache_policy_unregister(&cleaner_policy_type);
+out_cleaner:
+	dm_cache_policy_unregister(&mq_policy_type);
+out_mq:
+	dm_cache_policy_unregister(&smq_policy_type);
+
+	return -ENOMEM;
 }
 
 static void __exit smq_exit(void)
 {
+	dm_cache_policy_unregister(&cleaner_policy_type);
 	dm_cache_policy_unregister(&smq_policy_type);
 	dm_cache_policy_unregister(&mq_policy_type);
 	dm_cache_policy_unregister(&default_policy_type);
@@ -1816,3 +1916,4 @@ MODULE_DESCRIPTION("smq cache policy");
 
 MODULE_ALIAS("dm-cache-default");
 MODULE_ALIAS("dm-cache-mq");
+MODULE_ALIAS("dm-cache-cleaner");
author	Joe Thornber <ejt@redhat.com>	2016-12-15 04:57:31 -0500
committer	Mike Snitzer <snitzer@redhat.com>	2017-03-07 13:28:31 -0500
commit	b29d4986d0da1a27cd35917cdb433672f5c95d7f (patch)
tree	a5d94b86cf1eb759bfef5761015135d747e80561 /drivers/md/dm-cache-policy-smq.c
parent	742c8fdc31e820503f9267070311d894978d1349 (diff)