summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Kelley <mikelley@microsoft.com> 2023-04-13 10:57:37 -0700
committer Christoph Hellwig <hch@lst.de> 2023-04-16 08:25:29 +0200
commit 8b0977ecc8b30a30966e76fcb64cef5041626b02 (patch)
tree 03dd9c0b0e6db4deb953e00c97b0a8c617264866
parent 5499d01c029069044a3b3e50501c77b474c96178 (diff)
swiotlb: track and report io_tlb_used high water marks in debugfs
swiotlb currently reports the total number of slabs and the instantaneous in-use slabs in debugfs. But with increased usage of swiotlb for all I/O in Confidential Computing (coco) VMs, it has become difficult to know how much memory to allocate for swiotlb bounce buffers, either via the automatic algorithm in the kernel or by specifying a value on the kernel boot line. The current automatic algorithm generously allocates swiotlb bounce buffer memory, and may be wasting significant memory in many use cases. To support better understanding of swiotlb usage, add tracking of the high water mark for usage of the default swiotlb bounce buffer memory pool and any reserved memory pools. Report these high water marks in debugfs along with the other swiotlb pool metrics. Allow the high water marks to be reset to zero at runtime by writing to them. Signed-off-by: Michael Kelley <mikelley@microsoft.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- include/linux/swiotlb.h | 7
-rw-r--r-- kernel/dma/swiotlb.c | 66
2 files changed, 73 insertions, 0 deletions
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index bcef10e20ea4..6dc4598d2260 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -87,6 +87,11 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
* @for_alloc: %true if the pool is used for memory allocation
* @nareas: The area number in the pool.
* @area_nslabs: The slot number in the area.
+ * @total_used: The total number of slots in the pool that are currently used
+ * across all areas. Used only for calculating used_hiwater in
+ * debugfs.
+ * @used_hiwater: The high water mark for total_used. Used only for reporting
+ * in debugfs.
*/
struct io_tlb_mem {
phys_addr_t start;
@@ -102,6 +107,8 @@ struct io_tlb_mem {
unsigned int area_nslabs;
struct io_tlb_area *areas;
struct io_tlb_slot *slots;
+ atomic_long_t total_used;
+ atomic_long_t used_hiwater;
};
extern struct io_tlb_mem io_tlb_default_mem;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 938c959ab19e..9bbc2802a444 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -609,6 +609,40 @@ static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
}
/*
+ * Track the total used slots with a global atomic value in order to have
+ * correct information to determine the high water mark. The mem_used()
+ * function gives imprecise results because there's no locking across
+ * multiple areas.
+ */
+#ifdef CONFIG_DEBUG_FS
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ unsigned long old_hiwater, new_used;
+
+ new_used = atomic_long_add_return(nslots, &mem->total_used);
+ old_hiwater = atomic_long_read(&mem->used_hiwater);
+ do {
+ if (new_used <= old_hiwater)
+ break;
+ } while (!atomic_long_try_cmpxchg(&mem->used_hiwater,
+ &old_hiwater, new_used));
+}
+
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_sub(nslots, &mem->total_used);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/*
* Find a suitable number of IO TLB entries size that will fit this request and
* allocate a buffer from that IO TLB pool.
*/
@@ -702,6 +736,8 @@ found:
area->index = wrap_area_index(mem, index + nslots);
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ inc_used_and_hiwater(mem, nslots);
return slot_index;
}
@@ -834,6 +870,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
area->used -= nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ dec_used(mem, nslots);
}
/*
@@ -935,11 +973,37 @@ static int io_tlb_used_get(void *data, u64 *val)
*val = mem_used(mem);
return 0;
}
+
+static int io_tlb_hiwater_get(void *data, u64 *val)
+{
+ struct io_tlb_mem *mem = data;
+
+ *val = atomic_long_read(&mem->used_hiwater);
+ return 0;
+}
+
+static int io_tlb_hiwater_set(void *data, u64 val)
+{
+ struct io_tlb_mem *mem = data;
+
+ /* Only allow setting to zero */
+ if (val != 0)
+ return -EINVAL;
+
+ atomic_long_set(&mem->used_hiwater, val);
+ return 0;
+}
+
DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
+ io_tlb_hiwater_set, "%llu\n");
static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
+ atomic_long_set(&mem->total_used, 0);
+ atomic_long_set(&mem->used_hiwater, 0);
+
mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
if (!mem->nslabs)
return;
@@ -947,6 +1011,8 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem,
&fops_io_tlb_used);
+ debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
+ &fops_io_tlb_hiwater);
}
static int __init __maybe_unused swiotlb_create_default_debugfs(void)