drm/amdgpu: allocate queues horizontally across pipes

Pipes provide better concurrency than queues, therefore we want to make sure that apps use queues from different pipes whenever possible. Optimize for the trivial case where an app will consume rings in order, therefore we don't want adjacent rings to belong to the same pipe. Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Acked-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
author: Andres Rodriguez <andresx7@gmail.com> 2017-02-03 17:31:38 -0500
committer: Andres Rodriguez <andresx7@gmail.com> 2017-04-27 13:25:00 -0400
commit: a3ff741d7babd70a698fd8c928125e43640eb2ca (patch)
tree: 179b284d0e8bdf6fdc1054c52f7a7e758685eece
parent: 701592fc3a552fbe8e4dcfa09804678dacb0f8b0 (diff)
3 files changed, 113 insertions, 69 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d9d2b400567e..1c95bdee5827 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1779,6 +1779,19 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 		return NULL;
 }
 
+static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
+						int mec, int pipe, int queue)
+{
+	int bit = 0;
+
+	bit += mec * adev->gfx.mec.num_pipe_per_mec
+		* adev->gfx.mec.num_queue_per_pipe;
+	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+	bit += queue;
+
+	return test_bit(bit, adev->gfx.mec.queue_bitmap);
+}
+
 /*
  * ASICs macro.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index ce9d43186c02..06338e81575c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4752,11 +4752,42 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r, ring_id;
+	int i, j, k, r, ring_id;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4806,39 +4837,23 @@ static int gfx_v7_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-		unsigned irq_type;
-
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		ring = &adev->gfx.compute_ring[ring_id];
-
-		/* mec0 is me1 */
-		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-				/ adev->gfx.mec.num_pipe_per_mec)
-				+ 1;
-		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-				% adev->gfx.mec.num_pipe_per_mec;
-		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-			+ ring->pipe;
-
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
-
-		ring_id++;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v7_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
+		}
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a89bbaa45d9d..f5992ead3fe5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2120,9 +2120,44 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX8_MEC_HPD_SIZE);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v8_0_sw_init(void *handle)
 {
-	int i, r, ring_id;
+	int i, j, k, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2188,43 +2223,24 @@ static int gfx_v8_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-		unsigned irq_type;
-
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
-			break;
-
-		ring = &adev->gfx.compute_ring[ring_id];
-
-		/* mec0 is me1 */
-		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-				/ adev->gfx.mec.num_pipe_per_mec)
-				+ 1;
-		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-				% adev->gfx.mec.num_pipe_per_mec;
-		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-			+ ring->pipe;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+					continue;
 
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-				     irq_type);
-		if (r)
-			return r;
+				r = gfx_v8_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
 
-		ring_id++;
+				ring_id++;
+			}
+		}
 	}
 
 	r = gfx_v8_0_kiq_init(adev);
author	Andres Rodriguez <andresx7@gmail.com>	2017-02-03 17:31:38 -0500
committer	Andres Rodriguez <andresx7@gmail.com>	2017-04-27 13:25:00 -0400
commit	a3ff741d7babd70a698fd8c928125e43640eb2ca (patch)
tree	179b284d0e8bdf6fdc1054c52f7a7e758685eece
parent	701592fc3a552fbe8e4dcfa09804678dacb0f8b0 (diff)