summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c84
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h1
5 files changed, 211 insertions, 44 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d2d3ad2ded23..d9d2b400567e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -46,6 +46,8 @@
#include <drm/drm_gem.h>
#include <drm/amdgpu_drm.h>
+#include <kgd_kfd_interface.h>
+
#include "amd_shared.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
@@ -893,6 +895,8 @@ struct amdgpu_rlc {
u32 *register_restore;
};
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
@@ -902,6 +906,9 @@ struct amdgpu_mec {
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index daca24572bcb..4a680fdf2429 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -49,7 +49,6 @@
#include "oss/oss_2_0_sh_mask.h"
#define GFX7_NUM_GFX_RINGS 1
-#define GFX7_NUM_COMPUTE_RINGS 8
#define GFX7_MEC_HPD_SIZE 2048
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -2823,18 +2822,45 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
}
}
+static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe, mec;
+
+ /* policy for amdgpu compute queue ownership */
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ mec = (i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+ /* we've run out of HW */
+ if (mec >= adev->gfx.mec.num_mec)
+ break;
+
+ /* policy: amdgpu owns all queues in the first pipe */
+ if (mec == 0 && pipe == 0)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ }
+
+ /* update the number of active compute rings */
+ adev->gfx.num_compute_rings =
+ bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* If you hit this case and edited the policy, you probably just
+ * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
{
int r;
u32 *hpd;
size_t mec_hpd_size;
- /*
- * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
- * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
- * Nonetheless, we assign only 1 pipe because all other pipes will
- * be handled by KFD
- */
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
switch (adev->asic_type) {
case CHIP_KAVERI:
adev->gfx.mec.num_mec = 2;
@@ -2850,6 +2876,10 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
+ /* take ownership of the relevant compute queues */
+ gfx_v7_0_compute_queue_acquire(adev);
+
+ /* allocate space for ALL pipes (even the ones we don't own) */
mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
* GFX7_MEC_HPD_SIZE * 2;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
@@ -4530,7 +4560,7 @@ static int gfx_v7_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
gfx_v7_0_set_ring_funcs(adev);
@@ -4726,7 +4756,7 @@ static int gfx_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i, r;
+ int i, r, ring_id;
/* EOP Event */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4777,28 +4807,38 @@ static int gfx_v7_0_sw_init(void *handle)
}
/* set up the compute queues */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
unsigned irq_type;
- /* max 32 queues per MEC */
- if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
- DRM_ERROR("Too many (%d) compute rings!\n", i);
- break;
- }
- ring = &adev->gfx.compute_ring[i];
+ if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ continue;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec)
+ + 1;
+ ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
- ring->me = 1; /* first MEC */
- ring->pipe = i / 8;
- ring->queue = i % 8;
+ ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
- irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, irq_type);
if (r)
return r;
+
+ ring_id++;
}
/* reserve GDS, GWS and OA resource for gfx */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5b3078402d25..9001859cdb41 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -52,7 +52,6 @@
#include "smu/smu_7_1_3_d.h"
#define GFX8_NUM_GFX_RINGS 1
-#define GFX8_NUM_COMPUTE_RINGS 8
#define GFX8_MEC_HPD_SIZE 2048
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -1415,12 +1414,45 @@ static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
amdgpu_ring_fini(ring);
}
+static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe, mec;
+
+ /* policy for amdgpu compute queue ownership */
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ mec = (i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+ /* we've run out of HW */
+ if (mec >= adev->gfx.mec.num_mec)
+ break;
+
+ /* policy: amdgpu owns all queues in the first pipe */
+ if (mec == 0 && pipe == 0)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ }
+
+ /* update the number of active compute rings */
+ adev->gfx.num_compute_rings =
+ bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* If you hit this case and edited the policy, you probably just
+ * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
int r;
u32 *hpd;
size_t mec_hpd_size;
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
switch (adev->asic_type) {
case CHIP_FIJI:
case CHIP_TONGA:
@@ -1440,8 +1472,10 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
- /* only 1 pipe of the first MEC is owned by amdgpu */
- mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE;
+ /* take ownership of the relevant compute queues */
+ gfx_v8_0_compute_queue_acquire(adev);
+
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
@@ -2088,7 +2122,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v8_0_sw_init(void *handle)
{
- int i, r;
+ int i, r, ring_id;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2155,29 +2189,42 @@ static int gfx_v8_0_sw_init(void *handle)
}
/* set up the compute queues */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
unsigned irq_type;
- /* max 32 queues per MEC */
- if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
- DRM_ERROR("Too many (%d) compute rings!\n", i);
+ if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ continue;
+
+ if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
break;
- }
- ring = &adev->gfx.compute_ring[i];
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec)
+ + 1;
+ ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
- ring->me = 1; /* first MEC */
- ring->pipe = i / 8;
- ring->queue = i % 8;
- ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE);
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
+ ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
- irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
irq_type);
if (r)
return r;
+
+ ring_id++;
}
r = gfx_v8_0_kiq_init(adev);
@@ -5659,7 +5706,7 @@ static int gfx_v8_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
gfx_v8_0_set_ring_funcs(adev);
gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ec2ae6f85033..45d31c36c1b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -38,7 +38,6 @@
#include "v9_structs.h"
#define GFX9_NUM_GFX_RINGS 1
-#define GFX9_NUM_COMPUTE_RINGS 8
#define GFX9_NUM_SE 4
#define GFX9_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
@@ -475,6 +474,37 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
}
}
+static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe, mec;
+
+ /* policy for amdgpu compute queue ownership */
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ mec = (i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+ /* we've run out of HW */
+ if (mec >= adev->gfx.mec.num_mec)
+ break;
+
+ /* policy: amdgpu owns all queues in the first pipe */
+ if (mec == 0 && pipe == 0)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ }
+
+ /* update the number of active compute rings */
+ adev->gfx.num_compute_rings =
+ bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* If you hit this case and edited the policy, you probably just
+ * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
int r;
@@ -486,6 +516,8 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
const struct gfx_firmware_header_v1_0 *mec_hdr;
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
switch (adev->asic_type) {
case CHIP_VEGA10:
adev->gfx.mec.num_mec = 2;
@@ -498,8 +530,9 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
- /* only 1 pipe of the first MEC is owned by amdgpu */
- mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX9_MEC_HPD_SIZE;
+ /* take ownership of the relevant compute queues */
+ gfx_v9_0_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
@@ -1028,7 +1061,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
static int gfx_v9_0_sw_init(void *handle)
{
- int i, r;
+ int i, r, ring_id;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1085,7 +1118,46 @@ static int gfx_v9_0_sw_init(void *handle)
}
/* set up the compute queues */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
+ unsigned irq_type;
+
+ if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ continue;
+
+ if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
+ break;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec)
+ + 1;
+ ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE);
+ ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+
+ /* set up the compute queues */
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
unsigned irq_type;
/* max 32 queues per MEC */
@@ -2468,7 +2540,7 @@ static int gfx_v9_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
gfx_v9_0_set_gds_init(adev);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index a09d9f352871..67f6d1921f4c 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -33,6 +33,7 @@
struct pci_dev;
#define KFD_INTERFACE_VERSION 1
+#define KGD_MAX_QUEUES 128
struct kfd_dev;
struct kgd_dev;