summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Rodriguez <andresx7@gmail.com>2017-02-02 00:38:22 -0500
committerAndres Rodriguez <andresx7@gmail.com>2017-04-27 13:24:41 -0400
commitccdb7de8e9bbedda445bdaa7983e18ade6a440c3 (patch)
tree66fc27c8aa3ad105c7c2c0f772f9a363fe054451
parent12f6c65cf42b7bf2d7548f3871524b55befc3b42 (diff)
drm/amdgpu: allow split of queues with kfd at queue granularity v4
Previously the queue/pipe split with kfd operated with pipe granularity. This patch allows amdgpu to take ownership of an arbitrary set of queues. It also consolidates the last few magic numbers in the compute initialization process into mec_init. v2: support for gfx9 v3: renamed AMDGPU_MAX_QUEUES to AMDGPU_MAX_COMPUTE_QUEUES v4: fix off-by-one in num_mec checks in *_compute_queue_acquire Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Acked-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c84
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h1
5 files changed, 211 insertions, 44 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d2d3ad2ded23..d9d2b400567e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -46,6 +46,8 @@
#include <drm/drm_gem.h>
#include <drm/amdgpu_drm.h>
+#include <kgd_kfd_interface.h>
+
#include "amd_shared.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
@@ -893,6 +895,8 @@ struct amdgpu_rlc {
u32 *register_restore;
};
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
@@ -902,6 +906,9 @@ struct amdgpu_mec {
u32 num_pipe_per_mec;
u32 num_queue_per_pipe;
void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index daca24572bcb..4a680fdf2429 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -49,7 +49,6 @@
#include "oss/oss_2_0_sh_mask.h"
#define GFX7_NUM_GFX_RINGS 1
-#define GFX7_NUM_COMPUTE_RINGS 8
#define GFX7_MEC_HPD_SIZE 2048
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -2823,18 +2822,45 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
}
}
+static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe, mec;
+
+ /* policy for amdgpu compute queue ownership */
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ mec = (i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+ /* we've run out of HW */
+ if (mec >= adev->gfx.mec.num_mec)
+ break;
+
+ /* policy: amdgpu owns all queues in the first pipe */
+ if (mec == 0 && pipe == 0)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ }
+
+ /* update the number of active compute rings */
+ adev->gfx.num_compute_rings =
+ bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* If you hit this case and edited the policy, you probably just
+ * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
{
int r;
u32 *hpd;
size_t mec_hpd_size;
- /*
- * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
- * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
- * Nonetheless, we assign only 1 pipe because all other pipes will
- * be handled by KFD
- */
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
switch (adev->asic_type) {
case CHIP_KAVERI:
adev->gfx.mec.num_mec = 2;
@@ -2850,6 +2876,10 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
+ /* take ownership of the relevant compute queues */
+ gfx_v7_0_compute_queue_acquire(adev);
+
+ /* allocate space for ALL pipes (even the ones we don't own) */
mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
* GFX7_MEC_HPD_SIZE * 2;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
@@ -4530,7 +4560,7 @@ static int gfx_v7_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
gfx_v7_0_set_ring_funcs(adev);
@@ -4726,7 +4756,7 @@ static int gfx_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i, r;
+ int i, r, ring_id;
/* EOP Event */
r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4777,28 +4807,38 @@ static int gfx_v7_0_sw_init(void *handle)
}
/* set up the compute queues */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
unsigned irq_type;
- /* max 32 queues per MEC */
- if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
- DRM_ERROR("Too many (%d) compute rings!\n", i);
- break;
- }
- ring = &adev->gfx.compute_ring[i];
+ if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ continue;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec)
+ + 1;
+ ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
- ring->me = 1; /* first MEC */
- ring->pipe = i / 8;
- ring->queue = i % 8;
+ ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
- irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
&adev->gfx.eop_irq, irq_type);
if (r)
return r;
+
+ ring_id++;
}
/* reserve GDS, GWS and OA resource for gfx */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5b3078402d25..9001859cdb41 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -52,7 +52,6 @@
#include "smu/smu_7_1_3_d.h"
#define GFX8_NUM_GFX_RINGS 1
-#define GFX8_NUM_COMPUTE_RINGS 8
#define GFX8_MEC_HPD_SIZE 2048
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -1415,12 +1414,45 @@ static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
amdgpu_ring_fini(ring);
}
+static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe, mec;
+
+ /* policy for amdgpu compute queue ownership */
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ mec = (i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+ /* we've run out of HW */
+ if (mec >= adev->gfx.mec.num_mec)
+ break;
+
+ /* policy: amdgpu owns all queues in the first pipe */
+ if (mec == 0 && pipe == 0)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ }
+
+ /* update the number of active compute rings */
+ adev->gfx.num_compute_rings =
+ bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* If you hit this case and edited the policy, you probably just
+ * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
int r;
u32 *hpd;
size_t mec_hpd_size;
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
switch (adev->asic_type) {
case CHIP_FIJI:
case CHIP_TONGA:
@@ -1440,8 +1472,10 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
- /* only 1 pipe of the first MEC is owned by amdgpu */
- mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE;
+ /* take ownership of the relevant compute queues */
+ gfx_v8_0_compute_queue_acquire(adev);
+
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
@@ -2088,7 +2122,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v8_0_sw_init(void *handle)
{
- int i, r;
+ int i, r, ring_id;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2155,29 +2189,42 @@ static int gfx_v8_0_sw_init(void *handle)
}
/* set up the compute queues */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
unsigned irq_type;
- /* max 32 queues per MEC */
- if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
- DRM_ERROR("Too many (%d) compute rings!\n", i);
+ if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ continue;
+
+ if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
break;
- }
- ring = &adev->gfx.compute_ring[i];
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec)
+ + 1;
+ ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
ring->ring_obj = NULL;
ring->use_doorbell = true;
- ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
- ring->me = 1; /* first MEC */
- ring->pipe = i / 8;
- ring->queue = i % 8;
- ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE);
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
+ ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
- irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
irq_type);
if (r)
return r;
+
+ ring_id++;
}
r = gfx_v8_0_kiq_init(adev);
@@ -5659,7 +5706,7 @@ static int gfx_v8_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
gfx_v8_0_set_ring_funcs(adev);
gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ec2ae6f85033..45d31c36c1b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -38,7 +38,6 @@
#include "v9_structs.h"
#define GFX9_NUM_GFX_RINGS 1
-#define GFX9_NUM_COMPUTE_RINGS 8
#define GFX9_NUM_SE 4
#define GFX9_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
@@ -475,6 +474,37 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
}
}
+static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe, mec;
+
+ /* policy for amdgpu compute queue ownership */
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ queue = i % adev->gfx.mec.num_queue_per_pipe;
+ pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ mec = (i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+ /* we've run out of HW */
+ if (mec >= adev->gfx.mec.num_mec)
+ break;
+
+ /* policy: amdgpu owns all queues in the first pipe */
+ if (mec == 0 && pipe == 0)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
+ }
+
+ /* update the number of active compute rings */
+ adev->gfx.num_compute_rings =
+ bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* If you hit this case and edited the policy, you probably just
+ * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+ if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
int r;
@@ -486,6 +516,8 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
const struct gfx_firmware_header_v1_0 *mec_hdr;
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
switch (adev->asic_type) {
case CHIP_VEGA10:
adev->gfx.mec.num_mec = 2;
@@ -498,8 +530,9 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
- /* only 1 pipe of the first MEC is owned by amdgpu */
- mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX9_MEC_HPD_SIZE;
+ /* take ownership of the relevant compute queues */
+ gfx_v9_0_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
if (adev->gfx.mec.hpd_eop_obj == NULL) {
r = amdgpu_bo_create(adev,
@@ -1028,7 +1061,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
static int gfx_v9_0_sw_init(void *handle)
{
- int i, r;
+ int i, r, ring_id;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1085,7 +1118,46 @@ static int gfx_v9_0_sw_init(void *handle)
}
/* set up the compute queues */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
+ unsigned irq_type;
+
+ if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ continue;
+
+ if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
+ break;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec)
+ + 1;
+ ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE);
+ ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+
+ /* set up the compute queues */
+ for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
unsigned irq_type;
/* max 32 queues per MEC */
@@ -2468,7 +2540,7 @@ static int gfx_v9_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
gfx_v9_0_set_gds_init(adev);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index a09d9f352871..67f6d1921f4c 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -33,6 +33,7 @@
struct pci_dev;
#define KFD_INTERFACE_VERSION 1
+#define KGD_MAX_QUEUES 128
struct kfd_dev;
struct kgd_dev;