summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJérôme Glisse <jglisse@redhat.com>2017-09-05 14:55:09 -0400
committerJérôme Glisse <jglisse@redhat.com>2017-09-05 14:55:09 -0400
commite6a27ea4768202292070ab9495fd086aad9e3380 (patch)
treeb4784d7c0d6137d0af29e7fad9845cc50f9193c1
parentca39394948c9f78f330c35cc4ba30acd6df8c2ed (diff)
compote: add simple test shader to trigger page faultHEADmaster
Simple shader that writes the thread x id to buffer[threadid.x], i.e. in pseudo code: buffer[threadid.x] = threadid.x; In the old way the buffer is allocated with a regular GPU memory allocation and everything is pinned, while in the new way it is simply malloc'ed memory and the GPU might trigger a page fault if there is no page backing the buffer addresses yet. Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
-rw-r--r--compote.c240
-rw-r--r--compote.h22
-rw-r--r--new.c14
-rw-r--r--old.c13
4 files changed, 286 insertions, 3 deletions
diff --git a/compote.c b/compote.c
index d32987b..4c5d5eb 100644
--- a/compote.c
+++ b/compote.c
@@ -189,3 +189,243 @@ void *malloc_below40(unsigned nbytes)
return ptr;
}
+/*
+ * Compute launch descriptor as filled in by test_compute(): the structure is
+ * zeroed, a handful of named fields are set, and its address shifted right by
+ * 8 is written to method 0x02b4.
+ *
+ * The declared fields add up to 64 32-bit words (256 bytes). The in-memory
+ * layout relies on the compiler packing the bitfields back to back, which is
+ * implementation-defined, so do not reorder or resize members.
+ *
+ * NOTE(review): the unkN names appear to carry the index of the 32-bit word
+ * the field starts in and presumably mark fields whose meaning is not yet
+ * known -- confirm against hardware documentation.
+ */
+struct nvk_00f0_cp_desc {
+ uint32_t unk0[8]; /* test_compute() sets unk0[4] = 0x40 */
+ uint32_t entry; /* test_compute() sets 0x0; presumably the shader entry offset -- TODO confirm */
+ uint32_t unk9[2];
+ uint32_t unk11_0 : 30; /* test_compute() sets 0x04014000 */
+ uint32_t linked_tsc : 1;
+ uint32_t unk11_31 : 1;
+ uint32_t griddim_x : 31; /* test_compute() sets dimx / 16 */
+ uint32_t unk12 : 1;
+ uint16_t griddim_y; /* test_compute() sets 1 */
+ uint16_t unk13;
+ uint16_t griddim_z; /* test_compute() sets 1 */
+ uint16_t unk14;
+ uint32_t unk15[2];
+ uint32_t shared_size : 18;
+ uint32_t unk17 : 14;
+ uint16_t unk18;
+ uint16_t blockdim_x; /* test_compute() sets 16 */
+ uint16_t blockdim_y; /* test_compute() sets 1 */
+ uint16_t blockdim_z; /* test_compute() sets 1 */
+ uint32_t cb_mask : 8; /* 8 bits next to 8 cb[] entries; test_compute() sets bit 0 and fills cb[0] */
+ uint32_t unk20 : 24;
+ uint32_t unk21[8];
+ uint32_t local_size_p : 24;
+ uint32_t unk29 : 3;
+ uint32_t bar_alloc : 5;
+ uint32_t local_size_n : 24;
+ uint32_t gpr_alloc : 8; /* test_compute() sets 8 */
+ uint32_t cstack_size : 24; /* test_compute() sets 0x1000 */
+ uint32_t unk31 : 8;
+ struct {
+ uint32_t address_l; /* low 32 bits of the buffer address */
+ uint32_t address_h : 17; /* upper address bits, truncated to 17 bits */
+ uint32_t reserved : 2;
+ uint32_t size_sh4 : 13; /* buffer size in bytes shifted right by 4 */
+ } cb[8];
+ uint32_t unk48[16];
+};
+
+/*
+ * Method offsets passed as the `method` argument of nvk_sq_cmd() and
+ * nvk_ni_cmd() when test_compute() builds its command stream.
+ */
+#define NVK_00f0_GRAPH_SERIALIZE 0x0110
+#define NVK_00f0_SHARED_BASE 0x0214
+#define NVK_00f0_UNK0248 0x0248
+#define NVK_00f0_TEMP_SIZE_HIGH0 0x02e4
+#define NVK_00f0_TEMP_SIZE_HIGH1 0x02f0
+#define NVK_00f0_UNK0310 0x0310
+#define NVK_00f0_TEMP_ADDRESS_HIGH 0x0790
+#define NVK_00f0_LOCAL_BASE 0x077c
+
+/*
+ * Build and submit a small compute job that runs the embedded shader
+ * (pseudo code: buffer[threadid.x] = threadid.x) and poll for its result.
+ *
+ * `buffer` is carved up, in this order, into: 16 MiB of thread-local storage,
+ * a 64-dword fence area, the shader code, a 256-dword parameter area, the
+ * launch descriptor and finally the command stream handed to
+ * compote_context_execute(). Code, parameters and descriptor each start on a
+ * 256-dword boundary. No buffer size is passed in, so nothing is bounds
+ * checked here; the caller must supply enough memory.
+ *
+ * @ctx:    context the command stream is submitted on.
+ * @buffer: scratch memory laid out as described above.
+ * @dstp:   memory the shader stores to; its address is handed to the shader
+ *          through the first two parameter dwords.
+ * @dimx:   requested number of threads along x. dimx / 16 groups of 16
+ *          threads are launched, so a remainder of dimx % 16 is dropped.
+ *
+ * Returns the compote_context_execute() error code if submission fails,
+ * otherwise 0 -- including when dst[3] never reads back as 0x3 within the
+ * three polling attempts.
+ */
+int test_compute(compote_context_t *ctx, void *buffer, void *dstp, unsigned dimx)
+{
+    uint64_t code[] = {
+        //testtx:
+        // sched 0x7f1 0x207f9 0x7f9
+        // mov $r2 $tid.x
+        // lea 0x1 cc $r0 $r2 c0[0x0] 0x2
+        // lea hi x 0x1 $r1 $r2 c0[0x4] 0x0 0x2
+        // sched 0x7f9 0x7f5 0x7ff
+        // stg e b32 ncg[$r0] $r2
+        // exit
+        // exit
+        0x001fe440ff2007f1ul,
+        0xf0c8000002170002ul,
+        0x4bd7810000070200ul,
+        0x1a177f8000170201ul,
+        0x001ffc00fea007f9ul,
+        0xeedc200000070002ul,
+        0xe30000000007000ful,
+        0xe30000000007000ful,
+    };
+    uint32_t *ptr = buffer;
+    unsigned idx = 0;
+    uint32_t *param, cmd_start;
+    /*
+     * fence and dst are expected to be written by the GPU while the CPU
+     * polls them, so every read must really go to memory.
+     */
+    volatile uint32_t *fence;
+    volatile uint32_t *dst = dstp;
+    struct nvk_00f0_cp_desc *desc;
+    uint64_t cmd_addr, code_addr, tls_size;
+    uint64_t desc_addr, param_addr, fence_addr, tls_addr;
+    int ret;
+
+    /* Thread-local storage: the first 16 MiB of the buffer. */
+    tls_size = 16 << 20;
+    tls_addr = (unsigned long)&ptr[idx];
+    idx += tls_size >> 2;
+
+    /* Fence area: 64 dwords right after the TLS. */
+    fence = &ptr[idx];
+    fence_addr = (unsigned long)fence;
+    idx += 64;
+
+    /* Shader code, on a 256-dword boundary. */
+    idx = (idx + 255) & (~255);
+    code_addr = (unsigned long)&ptr[idx];
+    memcpy(&ptr[idx], code, sizeof(code));
+    idx += sizeof(code) >> 2;
+
+    /* Shader parameters: destination address (low, high) and 4. */
+    idx = (idx + 255) & (~255);
+    param = &ptr[idx];
+    param_addr = (unsigned long)param;
+    param[0] = nvk_addr_low((unsigned long)dstp);
+    param[1] = nvk_addr_high((unsigned long)dstp);
+    param[2] = 0x00000004;
+    idx += 256;
+
+    /* Launch descriptor. */
+    idx = (idx + 255) & (~255);
+    desc = (void *)&ptr[idx];
+    desc_addr = (unsigned long)desc;
+    memset(desc, 0, sizeof(*desc));
+    desc->griddim_x = dimx / 16;
+    desc->griddim_y = 1;
+    desc->griddim_z = 1;
+    desc->blockdim_x = 16;
+    desc->blockdim_y = 1;
+    desc->blockdim_z = 1;
+    desc->entry = 0x0;
+    desc->shared_size = 0;
+    desc->local_size_p = 0;
+    desc->bar_alloc = 0;
+    desc->local_size_n = 0;
+    desc->gpr_alloc = 8;
+    desc->cstack_size = 0x1000;
+    desc->cb_mask = 1;
+    desc->cb[0].address_h = nvk_addr_high(param_addr);
+    desc->cb[0].address_l = nvk_addr_low(param_addr);
+    desc->cb[0].size_sh4 = 256 >> 4;
+    desc->unk0[4] = 0x40;
+    desc->unk11_0 = 0x04014000;
+    idx += sizeof(*desc) >> 2;
+
+    /* Everything from here on is the command stream that gets submitted. */
+    cmd_addr = (unsigned long)&ptr[idx];
+    cmd_start = idx;
+
+    ptr[idx++] = nvk_sq_cmd(1, 0x0000, 1);
+    ptr[idx++] = 0xc1c0;
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+    ptr[idx++] = 0x00000000;
+
+    /* From here on tls_size holds the byte size divided by 16. */
+    tls_size = tls_size / 16;
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_TEMP_ADDRESS_HIGH, 2);
+    ptr[idx++] = nvk_addr_high(tls_addr);
+    ptr[idx++] = nvk_addr_low(tls_addr);
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_TEMP_SIZE_HIGH0, 3);
+    ptr[idx++] = nvk_addr_high(tls_size);
+    ptr[idx++] = nvk_addr_low(tls_size) & ~0x7fff;
+    ptr[idx++] = 0x000000ff;
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_TEMP_SIZE_HIGH1, 3);
+    ptr[idx++] = nvk_addr_high(tls_size);
+    ptr[idx++] = nvk_addr_low(tls_size) & ~0x7fff;
+    ptr[idx++] = 0x000000ff;
+
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_LOCAL_BASE, 1);
+    ptr[idx++] = 0xff000000;
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_SHARED_BASE, 1);
+    ptr[idx++] = 0xfe000000;
+
+    ptr[idx++] = nvk_sq_cmd(1, 0x1608, 2);
+    ptr[idx++] = nvk_addr_high(code_addr);
+    ptr[idx++] = nvk_addr_low(code_addr);
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_UNK0310, 1);
+    ptr[idx++] = 0x00000400;
+
+    ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_UNK0248, 64);
+    for (int i = 63; i >= 0; --i) {
+        ptr[idx++] = 0x00038000 | i;
+    }
+    ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+    ptr[idx++] = 0x00000000;
+
+    ptr[idx++] = nvk_sq_cmd(1, 0x2608, 1);
+    ptr[idx++] = 0x00000000;
+
+    /*
+     * Disabled alternative that pushes code and parameters inline.
+     * NOTE: if this is ever re-enabled, the copy loop below indexes code[]
+     * (8 x uint64_t) with 16 indices and truncates each element to 32 bits.
+     */
+#if 0
+    ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4);
+    ptr[idx++] = sizeof(code);
+    ptr[idx++] = 0x00000001;
+    ptr[idx++] = nvk_addr_high(code_addr);
+    ptr[idx++] = nvk_addr_low(code_addr);
+
+    ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1);
+    ptr[idx++] = 0x00000041;
+    ptr[idx++] = nvk_ni_cmd(1, 0x01b4, sizeof(code)/4);
+    for (int i = 0; i < sizeof(code) / 4; ++i) {
+        ptr[idx++] = code[i];
+    }
+    ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+    ptr[idx++] = 0x00000000;
+    ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4);
+    ptr[idx++] = 12;
+    ptr[idx++] = 1;
+    ptr[idx++] = nvk_addr_high(param_addr);
+    ptr[idx++] = nvk_addr_low(param_addr);
+    ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1);
+    ptr[idx++] = 0x00000041;
+#endif
+
+    ptr[idx++] = nvk_sq_cmd(1, 0x1698, 1);
+    ptr[idx++] = 0x00001000;
+    ptr[idx++] = nvk_sq_cmd(1, 0x021c, 1);
+    ptr[idx++] = 0x00001017;
+
+#if 0
+printf("param addr 0x%016lx > 40buts ? %d\n", param_addr, param_addr > (1UL << 40));
+    ptr[idx++] = nvk_sq_cmd(1, 0x0274, 3);
+    ptr[idx++] = nvk_addr_high(param_addr);
+    ptr[idx++] = nvk_addr_low(param_addr);
+    ptr[idx++] = 0x000000ff;
+#endif
+
+    /* Point the engine at the descriptor (address >> 8) and launch. */
+    ptr[idx++] = nvk_sq_cmd(1, 0x02b4, 1);
+    ptr[idx++] = desc_addr >> 8;
+    ptr[idx++] = nvk_sq_cmd(1, 0x02bc, 1);
+    ptr[idx++] = 0x00000003;
+    ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+    ptr[idx++] = 0x00000000;
+
+    /* Seed the fence, then queue a command carrying its address and 0xdeadcafe. */
+    fence[0] = 0xcafedead;
+    ptr[idx++] = nvk_sq_cmd(1, 0x1b00, 4);
+    ptr[idx++] = nvk_addr_high(fence_addr);
+    ptr[idx++] = nvk_addr_low(fence_addr);
+    ptr[idx++] = 0xdeadcafe;
+    ptr[idx++] = 0x00000000;
+
+    /*
+     * NOTE(review): the length passed is one more than the number of dwords
+     * written (idx - cmd_start). Kept as in the original -- confirm what
+     * compote_context_execute() expects.
+     */
+    ret = compote_context_execute(ctx, (void*)cmd_addr, ((idx - cmd_start) + 1));
+    if (ret) {
+        printf("compote_context_execute() error %d\n", ret);
+        return ret;
+    }
+
+    /* Poll up to three times, one second apart, until dst[3] reads 0x3. */
+    for (int i = 0; i < 3; i++) {
+        printf("fence: 0x%08x 0x%08x 0x%08x 0x%08x\n", fence[0], fence[1], fence[2], fence[3]);
+        printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]);
+        if (dst[3] == 0x3)
+            break;
+        sleep(1);
+    }
+    printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]);
+
+    return 0;
+}
diff --git a/compote.h b/compote.h
index 088e92b..6d0cc3b 100644
--- a/compote.h
+++ b/compote.h
@@ -69,4 +69,26 @@ static inline uint32_t nvk_ni_cmd(unsigned subc, unsigned method, unsigned len)
(0x3 << 29);
}
+/* Return bits 63:32 of a 64-bit address. */
+static inline uint32_t nvk_addr_high(uint64_t offset)
+{
+    const uint64_t upper = offset >> 32;
+
+    return (uint32_t)upper;
+}
+
+/* Return bits 31:0 of a 64-bit address. */
+static inline uint32_t nvk_addr_low(uint64_t offset)
+{
+    /* Conversion to uint32_t keeps exactly the low 32 bits. */
+    return (uint32_t)offset;
+}
+
+/* Return bits 63:32 of a 64-bit size. */
+static inline uint32_t nvk_size_high(uint64_t offset)
+{
+    const uint64_t upper = offset >> 32;
+
+    return (uint32_t)upper;
+}
+
+/* Return bits 31:0 of a 64-bit size. */
+static inline uint32_t nvk_size_low(uint64_t offset)
+{
+    /* Conversion to uint32_t keeps exactly the low 32 bits. */
+    return (uint32_t)offset;
+}
+
+int test_compute(compote_context_t *ctx, void *buffer, void *dstp, unsigned dimx);
+
#endif // COMPOTE_H
diff --git a/new.c b/new.c
index f066ad9..cee2f20 100644
--- a/new.c
+++ b/new.c
@@ -32,6 +32,7 @@ int main(int argc, char *argv[])
{
compote_context_t *ctx;
compote_mo_t *mo;
+ void *dst;
int ret;
ret = compote_context_new(&ctx);
@@ -39,11 +40,18 @@ int main(int argc, char *argv[])
return ret;
}
- ret = compote_mo_new(ctx, &mo, 64 << 10);
+ ret = compote_mo_new(ctx, &mo, 32 << 20);
if (ret) {
goto out;
}
+ dst = malloc_below40(4 << 20);
+ if (dst == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ printf("dst addr %p\n", dst);
+
{
uint32_t *ptr = mo->ptr;
uint32_t *sem = &ptr[128 >> 2];
@@ -67,6 +75,10 @@ int main(int argc, char *argv[])
printf("[%4d] = 0x%08x 0x%08x 0x%08x\n", 128 >> 2, sem[0], sem[1], sem[2]);
}
+ ret = test_compute(ctx, mo->ptr, dst, 1024);
+ if (ret)
+ goto out;
+
printf("La compote c'est bon !\n");
out:
diff --git a/old.c b/old.c
index f066ad9..117e50a 100644
--- a/old.c
+++ b/old.c
@@ -31,7 +31,7 @@
int main(int argc, char *argv[])
{
compote_context_t *ctx;
- compote_mo_t *mo;
+ compote_mo_t *mo, *dst;
int ret;
ret = compote_context_new(&ctx);
@@ -39,7 +39,12 @@ int main(int argc, char *argv[])
return ret;
}
- ret = compote_mo_new(ctx, &mo, 64 << 10);
+ ret = compote_mo_new(ctx, &mo, 32 << 20);
+ if (ret) {
+ goto out;
+ }
+
+ ret = compote_mo_new(ctx, &dst, 4 << 20);
if (ret) {
goto out;
}
@@ -67,6 +72,10 @@ int main(int argc, char *argv[])
printf("[%4d] = 0x%08x 0x%08x 0x%08x\n", 128 >> 2, sem[0], sem[1], sem[2]);
}
+ ret = test_compute(ctx, mo->ptr, dst->ptr, 1024);
+ if (ret)
+ goto out;
+
printf("La compote c'est bon !\n");
out: