summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compote.c240
-rw-r--r--compote.h22
-rw-r--r--new.c14
-rw-r--r--old.c13
4 files changed, 286 insertions, 3 deletions
diff --git a/compote.c b/compote.c
index d32987b..4c5d5eb 100644
--- a/compote.c
+++ b/compote.c
@@ -189,3 +189,243 @@ void *malloc_below40(unsigned nbytes)
return ptr;
}
+
+struct nvk_00f0_cp_desc { // descriptor that test_compute() zero-fills, populates, and whose address (>> 8) it pushes with method 0x02b4
+ uint32_t unk0[8]; // unkN names appear to carry the 32-bit word index of the field — presumably "unknown"; TODO confirm
+ uint32_t entry; // test_compute() writes 0x0 here
+ uint32_t unk9[2];
+ uint32_t unk11_0 : 30; // test_compute() writes 0x04014000; meaning not evident from this file
+ uint32_t linked_tsc : 1;
+ uint32_t unk11_31 : 1;
+ uint32_t griddim_x : 31; // test_compute() writes dimx / 16
+ uint32_t unk12 : 1;
+ uint16_t griddim_y;
+ uint16_t unk13;
+ uint16_t griddim_z;
+ uint16_t unk14;
+ uint32_t unk15[2];
+ uint32_t shared_size : 18;
+ uint32_t unk17 : 14;
+ uint16_t unk18;
+ uint16_t blockdim_x; // test_compute() writes 16
+ uint16_t blockdim_y;
+ uint16_t blockdim_z;
+ uint32_t cb_mask : 8; // 8 bits wide and cb[] has 8 entries — presumably one bit per entry; TODO confirm
+ uint32_t unk20 : 24;
+ uint32_t unk21[8];
+ uint32_t local_size_p : 24;
+ uint32_t unk29 : 3;
+ uint32_t bar_alloc : 5;
+ uint32_t local_size_n : 24;
+ uint32_t gpr_alloc : 8;
+ uint32_t cstack_size : 24;
+ uint32_t unk31 : 8;
+ struct { // test_compute() fills only cb[0], with the address of its parameter area
+ uint32_t address_l; // receives nvk_addr_low() of the address
+ uint32_t address_h : 17; // receives nvk_addr_high() of the address, kept to 17 bits by the field width
+ uint32_t reserved : 2;
+ uint32_t size_sh4 : 13; // test_compute() stores 256 >> 4, i.e. the size shifted right by 4
+ } cb[8];
+ uint32_t unk48[16];
+}; // NOTE(review): the fields add up to 64 words (256 bytes) if the compiler packs the bit-fields as declared — worth a static assert
+
+#define NVK_00f0_GRAPH_SERIALIZE 0x0110 // method value; test_compute() pushes it with both nvk_sq_cmd() and nvk_ni_cmd()
+#define NVK_00f0_SHARED_BASE 0x0214 // method value; test_compute() pushes it with data 0xfe000000
+#define NVK_00f0_UNK0248 0x0248 // method value; test_compute() pushes it via nvk_ni_cmd() with 64 data words
+#define NVK_00f0_TEMP_SIZE_HIGH0 0x02e4 // method value; test_compute() pushes it followed by 3 data words
+#define NVK_00f0_TEMP_SIZE_HIGH1 0x02f0 // method value; pushed with the same 3 data words as TEMP_SIZE_HIGH0
+#define NVK_00f0_UNK0310 0x0310 // method value; test_compute() pushes it with data 0x00000400
+#define NVK_00f0_TEMP_ADDRESS_HIGH 0x0790 // method value; test_compute() pushes it followed by the high then low word of its TLS address
+#define NVK_00f0_LOCAL_BASE 0x077c // method value; test_compute() pushes it with data 0xff000000
+
+int test_compute(compote_context_t *ctx, void *buffer, void *dstp, unsigned dimx) // Lay out TLS, fence, shader, parameters, descriptor and a command stream inside `buffer`, submit the stream on `ctx`, then poll `dstp`.
+{ // Returns the compote_context_execute() error code on failure, otherwise 0 (also when polling never sees the expected value).
+ uint64_t code[] = { // shader binary copied into `buffer` below; the commented listing is presumably its disassembly
+ //testtx:
+ // sched 0x7f1 0x207f9 0x7f9
+ // mov $r2 $tid.x
+ // lea 0x1 cc $r0 $r2 c0[0x0] 0x2
+ // lea hi x 0x1 $r1 $r2 c0[0x4] 0x0 0x2
+ // sched 0x7f9 0x7f5 0x7ff
+ // stg e b32 ncg[$r0] $r2
+ // exit
+ // exit
+ 0x001fe440ff2007f1ul,
+ 0xf0c8000002170002ul,
+ 0x4bd7810000070200ul,
+ 0x1a177f8000170201ul,
+ 0x001ffc00fea007f9ul,
+ 0xeedc200000070002ul,
+ 0xe30000000007000ful,
+ 0xe30000000007000ful,
+ };
+ uint32_t *ptr = buffer; // buffer viewed as 32-bit words; idx below counts words, not bytes
+ unsigned idx = 0;
+ uint32_t *param, *dst, *fence, cmd_start, *tls;
+ struct nvk_00f0_cp_desc *desc;
+ uint64_t cmd_addr, dst_addr, code_addr, tls_size;
+ uint64_t desc_addr, param_addr, fence_addr, tls_addr;
+ int ret;
+
+ tls_size = 16 << 20; // 16 MiB reserved at the very start of buffer, so buffer must be larger than that
+ tls = (void*)&ptr[idx];
+ tls_addr = (unsigned long)tls;
+ idx += tls_size >> 2; // bytes -> words
+
+ dst = (void*)dstp;
+ dst_addr = (unsigned long)dstp;
+
+ fence = (void*)&ptr[idx];
+ fence_addr = (unsigned long)fence;
+ idx += 64; // 64 words set aside for the fence
+
+ idx = (idx + 255) & (~255); // round idx up to a multiple of 256 words
+ code_addr = (unsigned long)&ptr[idx];
+ memcpy(&ptr[idx], code, sizeof(code));
+ idx += sizeof(code) >> 2;
+
+ idx = (idx + 255) & (~255);
+ param = &ptr[idx];
+ param_addr = (unsigned long)param;
+ param[0] = nvk_addr_low(dst_addr); // the listing reads c0[0x0] and c0[0x4]: low and high word of the destination address
+ param[1] = nvk_addr_high(dst_addr);
+ param[2] = 0x00000004; // NOTE(review): purpose of this third parameter word is not evident here
+ idx += 256; // 256 words reserved for parameters
+
+ idx = (idx + 255) & (~255);
+ desc = (void *)&ptr[idx];
+ desc_addr = (unsigned long)desc;
+ memset(desc, 0, sizeof(*desc));
+ desc->griddim_x = dimx / 16; // integer division: any remainder of dimx modulo 16 is dropped
+ desc->griddim_y = 1;
+ desc->griddim_z = 1;
+ desc->blockdim_x = 16;
+ desc->blockdim_y = 1;
+ desc->blockdim_z = 1;
+ desc->entry = 0x0;
+ desc->shared_size = 0;
+ desc->local_size_p = 0;
+ desc->bar_alloc = 0;
+ desc->local_size_n = 0;
+ desc->gpr_alloc = 8;
+ desc->cstack_size = 0x1000;
+ desc->cb_mask = 1; // only cb[0] is filled in below
+ desc->cb[0].address_h = nvk_addr_high(param_addr);
+ desc->cb[0].address_l = nvk_addr_low(param_addr);
+ desc->cb[0].size_sh4 = 256 >> 4; // the field stores the size shifted right by 4
+ desc->unk0[4] = 0x40;
+ desc->unk11_0 = 0x04014000;
+ idx += sizeof(*desc) >> 2; // advance by the descriptor size in words
+
+ cmd_addr = (unsigned long)&ptr[idx];
+ cmd_start = idx; // first word of the command stream
+
+ ptr[idx++] = nvk_sq_cmd(1, 0x0000, 1);
+ ptr[idx++] = 0xc1c0;
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+ ptr[idx++] = 0x00000000;
+
+ tls_size = tls_size / 16; // NOTE(review): rescaled in place; the words pushed below carry size/16 — confirm the unit the hardware expects
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_TEMP_ADDRESS_HIGH, 2);
+ ptr[idx++] = nvk_addr_high(tls_addr);
+ ptr[idx++] = nvk_addr_low(tls_addr);
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_TEMP_SIZE_HIGH0, 3);
+ ptr[idx++] = nvk_size_high(tls_size); // a size, not an address: split it with the nvk_size_* helpers
+ ptr[idx++] = nvk_size_low(tls_size) & ~0x7fff;
+ ptr[idx++] = 0x000000ff;
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_TEMP_SIZE_HIGH1, 3);
+ ptr[idx++] = nvk_size_high(tls_size);
+ ptr[idx++] = nvk_size_low(tls_size) & ~0x7fff;
+ ptr[idx++] = 0x000000ff;
+
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_LOCAL_BASE, 1);
+ ptr[idx++] = 0xff000000;
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_SHARED_BASE, 1);
+ ptr[idx++] = 0xfe000000;
+
+ ptr[idx++] = nvk_sq_cmd(1, 0x1608, 2); // followed by the high then low word of the shader's address
+ ptr[idx++] = nvk_addr_high(code_addr);
+ ptr[idx++] = nvk_addr_low(code_addr);
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_UNK0310, 1);
+ ptr[idx++] = 0x00000400;
+
+ ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_UNK0248, 64);
+ for (int i = 63; i >= 0; --i) { // 64 data words: 0x00038000 | 63 down to 0x00038000 | 0
+ ptr[idx++] = 0x00038000 | i;
+ }
+ ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+ ptr[idx++] = 0x00000000;
+
+ ptr[idx++] = nvk_sq_cmd(1, 0x2608, 1);
+ ptr[idx++] = 0x00000000;
+
+#if 0
+ ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4);
+ ptr[idx++] = sizeof(code);
+ ptr[idx++] = 0x00000001;
+ ptr[idx++] = nvk_addr_high(code_addr);
+ ptr[idx++] = nvk_addr_low(code_addr);
+
+ ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1);
+ ptr[idx++] = 0x00000041;
+ ptr[idx++] = nvk_ni_cmd(1, 0x01b4, sizeof(code)/4);
+ for (int i = 0; i < sizeof(code) / 4; ++i) { // NOTE(review): code[] holds 64-bit elements, so this bound runs past its end if the block is ever enabled
+ ptr[idx++] = code[i];
+ }
+ ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+ ptr[idx++] = 0x00000000;
+ ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4);
+ ptr[idx++] = 12;
+ ptr[idx++] = 1;
+ ptr[idx++] = nvk_addr_high(param_addr);
+ ptr[idx++] = nvk_addr_low(param_addr);
+ ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1);
+ ptr[idx++] = 0x00000041;
+#endif
+
+ ptr[idx++] = nvk_sq_cmd(1, 0x1698, 1);
+ ptr[idx++] = 0x00001000;
+ ptr[idx++] = nvk_sq_cmd(1, 0x021c, 1);
+ ptr[idx++] = 0x00001017;
+
+#if 0
+printf("param addr 0x%016lx > 40buts ? %d\n", param_addr, param_addr > (1UL << 40));
+ ptr[idx++] = nvk_sq_cmd(1, 0x0274, 3);
+ ptr[idx++] = nvk_addr_high(param_addr);
+ ptr[idx++] = nvk_addr_low(param_addr);
+ ptr[idx++] = 0x000000ff;
+#endif
+
+ ptr[idx++] = nvk_sq_cmd(1, 0x02b4, 1);
+ ptr[idx++] = desc_addr >> 8; // descriptor address in 256-byte units; presumably requires buffer itself to be 256-byte aligned — TODO confirm
+ ptr[idx++] = nvk_sq_cmd(1, 0x02bc, 1);
+ ptr[idx++] = 0x00000003;
+#if 0 // BAD
+#endif // BAD
+ ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+ ptr[idx++] = 0x00000000;
+
+ fence[0] = 0xcafedead; // sentinel written by the CPU before submission
+ ptr[idx++] = nvk_sq_cmd(1, 0x1b00, 4);
+ ptr[idx++] = nvk_addr_high(fence_addr);
+ ptr[idx++] = nvk_addr_low(fence_addr);
+ ptr[idx++] = 0xdeadcafe; // presumably the value the GPU stores at the fence address — TODO confirm
+ ptr[idx++] = 0x00000000;
+
+ ret = compote_context_execute(ctx, (void*)cmd_addr, ((idx - cmd_start) + 1)); // NOTE(review): passes one more than the number of words written — confirm what the callee expects
+ if (ret) {
+ printf("compote_context_execute() error %d\n", ret);
+ return ret;
+ }
+
+ for (int i = 0; i < 3; i++) { // poll at most three times, one second apart
+ printf("fence: 0x%08x 0x%08x 0x%08x 0x%08x\n", fence[0], fence[1], fence[2], fence[3]);
+ printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]);
+ if (dst[3] == 0x3) // presumably each thread stores its tid.x at dst[tid.x] — TODO confirm
+ break;
+ sleep(1);
+ }
+ printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]);
+
+ return 0; // NOTE(review): success is reported even if dst[3] never became 3
+}
diff --git a/compote.h b/compote.h
index 088e92b..6d0cc3b 100644
--- a/compote.h
+++ b/compote.h
@@ -69,4 +69,26 @@ static inline uint32_t nvk_ni_cmd(unsigned subc, unsigned method, unsigned len)
(0x3 << 29);
}
+static inline uint32_t nvk_addr_high(uint64_t offset) // returns bits 63..32 of offset
+{
+ return (offset >> 32) & 0xffffffff;
+}
+
+static inline uint32_t nvk_addr_low(uint64_t offset) // returns bits 31..0 of offset
+{
+ return offset & 0xffffffff;
+}
+
+static inline uint32_t nvk_size_high(uint64_t offset) // returns bits 63..32 of offset; same computation as nvk_addr_high(), named for size values
+{
+ return (offset >> 32) & 0xffffffff;
+}
+
+static inline uint32_t nvk_size_low(uint64_t offset) // returns bits 31..0 of offset; same computation as nvk_addr_low(), named for size values
+{
+ return offset & 0xffffffff;
+}
+
+int test_compute(compote_context_t *ctx, void *buffer, void *dstp, unsigned dimx); // defined in compote.c; returns 0 or the error from compote_context_execute()
+
#endif // COMPOTE_H
diff --git a/new.c b/new.c
index f066ad9..cee2f20 100644
--- a/new.c
+++ b/new.c
@@ -32,6 +32,7 @@ int main(int argc, char *argv[])
{
compote_context_t *ctx;
compote_mo_t *mo;
+ void *dst;
int ret;
ret = compote_context_new(&ctx);
@@ -39,11 +40,18 @@ int main(int argc, char *argv[])
return ret;
}
- ret = compote_mo_new(ctx, &mo, 64 << 10);
+ ret = compote_mo_new(ctx, &mo, 32 << 20);
if (ret) {
goto out;
}
+ dst = malloc_below40(4 << 20);
+ if (dst == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ printf("dst addr %p\n", dst);
+
{
uint32_t *ptr = mo->ptr;
uint32_t *sem = &ptr[128 >> 2];
@@ -67,6 +75,10 @@ int main(int argc, char *argv[])
printf("[%4d] = 0x%08x 0x%08x 0x%08x\n", 128 >> 2, sem[0], sem[1], sem[2]);
}
+ ret = test_compute(ctx, mo->ptr, dst, 1024);
+ if (ret)
+ goto out;
+
printf("La compote c'est bon !\n");
out:
diff --git a/old.c b/old.c
index f066ad9..117e50a 100644
--- a/old.c
+++ b/old.c
@@ -31,7 +31,7 @@
int main(int argc, char *argv[])
{
compote_context_t *ctx;
- compote_mo_t *mo;
+ compote_mo_t *mo, *dst;
int ret;
ret = compote_context_new(&ctx);
@@ -39,7 +39,12 @@ int main(int argc, char *argv[])
return ret;
}
- ret = compote_mo_new(ctx, &mo, 64 << 10);
+ ret = compote_mo_new(ctx, &mo, 32 << 20);
+ if (ret) {
+ goto out;
+ }
+
+ ret = compote_mo_new(ctx, &dst, 4 << 20);
if (ret) {
goto out;
}
@@ -67,6 +72,10 @@ int main(int argc, char *argv[])
printf("[%4d] = 0x%08x 0x%08x 0x%08x\n", 128 >> 2, sem[0], sem[1], sem[2]);
}
+ ret = test_compute(ctx, mo->ptr, dst->ptr, 1024);
+ if (ret)
+ goto out;
+
printf("La compote c'est bon !\n");
out: