diff options
-rw-r--r-- | compote.c | 240 | ||||
-rw-r--r-- | compote.h | 22 | ||||
-rw-r--r-- | new.c | 14 | ||||
-rw-r--r-- | old.c | 13 |
4 files changed, 286 insertions, 3 deletions
@@ -189,3 +189,243 @@ void *malloc_below40(unsigned nbytes)
 
 	return ptr;
 }
+
+struct nvk_00f0_cp_desc {
+	uint32_t unk0[8];
+	uint32_t entry;
+	uint32_t unk9[2];
+	uint32_t unk11_0 : 30;
+	uint32_t linked_tsc : 1;
+	uint32_t unk11_31 : 1;
+	uint32_t griddim_x : 31;
+	uint32_t unk12 : 1;
+	uint16_t griddim_y;
+	uint16_t unk13;
+	uint16_t griddim_z;
+	uint16_t unk14;
+	uint32_t unk15[2];
+	uint32_t shared_size : 18;
+	uint32_t unk17 : 14;
+	uint16_t unk18;
+	uint16_t blockdim_x;
+	uint16_t blockdim_y;
+	uint16_t blockdim_z;
+	uint32_t cb_mask : 8;
+	uint32_t unk20 : 24;
+	uint32_t unk21[8];
+	uint32_t local_size_p : 24;
+	uint32_t unk29 : 3;
+	uint32_t bar_alloc : 5;
+	uint32_t local_size_n : 24;
+	uint32_t gpr_alloc : 8;
+	uint32_t cstack_size : 24;
+	uint32_t unk31 : 8;
+	struct {
+		uint32_t address_l;
+		uint32_t address_h : 17;
+		uint32_t reserved : 2;
+		uint32_t size_sh4 : 13;
+	} cb[8];
+	uint32_t unk48[16];
+};
+
+#define NVK_00f0_GRAPH_SERIALIZE 0x0110
+#define NVK_00f0_SHARED_BASE 0x0214
+#define NVK_00f0_UNK0248 0x0248
+#define NVK_00f0_TEMP_SIZE_HIGH0 0x02e4
+#define NVK_00f0_TEMP_SIZE_HIGH1 0x02f0
+#define NVK_00f0_UNK0310 0x0310
+#define NVK_00f0_TEMP_ADDRESS_HIGH 0x0790
+#define NVK_00f0_LOCAL_BASE 0x077c
+
+int test_compute(compote_context_t *ctx, void *buffer, void *dstp, unsigned dimx)
+{
+	uint64_t code[] = {
+		//testtx:
+		// sched 0x7f1 0x207f9 0x7f9
+		// mov $r2 $tid.x
+		// lea 0x1 cc $r0 $r2 c0[0x0] 0x2
+		// lea hi x 0x1 $r1 $r2 c0[0x4] 0x0 0x2
+		// sched 0x7f9 0x7f5 0x7ff
+		// stg e b32 ncg[$r0] $r2
+		// exit
+		// exit
+		0x001fe440ff2007f1ul,
+		0xf0c8000002170002ul,
+		0x4bd7810000070200ul,
+		0x1a177f8000170201ul,
+		0x001ffc00fea007f9ul,
+		0xeedc200000070002ul,
+		0xe30000000007000ful,
+		0xe30000000007000ful,
+	};
+	uint32_t *ptr = buffer;
+	unsigned idx = 0;
+	uint32_t *param, *dst, *fence, cmd_start, *tls;
+	struct nvk_00f0_cp_desc *desc;
+	uint64_t cmd_addr, dst_addr, code_addr, tls_size;
+	uint64_t desc_addr, param_addr, fence_addr, tls_addr;
+	int ret;
+
+	tls_size = 16 << 20;
+	tls = (void*)&ptr[idx];
+	tls_addr = (unsigned long)tls;
+	idx += tls_size >> 2;
+
+	dst = (void*)dstp;
+	dst_addr = (unsigned long)dstp;
+
+	fence = (void*)&ptr[idx];
+	fence_addr = (unsigned long)fence;
+	idx += 64;
+
+	idx = (idx + 255) & (~255);
+	code_addr = (unsigned long)&ptr[idx];
+	memcpy(&ptr[idx], code, sizeof(code));
+	idx += sizeof(code) >> 2;
+
+	idx = (idx + 255) & (~255);
+	param = &ptr[idx];
+	param_addr = (unsigned long)param;
+	param[0] = nvk_addr_low(dst_addr);
+	param[1] = nvk_addr_high(dst_addr);
+	param[2] = 0x00000004;
+	idx += 256;
+
+	idx = (idx + 255) & (~255);
+	desc = (void *)&ptr[idx];
+	desc_addr = (unsigned long)desc;
+	memset(desc, 0, sizeof(*desc));
+	desc->griddim_x = dimx / 16;
+	desc->griddim_y = 1;
+	desc->griddim_z = 1;
+	desc->blockdim_x = 16;
+	desc->blockdim_y = 1;
+	desc->blockdim_z = 1;
+	desc->entry = 0x0;
+	desc->shared_size = 0;
+	desc->local_size_p = 0;
+	desc->bar_alloc = 0;
+	desc->local_size_n = 0;
+	desc->gpr_alloc = 8;
+	desc->cstack_size = 0x1000;
+	desc->cb_mask = 1;
+	desc->cb[0].address_h = nvk_addr_high(param_addr);
+	desc->cb[0].address_l = nvk_addr_low(param_addr);
+	desc->cb[0].size_sh4 = 256 >> 4;
+	desc->unk0[4] = 0x40;
+	desc->unk11_0 = 0x04014000;
+	idx += sizeof(*desc) >> 2;
+
+	cmd_addr = (unsigned long)&ptr[idx];
+	cmd_start = idx;
+
+	ptr[idx++] = nvk_sq_cmd(1, 0x0000, 1);
+	ptr[idx++] = 0xc1c0;
+	ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+	ptr[idx++] = 0x00000000;
+
+	tls_size = tls_size / 16;
+	ptr[idx++] = nvk_sq_cmd(1, 0x0790, 2);
+	ptr[idx++] = nvk_addr_high(tls_addr);
+	ptr[idx++] = nvk_addr_low(tls_addr);
+	ptr[idx++] = nvk_sq_cmd(1, 0x02e4, 3);
+	ptr[idx++] = nvk_addr_high(tls_size);
+	ptr[idx++] = nvk_addr_low(tls_size) & ~0x7fff;
+	ptr[idx++] = 0x000000ff;
+	ptr[idx++] = nvk_sq_cmd(1, 0x02f0, 3);
+	ptr[idx++] = nvk_addr_high(tls_size);
+	ptr[idx++] = nvk_addr_low(tls_size) & ~0x7fff;
+	ptr[idx++] = 0x000000ff;
+
+	ptr[idx++] = nvk_sq_cmd(1, 0x077c, 1);
+	ptr[idx++] = 0xff000000;
+	ptr[idx++] = nvk_sq_cmd(1, 0x0214, 1);
+	ptr[idx++] = 0xfe000000;
+
+	ptr[idx++] = nvk_sq_cmd(1, 0x1608, 2);
+	ptr[idx++] = nvk_addr_high(code_addr);
+	ptr[idx++] = nvk_addr_low(code_addr);
+	ptr[idx++] = nvk_sq_cmd(1, 0x0310, 1);
+	ptr[idx++] = 0x00000400;
+
+	ptr[idx++] = nvk_ni_cmd(1, 0x0248, 64);
+	for (int i = 63; i >= 0; --i) {
+		ptr[idx++] = 0x00038000 | i;
+	}
+	ptr[idx++] = nvk_ni_cmd(1, 0x0110, 1);
+	ptr[idx++] = 0x00000000;
+
+	ptr[idx++] = nvk_sq_cmd(1, 0x2608, 1);
+	ptr[idx++] = 0x00000000;
+
+#if 0
+	ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4);
+	ptr[idx++] = sizeof(code);
+	ptr[idx++] = 0x00000001;
+	ptr[idx++] = nvk_addr_high(code_addr);
+	ptr[idx++] = nvk_addr_low(code_addr);
+
+	ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1);
+	ptr[idx++] = 0x00000041;
+	ptr[idx++] = nvk_ni_cmd(1, 0x01b4, sizeof(code)/4);
+	for (int i = 0; i < sizeof(code) / 4; ++i) {
+		ptr[idx++] = code[i];
+	}
+	ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1);
+	ptr[idx++] = 0x00000000;
+	ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4);
+	ptr[idx++] = 12;
+	ptr[idx++] = 1;
+	ptr[idx++] = nvk_addr_high(param_addr);
+	ptr[idx++] = nvk_addr_low(param_addr);
+	ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1);
+	ptr[idx++] = 0x00000041;
+#endif
+
+	ptr[idx++] = nvk_sq_cmd(1, 0x1698, 1);
+	ptr[idx++] = 0x00001000;
+	ptr[idx++] = nvk_sq_cmd(1, 0x021c, 1);
+	ptr[idx++] = 0x00001017;
+
+#if 0
+printf("param addr 0x%016lx > 40buts ? %d\n", param_addr, param_addr > (1UL << 40));
+	ptr[idx++] = nvk_sq_cmd(1, 0x0274, 3);
+	ptr[idx++] = nvk_addr_high(param_addr);
+	ptr[idx++] = nvk_addr_low(param_addr);
+	ptr[idx++] = 0x000000ff;
+#endif
+
+	ptr[idx++] = nvk_sq_cmd(1, 0x02b4, 1);
+	ptr[idx++] = desc_addr >> 8;
+	ptr[idx++] = nvk_sq_cmd(1, 0x02bc, 1);
+	ptr[idx++] = 0x00000003;
+#if 0 // BAD
+#endif // BAD
+	ptr[idx++] = nvk_sq_cmd(1, 0x0110, 1);
+	ptr[idx++] = 0x00000000;
+
+	fence[0] = 0xcafedead;
+	ptr[idx++] = nvk_sq_cmd(1, 0x1b00, 4);
+	ptr[idx++] = nvk_addr_high(fence_addr);
+	ptr[idx++] = nvk_addr_low(fence_addr);
+	ptr[idx++] = 0xdeadcafe;
+	ptr[idx++] = 0x00000000;
+
+	ret = compote_context_execute(ctx, (void*)cmd_addr, ((idx - cmd_start) + 1));
+	if (ret) {
+		printf("compote_context_execute() error %d\n", ret);
+		return ret;
+	}
+
+	for (int i = 0; i < 3; i++) {
+		printf("fence: 0x%08x 0x%08x 0x%08x 0x%08x\n", fence[0], fence[1], fence[2], fence[3]);
+		printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]);
+		if (dst[3] == 0x3)
+			break;
+		sleep(1);
+	}
+	printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]);
+
+	return 0;
+}
@@ -69,4 +69,26 @@ static inline uint32_t nvk_ni_cmd(unsigned subc, unsigned method, unsigned len)
 	       (0x3 << 29);
 }
 
+static inline uint32_t nvk_addr_high(uint64_t offset)
+{
+	return (offset >> 32) & 0xffffffff;
+}
+
+static inline uint32_t nvk_addr_low(uint64_t offset)
+{
+	return offset & 0xffffffff;
+}
+
+static inline uint32_t nvk_size_high(uint64_t offset)
+{
+	return (offset >> 32) & 0xffffffff;
+}
+
+static inline uint32_t nvk_size_low(uint64_t offset)
+{
+	return offset & 0xffffffff;
+}
+
+int test_compute(compote_context_t *ctx, void *buffer, void *dstp, unsigned dimx);
+
 #endif // COMPOTE_H
@@ -32,6 +32,7 @@ int main(int argc, char *argv[])
 {
 	compote_context_t *ctx;
 	compote_mo_t *mo;
+	void *dst;
 	int ret;
 
 	ret = compote_context_new(&ctx);
@@ -39,11 +40,18 @@ int main(int argc, char *argv[])
 		return ret;
 	}
 
-	ret = compote_mo_new(ctx, &mo, 64 << 10);
+	ret = compote_mo_new(ctx, &mo, 32 << 20);
 	if (ret) {
 		goto out;
 	}
 
+	dst = malloc_below40(4 << 20);
+	if (dst == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	printf("dst addr %p\n", dst);
+
 	{
 		uint32_t *ptr = mo->ptr;
 		uint32_t *sem = &ptr[128 >> 2];
@@ -67,6 +75,10 @@ int main(int argc, char *argv[])
 		printf("[%4d] = 0x%08x 0x%08x 0x%08x\n", 128 >> 2, sem[0], sem[1], sem[2]);
 	}
 
+	ret = test_compute(ctx, mo->ptr, dst, 1024);
+	if (ret)
+		goto out;
+
 	printf("La compote c'est bon !\n");
 
 out:
@@ -31,7 +31,7 @@
 int main(int argc, char *argv[])
 {
 	compote_context_t *ctx;
-	compote_mo_t *mo;
+	compote_mo_t *mo, *dst;
 	int ret;
 
 	ret = compote_context_new(&ctx);
@@ -39,7 +39,12 @@ int main(int argc, char *argv[])
 		return ret;
 	}
 
-	ret = compote_mo_new(ctx, &mo, 64 << 10);
+	ret = compote_mo_new(ctx, &mo, 32 << 20);
+	if (ret) {
+		goto out;
+	}
+
+	ret = compote_mo_new(ctx, &dst, 4 << 20);
 	if (ret) {
 		goto out;
 	}
@@ -67,6 +72,10 @@ int main(int argc, char *argv[])
 		printf("[%4d] = 0x%08x 0x%08x 0x%08x\n", 128 >> 2, sem[0], sem[1], sem[2]);
 	}
 
+	ret = test_compute(ctx, mo->ptr, dst->ptr, 1024);
+	if (ret)
+		goto out;
+
 	printf("La compote c'est bon !\n");
 
 out: