/* * Copyright 2017 Red Hat Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * Authors: Jérôme Glisse */ #include #include #include #include #include #include #include #include #include #include #include #include "compote.h" #include "compote-uapi.h" int compote_context_new(compote_context_t **ctxp) { struct compote_ioctl_channel_alloc arg; compote_context_t *ctx; int fd, ret; fd = open("/dev/compote", O_RDWR, 0); if (fd < 0) { fprintf(stderr, "could not open compote device\n"); return -ENODEV; } ctx = calloc(1, sizeof(*ctx)); if (ctx == NULL) { close(fd); return -ENOMEM; } ctx->channel.id = -1UL; ctx->fd = fd; ret = compote_context_ioctl(ctx, COMPOTE_IOCTL_CHAN_ALLOC, &arg); if (ret) { compote_context_del(&ctx); return ret; } ctx->channel.id = arg.channel; *ctxp = ctx; return 0; } void compote_context_del(compote_context_t **ctxp) { compote_context_t *ctx = *ctxp; *ctxp = NULL; if (ctx->channel.id != -1UL) { struct compote_ioctl_channel_free arg; arg.channel = ctx->channel.id; compote_context_ioctl(ctx, COMPOTE_IOCTL_CHAN_FREE, &arg); } close(ctx->fd); free(ctx); } int compote_context_ioctl(compote_context_t *ctx, int command, void *arg) { do { int ret; ret = ioctl(ctx->fd, command, arg); if (ret && errno != EINTR) { printf("ret %d errno %d\n", ret, errno); return ret; } } while (errno == EINTR); return 0; } int compote_mo_new(compote_context_t *ctx, compote_mo_t **mop, uint64_t nbytes) { static const unsigned long mask = ~((1UL << 12) - 1); static unsigned long addr = 1UL << 40; struct compote_ioctl_mem_alloc arg; compote_mo_t *mo; int ret; if (addr < nbytes) { return -ENOMEM; } *mop = NULL; mo = calloc(1, sizeof(*mo)); if (mo == NULL) { return -ENOMEM; } arg.nbytes = nbytes; ret = compote_context_ioctl(ctx, COMPOTE_IOCTL_MEM_ALLOC, &arg); if (ret) { free(mo); return ret; } mo->foffset = arg.foffset; mo->nbytes = nbytes; do { if (addr < nbytes) { mo->ptr = NULL; compote_mo_del(ctx, &mo); return -ENOMEM; } addr = (addr - nbytes) & mask; mo->ptr = mmap((void *)addr, mo->nbytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, ctx->fd, mo->foffset); } while (mo->ptr == MAP_FAILED); memset(mo->ptr, 0, mo->nbytes); *mop = mo; return 0; } void compote_mo_del(compote_context_t *ctx, compote_mo_t **mop) { struct compote_ioctl_mem_free arg; compote_mo_t *mo = *mop; *mop = NULL; if (mo == NULL) { return; } arg.foffset = mo->foffset; munmap(mo->ptr, mo->nbytes); compote_context_ioctl(ctx, COMPOTE_IOCTL_MEM_FREE, &arg); free(mo); } int compote_context_execute(compote_context_t *ctx, void *addr, unsigned ndw) { struct compote_ioctl_channel_execute arg; arg.channel = ctx->channel.id; arg.addr = (unsigned long)addr; arg.ndw = ndw; // Must be 32bits aligned if ((arg.addr & 3)) { return -EINVAL; } return compote_context_ioctl(ctx, COMPOTE_IOCTL_CHAN_EXEC, &arg); } void *malloc_below40(unsigned nbytes) { static unsigned long addr = 16 << 20; void *ptr; nbytes += ((1 << 12) - 1); nbytes &= ~((1 << 12) - 1); do { if ((addr + nbytes) > (1UL << 40)) { return NULL; } printf("addr 0x%p\n", (void *)addr); ptr = mmap((void *)addr, nbytes, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); addr += nbytes; } while (ptr == MAP_FAILED); return ptr; } struct nvk_00f0_cp_desc { uint32_t unk0[8]; uint32_t entry; uint32_t unk9[2]; uint32_t unk11_0 : 30; uint32_t linked_tsc : 1; uint32_t unk11_31 : 1; uint32_t griddim_x : 31; uint32_t unk12 : 1; uint16_t griddim_y; uint16_t unk13; uint16_t griddim_z; uint16_t unk14; uint32_t unk15[2]; uint32_t shared_size : 18; uint32_t unk17 : 14; uint16_t unk18; uint16_t blockdim_x; uint16_t blockdim_y; uint16_t blockdim_z; uint32_t cb_mask : 8; uint32_t unk20 : 24; uint32_t unk21[8]; uint32_t local_size_p : 24; uint32_t unk29 : 3; uint32_t bar_alloc : 5; uint32_t local_size_n : 24; uint32_t gpr_alloc : 8; uint32_t cstack_size : 24; uint32_t unk31 : 8; struct { uint32_t address_l; uint32_t address_h : 17; uint32_t reserved : 2; uint32_t size_sh4 : 13; } cb[8]; uint32_t unk48[16]; }; #define NVK_00f0_GRAPH_SERIALIZE 0x0110 #define NVK_00f0_SHARED_BASE 0x0214 #define NVK_00f0_UNK0248 0x0248 #define NVK_00f0_TEMP_SIZE_HIGH0 0x02e4 #define NVK_00f0_TEMP_SIZE_HIGH1 0x02f0 #define NVK_00f0_UNK0310 0x0310 #define NVK_00f0_TEMP_ADDRESS_HIGH 0x0790 #define NVK_00f0_LOCAL_BASE 0x077c int test_compute(compote_context_t *ctx, void *buffer, void *dstp, unsigned dimx) { uint64_t code[] = { //testtx: // sched 0x7f1 0x207f9 0x7f9 // mov $r2 $tid.x // lea 0x1 cc $r0 $r2 c0[0x0] 0x2 // lea hi x 0x1 $r1 $r2 c0[0x4] 0x0 0x2 // sched 0x7f9 0x7f5 0x7ff // stg e b32 ncg[$r0] $r2 // exit // exit 0x001fe440ff2007f1ul, 0xf0c8000002170002ul, 0x4bd7810000070200ul, 0x1a177f8000170201ul, 0x001ffc00fea007f9ul, 0xeedc200000070002ul, 0xe30000000007000ful, 0xe30000000007000ful, }; uint32_t *ptr = buffer; unsigned idx = 0; uint32_t *param, *dst, *fence, cmd_start, *tls; struct nvk_00f0_cp_desc *desc; uint64_t cmd_addr, dst_addr, code_addr, tls_size; uint64_t desc_addr, param_addr, fence_addr, tls_addr; int ret; tls_size = 16 << 20; tls = (void*)&ptr[idx]; tls_addr = (unsigned long)tls; idx += tls_size >> 2; dst = (void*)dstp; dst_addr = (unsigned long)dstp; fence = (void*)&ptr[idx]; fence_addr = (unsigned long)fence; idx += 64; idx = (idx + 255) & (~255); code_addr = (unsigned long)&ptr[idx]; memcpy(&ptr[idx], code, sizeof(code)); idx += sizeof(code) >> 2; idx = (idx + 255) & (~255); param = &ptr[idx]; param_addr = (unsigned long)param; param[0] = nvk_addr_low(dst_addr); param[1] = nvk_addr_high(dst_addr); param[2] = 0x00000004; idx += 256; idx = (idx + 255) & (~255); desc = (void *)&ptr[idx]; desc_addr = (unsigned long)desc; memset(desc, 0, sizeof(*desc)); desc->griddim_x = dimx / 16; desc->griddim_y = 1; desc->griddim_z = 1; desc->blockdim_x = 16; desc->blockdim_y = 1; desc->blockdim_z = 1; desc->entry = 0x0; desc->shared_size = 0; desc->local_size_p = 0; desc->bar_alloc = 0; desc->local_size_n = 0; desc->gpr_alloc = 8; desc->cstack_size = 0x1000; desc->cb_mask = 1; desc->cb[0].address_h = nvk_addr_high(param_addr); desc->cb[0].address_l = nvk_addr_low(param_addr); desc->cb[0].size_sh4 = 256 >> 4; desc->unk0[4] = 0x40; desc->unk11_0 = 0x04014000; idx += sizeof(*desc) >> 2; cmd_addr = (unsigned long)&ptr[idx]; cmd_start = idx; ptr[idx++] = nvk_sq_cmd(1, 0x0000, 1); ptr[idx++] = 0xc1c0; ptr[idx++] = nvk_sq_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1); ptr[idx++] = 0x00000000; tls_size = tls_size / 16; ptr[idx++] = nvk_sq_cmd(1, 0x0790, 2); ptr[idx++] = nvk_addr_high(tls_addr); ptr[idx++] = nvk_addr_low(tls_addr); ptr[idx++] = nvk_sq_cmd(1, 0x02e4, 3); ptr[idx++] = nvk_addr_high(tls_size); ptr[idx++] = nvk_addr_low(tls_size) & ~0x7fff; ptr[idx++] = 0x000000ff; ptr[idx++] = nvk_sq_cmd(1, 0x02f0, 3); ptr[idx++] = nvk_addr_high(tls_size); ptr[idx++] = nvk_addr_low(tls_size) & ~0x7fff; ptr[idx++] = 0x000000ff; ptr[idx++] = nvk_sq_cmd(1, 0x077c, 1); ptr[idx++] = 0xff000000; ptr[idx++] = nvk_sq_cmd(1, 0x0214, 1); ptr[idx++] = 0xfe000000; ptr[idx++] = nvk_sq_cmd(1, 0x1608, 2); ptr[idx++] = nvk_addr_high(code_addr); ptr[idx++] = nvk_addr_low(code_addr); ptr[idx++] = nvk_sq_cmd(1, 0x0310, 1); ptr[idx++] = 0x00000400; ptr[idx++] = nvk_ni_cmd(1, 0x0248, 64); for (int i = 63; i >= 0; --i) { ptr[idx++] = 0x00038000 | i; } ptr[idx++] = nvk_ni_cmd(1, 0x0110, 1); ptr[idx++] = 0x00000000; ptr[idx++] = nvk_sq_cmd(1, 0x2608, 1); ptr[idx++] = 0x00000000; #if 0 ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4); ptr[idx++] = sizeof(code); ptr[idx++] = 0x00000001; ptr[idx++] = nvk_addr_high(code_addr); ptr[idx++] = nvk_addr_low(code_addr); ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1); ptr[idx++] = 0x00000041; ptr[idx++] = nvk_ni_cmd(1, 0x01b4, sizeof(code)/4); for (int i = 0; i < sizeof(code) / 4; ++i) { ptr[idx++] = code[i]; } ptr[idx++] = nvk_ni_cmd(1, NVK_00f0_GRAPH_SERIALIZE, 1); ptr[idx++] = 0x00000000; ptr[idx++] = nvk_sq_cmd(1, 0x0180, 4); ptr[idx++] = 12; ptr[idx++] = 1; ptr[idx++] = nvk_addr_high(param_addr); ptr[idx++] = nvk_addr_low(param_addr); ptr[idx++] = nvk_sq_cmd(1, 0x01b0, 1); ptr[idx++] = 0x00000041; #endif ptr[idx++] = nvk_sq_cmd(1, 0x1698, 1); ptr[idx++] = 0x00001000; ptr[idx++] = nvk_sq_cmd(1, 0x021c, 1); ptr[idx++] = 0x00001017; #if 0 printf("param addr 0x%016lx > 40buts ? %d\n", param_addr, param_addr > (1UL << 40)); ptr[idx++] = nvk_sq_cmd(1, 0x0274, 3); ptr[idx++] = nvk_addr_high(param_addr); ptr[idx++] = nvk_addr_low(param_addr); ptr[idx++] = 0x000000ff; #endif ptr[idx++] = nvk_sq_cmd(1, 0x02b4, 1); ptr[idx++] = desc_addr >> 8; ptr[idx++] = nvk_sq_cmd(1, 0x02bc, 1); ptr[idx++] = 0x00000003; #if 0 // BAD #endif // BAD ptr[idx++] = nvk_sq_cmd(1, 0x0110, 1); ptr[idx++] = 0x00000000; fence[0] = 0xcafedead; ptr[idx++] = nvk_sq_cmd(1, 0x1b00, 4); ptr[idx++] = nvk_addr_high(fence_addr); ptr[idx++] = nvk_addr_low(fence_addr); ptr[idx++] = 0xdeadcafe; ptr[idx++] = 0x00000000; ret = compote_context_execute(ctx, (void*)cmd_addr, ((idx - cmd_start) + 1)); if (ret) { printf("compote_context_execute() error %d\n", ret); return ret; } for (int i = 0; i < 3; i++) { printf("fence: 0x%08x 0x%08x 0x%08x 0x%08x\n", fence[0], fence[1], fence[2], fence[3]); printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]); if (dst[3] == 0x3) break; sleep(1); } printf("dst: 0x%08x 0x%08x 0x%08x 0x%08x\n", dst[0], dst[1], dst[2], dst[3]); return 0; }