summary refs log tree commit diff
path: root/moche.c
diff options
context:
space:
mode:
Diffstat (limited to 'moche.c')
-rw-r--r-- moche.c 537
1 files changed, 537 insertions, 0 deletions
diff --git a/moche.c b/moche.c
new file mode 100644
index 0000000..5b16a0d
--- /dev/null
+++ b/moche.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
+ */
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include "xf86drm.h"
+#include "libdrm/nouveau/nouveau.h"
+#include "nvif/class.h"
+#include "nvif/cl0080.h"
+#include "nvif/if0008.h"
+#include "nvif/if000c.h"
+
+/*
+ * Descriptor that moche_compute_test() fills in inside a buffer object
+ * and whose address (shifted right by 8) it pushes with method 0x02b4.
+ *
+ * Fields named unkN are not understood. N appears to be the index of the
+ * 32-bit word the field starts in (unk9 follows nine words, unk48 follows
+ * forty-eight) -- TODO confirm.
+ *
+ * NOTE(review): the in-memory layout depends on how the compiler packs
+ * bit-fields. With the usual GCC little-endian ABI this presumably adds
+ * up to 64 words (256 bytes); verify before touching any field.
+ */
+struct nvk_00f0_cp_desc {
+ uint32_t unk0[8];
+ uint32_t entry;
+ uint32_t unk9[2];
+ uint32_t unk11_0 : 30;
+ uint32_t linked_tsc : 1;
+ uint32_t unk11_31 : 1;
+ uint32_t griddim_x : 31;
+ uint32_t unk12 : 1;
+ uint16_t griddim_y;
+ uint16_t unk13;
+ uint16_t griddim_z;
+ uint16_t unk14;
+ uint32_t unk15[2];
+ uint32_t shared_size : 18;
+ uint32_t unk17 : 14;
+ uint16_t unk18;
+ uint16_t blockdim_x;
+ uint16_t blockdim_y;
+ uint16_t blockdim_z;
+ // cb_mask is set to 1 by moche_compute_test(), which fills only cb[0];
+ // presumably one enable bit per cb[] slot -- TODO confirm.
+ uint32_t cb_mask : 8;
+ uint32_t unk20 : 24;
+ uint32_t unk21[8];
+ uint32_t local_size_p : 24;
+ uint32_t unk29 : 3;
+ uint32_t bar_alloc : 5;
+ uint32_t local_size_n : 24;
+ uint32_t gpr_alloc : 8;
+ uint32_t cstack_size : 24;
+ uint32_t unk31 : 8;
+ // Eight buffer slots: a 32-bit low address, 17 high address bits and a
+ // size that the caller stores shifted right by 4 (size_sh4 = bytes >> 4).
+ struct {
+ uint32_t address_l;
+ uint32_t address_h : 17;
+ uint32_t reserved : 2;
+ uint32_t size_sh4 : 13;
+ } cb[8];
+ uint32_t unk48[16];
+};
+
+
+/*
+ * Everything one test run needs: the DRM file descriptor plus the libdrm
+ * nouveau handles that moche_init() creates and moche_fini() releases.
+ */
+typedef struct {
+ struct nouveau_client *nvclient; // from nouveau_client_new()
+ struct nouveau_device *nvdevice; // from nouveau_device_new()
+ struct nouveau_object *nvchannel; // FIFO channel object
+ struct nouveau_object *nvcompute; // class 0xc1c0 object on the channel
+ struct nouveau_pushbuf *nvpushbuf; // command buffer for nvchannel
+ struct nouveau_drm *nvdrm; // from nouveau_drm_new()
+ void *hole; // mmap() result stored by moche_init_hmm()
+ int fd; // from drmOpen("nouveau")
+} moche_t;
+
+/* File offset that moche_init_hmm() passes to mmap() on the DRM fd. */
+#define DRM_FILE_PAGE_OFFSET (0x100000000ULL)
+
+/*
+ * Map a 2 GiB PROT_NONE, MAP_PRIVATE window of the DRM fd at file offset
+ * DRM_FILE_PAGE_OFFSET, asking for address 1 GiB. MAP_FIXED is not set, so
+ * the address is only a hint; the address actually obtained is stored in
+ * moche->hole and printed.
+ *
+ * NOTE(review): presumably this mapping is what switches the experimental
+ * kernel driver into HMM mode -- confirm against the kernel side.
+ *
+ * Returns 0 on success. On mmap() failure moche->hole is set to NULL and a
+ * negative errno value is returned, so the caller's error branch can fire.
+ */
+int moche_init_hmm(moche_t *moche)
+{
+    void *hole;
+
+    hole = mmap((void *)(1UL << 30), (2UL << 30), PROT_NONE,
+                MAP_PRIVATE, moche->fd, DRM_FILE_PAGE_OFFSET);
+    if (hole == MAP_FAILED) {
+        /* Save errno before printf() has a chance to overwrite it. */
+        int err = errno;
+
+        printf("EE: hmm init mmap failed (%d: %s)\n", err, strerror(err));
+        moche->hole = NULL;
+        return err ? -err : -1;
+    }
+
+    moche->hole = hole;
+    printf("hmm init %p\n", moche->hole);
+    return 0;
+}
+
+/*
+ * Open the nouveau DRM node and create, in order: the drm wrapper, the
+ * device, a client, the HMM mapping, a FIFO channel, a push buffer and a
+ * class 0xc1c0 object on that channel. The handles land in *moche.
+ *
+ * Returns 0 on success. On failure everything created so far is released
+ * in reverse order, the fd is closed, moche->fd is set to -1 and the
+ * failing call's error code is returned (-1 if drmOpen() itself failed).
+ */
+int moche_init(moche_t *moche)
+{
+    int r;
+
+    moche->fd = drmOpen("nouveau", NULL);
+    if (moche->fd < 0) {
+        printf("EE: failed to open nouveau device file (%d)\n", moche->fd);
+        return -1;
+    }
+
+    if ((r = nouveau_drm_new(moche->fd, &moche->nvdrm))) {
+        printf("EE: failed to create nouveau drm (%d)\n", r);
+        goto err_fd;
+    }
+
+    {
+        struct nv_device_v0 arg = {0};
+
+        arg.device = ~0ULL;
+        if ((r = nouveau_device_new(&moche->nvdrm->client, NV_DEVICE, &arg,
+                                    sizeof(arg), &moche->nvdevice))) {
+            printf("EE: failed to create nouveau device (%d)\n", r);
+            goto err_drm;
+        }
+    }
+
+    if ((r = nouveau_client_new(moche->nvdevice, &moche->nvclient))) {
+        printf("EE: failed to create nouveau client (%d)\n", r);
+        goto err_device;
+    }
+
+    if ((r = moche_init_hmm(moche))) {
+        printf("EE: failed to enbable HMM (%d)\n", r);
+        goto err_client;
+    }
+
+    {
+        struct nve0_fifo arg = {0};
+
+        // NVA06F_V0_ENGINE_CE0 | NVA06F_V0_ENGINE_CE1
+        // NOTE(review): three bits are set but the comment above (kept from
+        // the original) names only two engines; confirm what 0x01 selects.
+        arg.engine = 0x01 | 0x10 | 0x20;
+        if ((r = nouveau_object_new(&moche->nvdevice->object, 0,
+                                    NOUVEAU_FIFO_CHANNEL_CLASS, &arg,
+                                    sizeof(arg), &moche->nvchannel))) {
+            printf("EE: failed to create nouveau channel (%d)\n", r);
+            goto err_client;
+        }
+    }
+
+    if ((r = nouveau_pushbuf_new(moche->nvclient, moche->nvchannel, 2,
+                                 64 * 1024, 1, &moche->nvpushbuf))) {
+        printf("EE: failed to create nouveau pushbuf (%d)\n", r);
+        goto err_channel;
+    }
+
+    if ((r = nouveau_object_new(moche->nvchannel, 0xcafec1c0,
+                                0xc1c0, NULL, 0, &moche->nvcompute))) {
+        printf("EE: failed to create nouveau compute object (%d)\n", r);
+        goto err_pushbuf;
+    }
+
+    return 0;
+
+err_pushbuf:
+    /* The push buffer exists on this path and must be released too. */
+    nouveau_pushbuf_del(&moche->nvpushbuf);
+err_channel:
+    nouveau_object_del(&moche->nvchannel);
+err_client:
+    nouveau_client_del(&moche->nvclient);
+err_device:
+    nouveau_device_del(&moche->nvdevice);
+err_drm:
+    nouveau_drm_del(&moche->nvdrm);
+err_fd:
+    /* The fd came from drmOpen() above and is ours to close. */
+    drmClose(moche->fd);
+    moche->fd = -1;
+    return r;
+}
+
+/*
+ * Release everything moche_init() created, newest object first, then
+ * close the DRM fd obtained from drmOpen().
+ *
+ * Only call this after a successful moche_init(): a zero-initialised
+ * moche_t has fd == 0, which this function would close.
+ *
+ * NOTE(review): the explicit drmClose() assumes the libdrm *_del() calls
+ * leave a caller-supplied fd open -- confirm for the libdrm in use.
+ */
+void moche_fini(moche_t *moche)
+{
+    nouveau_object_del(&moche->nvcompute);
+    nouveau_pushbuf_del(&moche->nvpushbuf);
+    nouveau_object_del(&moche->nvchannel);
+    nouveau_client_del(&moche->nvclient);
+    nouveau_device_del(&moche->nvdevice);
+    nouveau_drm_del(&moche->nvdrm);
+
+    if (moche->fd >= 0) {
+        drmClose(moche->fd);
+        moche->fd = -1;
+    }
+}
+
+/* Store one 32-bit word at the push buffer cursor and advance the cursor. */
+static inline void moche_push_data(moche_t *moche, uint32_t data)
+{
+    struct nouveau_pushbuf *push = moche->nvpushbuf;
+
+    *push->cur = data;
+    push->cur++;
+}
+
+/*
+ * Hand the push buffer to nouveau_pushbuf_kick() for this context's
+ * channel and return that call's result unchanged.
+ */
+static inline int moche_push_kick(moche_t *moche)
+{
+    int status;
+
+    status = nouveau_pushbuf_kick(moche->nvpushbuf, moche->nvchannel);
+    return status;
+}
+
+/*
+ * Register a single buffer object, with the given access flags, on the
+ * push buffer through nouveau_pushbuf_refn(). The call's return value is
+ * not checked.
+ */
+static inline void moche_push_refn(moche_t *moche,
+                                   struct nouveau_bo *bo, uint32_t flags)
+{
+    struct nouveau_pushbuf_refn entry = { bo, flags };
+    struct nouveau_pushbuf *push = moche->nvpushbuf;
+
+    nouveau_pushbuf_refn(push, &entry, 1);
+}
+
+/*
+ * Build a command header word with type field 1 in bits 29 and up:
+ *   bits  0-12  method >> 2
+ *   bits 13-15  subc
+ *   bits 16-27  len
+ * Out-of-range inputs are masked to their field width.
+ * NOTE(review): "sq" presumably means sequential / incrementing -- confirm.
+ */
+static inline uint32_t nvk_sq_cmd(unsigned subc, unsigned method, unsigned len)
+{
+    uint32_t hdr = 0x1 << 29;
+
+    hdr |= (len & 0xfff) << 16;
+    hdr |= (subc & 0x7) << 13;
+    hdr |= (method >> 2) & 0x1fff;
+    return hdr;
+}
+
+/*
+ * Build a command header word with type field 3 in bits 29 and up:
+ *   bits  0-12  method >> 2
+ *   bits 13-15  subc
+ *   bits 16-27  len
+ * Out-of-range inputs are masked to their field width.
+ * NOTE(review): "ni" presumably means non-incrementing -- confirm.
+ */
+static inline uint32_t nvk_ni_cmd(unsigned subc, unsigned method, unsigned len)
+{
+    uint32_t hdr = 0x3 << 29;
+
+    hdr |= (len & 0xfff) << 16;
+    hdr |= (subc & 0x7) << 13;
+    hdr |= (method >> 2) & 0x1fff;
+    return hdr;
+}
+
+/* Upper 32 bits of a 64-bit address. */
+static inline uint32_t nvk_addr_high(uint64_t offset)
+{
+    uint64_t upper = offset >> 32;
+
+    return (uint32_t)upper;
+}
+
+/* Lower 32 bits of a 64-bit address. */
+static inline uint32_t nvk_addr_low(uint64_t offset)
+{
+    return (uint32_t)offset;
+}
+
+/* Upper 32 bits of a 64-bit size. */
+static inline uint32_t nvk_size_high(uint64_t offset)
+{
+    uint64_t upper = offset >> 32;
+
+    return (uint32_t)upper;
+}
+
+/* Lower 32 bits of a 64-bit size. */
+static inline uint32_t nvk_size_low(uint64_t offset)
+{
+    return (uint32_t)offset;
+}
+
+/*
+ * Smoke test for the push buffer path.
+ *
+ * Allocates a 4 KiB GART buffer object, writes 0xcafedead into its first
+ * word, queues method 0x1b00 with the buffer address and the payload
+ * 0xdeadcafe, kicks the push buffer and then waits on the buffer (at most
+ * ten times) until the first word reads 0xdeadcafe. The outcome is
+ * printed as an "OK:" or "EE:" line.
+ *
+ * Returns 0 when every libdrm call succeeded (even if the expected value
+ * never showed up), otherwise the failing call's error code. The buffer
+ * object is released on every path once it has been created.
+ */
+int moche_pushbuf_test(moche_t *moche)
+{
+    struct nouveau_bo *bo;
+    uint32_t *ptr;
+    int r, i;
+
+    if ((r = nouveau_bo_new(moche->nvdevice, NOUVEAU_BO_GART,
+                            0, 4 * 1024, NULL, &bo))) {
+        printf("EE: failed to create nouveau bo (%d)\n", r);
+        return r;
+    }
+    if ((r = nouveau_bo_map(bo, NOUVEAU_BO_WR, moche->nvclient))) {
+        printf("EE: failed to map nouveau bo (%d)\n", r);
+        goto out_unref;
+    }
+
+    ptr = bo->map;
+    ptr[0] = 0xcafedead;
+
+    /* Nine words are pushed below; one buffer is referenced. */
+    if ((r = nouveau_pushbuf_space(moche->nvpushbuf, 9, 1, 0))) {
+        printf("EE: pushbuf full (%d)\n", r);
+        /* bo already exists here, so it has to be dropped as well. */
+        goto out_unref;
+    }
+    moche_push_refn(moche, bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0000, 1));
+    moche_push_data(moche, moche->nvcompute->oclass);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1));
+    moche_push_data(moche, 0x00000000);
+#if 1
+    moche_push_data(moche, nvk_sq_cmd(1, 0x1b00, 4));
+    moche_push_data(moche, nvk_addr_high(bo->offset));
+    moche_push_data(moche, nvk_addr_low(bo->offset));
+    moche_push_data(moche, 0xdeadcafe);
+    moche_push_data(moche, 0x00000000);
+#else
+    /* Alternative sequence kept from the original: method 0x0010. */
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0010, 4));
+    moche_push_data(moche, nvk_addr_high(bo->offset));
+    moche_push_data(moche, nvk_addr_low(bo->offset));
+    moche_push_data(moche, 0xdeadcafe);
+    moche_push_data(moche, 0x00000002);
+#endif
+
+    if ((r = moche_push_kick(moche))) {
+        printf("EE: pushbuf kick (%d)\n", r);
+        goto out_unref;
+    }
+
+    for (i = 0; i < 10 && ptr[0] != 0xdeadcafe; ++i) {
+        nouveau_bo_wait(bo, NOUVEAU_BO_RD, moche->nvclient);
+    }
+    if (ptr[0] == 0xdeadcafe) {
+        printf("OK: pusbuf test 0x%08x\n", ptr[0]);
+    } else {
+        printf("EE: pusbuf test 0x%08x\n", ptr[0]);
+    }
+
+out_unref:
+    nouveau_bo_ref(NULL, &bo);
+    return r;
+}
+
+/*
+ * Launch the small shader in code[] and check what it wrote.
+ *
+ * moche    initialised context
+ * doffset  address stored in the first two parameter words for the shader
+ * rptr     CPU view of the result area; nelem words are zeroed beforehand
+ *          and compared against their own index afterwards
+ * nelem    number of 32-bit result words; also used as griddim_x
+ * bor      buffer object backing the result area, or NULL when the result
+ *          area is ordinary process memory
+ *
+ * One 64 KiB GART buffer ("bom") carries everything the launch needs:
+ *   word 0     fence, starts at 0xcafedead, expected to become 0xdeadcafe
+ *   word 64    parameters (doffset low, doffset high)
+ *   word 128   struct nvk_00f0_cp_desc
+ *   word 1024  shader code
+ * A second 16 MiB GART buffer ("tls") is referenced and its address pushed
+ * with method 0x0790.
+ *
+ * Returns 0 when every libdrm call succeeded (mismatches are only printed),
+ * otherwise the failing call's error code. Both buffers are released on
+ * every path.
+ */
+int moche_compute_test(moche_t *moche, uint64_t doffset,
+                       uint32_t *rptr, unsigned nelem,
+                       struct nouveau_bo *bor)
+{
+    static const uint64_t code[] = {
+        //testtx:
+        // sched 0x7f1 0x207f9 0x7f9
+        // mov $r2 $tid.x
+        // lea 0x1 cc $r0 $r2 c0[0x0] 0x2
+        // lea hi x 0x1 $r1 $r2 c0[0x4] 0x0 0x2
+        // sched 0x7f9 0x7f5 0x7ff
+        // stg e b32 ncg[$r0] $r2
+        // exit
+        // exit
+        0x001fe440ff2007f1ull,
+        0xf0c8000002170002ull,
+        0x4bd7810000070200ull,
+        0x1a177f8000170201ull,
+        0x001ffc00fea007f9ull,
+        0xeedc200000070002ull,
+        0xe30000000007000full,
+        0xe30000000007000full,
+    };
+    struct nouveau_bo *bom, *tls;
+    struct nvk_00f0_cp_desc *desc;
+    uint32_t *mptr;
+    int r, tries, tls_size;
+    unsigned idx;
+
+    tls_size = 16 << 20;
+    if ((r = nouveau_bo_new(moche->nvdevice, NOUVEAU_BO_GART,
+                            0, tls_size, NULL, &tls))) {
+        printf("EE: failed to create nouveau bo (%d)\n", r);
+        return r;
+    }
+    if ((r = nouveau_bo_new(moche->nvdevice, NOUVEAU_BO_GART,
+                            0, 64 * 1024, NULL, &bom))) {
+        printf("EE: failed to create nouveau bo (%d)\n", r);
+        goto out_tls;
+    }
+    if ((r = nouveau_bo_map(bom, NOUVEAU_BO_WR, moche->nvclient))) {
+        printf("EE: failed to map nouveau bo (%d)\n", r);
+        goto out_bom;
+    }
+
+    mptr = bom->map;
+    printf("bom offset 0x%lx\n", (unsigned long)bom->offset);
+
+    // initialize result bo (size computed in size_t so it cannot wrap)
+    memset(rptr, 0, (size_t)nelem * sizeof(*rptr));
+
+    // fence
+    mptr[0] = 0xcafedead;
+    memcpy(&mptr[1024], code, sizeof(code));
+
+    // param
+    mptr[64] = nvk_addr_low(doffset);
+    mptr[65] = nvk_addr_high(doffset);
+
+    // compute descriptor
+    desc = (void *)&mptr[128];
+    memset(desc, 0, sizeof(*desc));
+    desc->griddim_x = nelem;
+    desc->griddim_y = 1;
+    desc->griddim_z = 1;
+    desc->blockdim_x = 32;
+    desc->blockdim_y = 1;
+    desc->blockdim_z = 1;
+    desc->entry = 0x0;
+    desc->shared_size = 0;
+    desc->local_size_p = 0;
+    desc->bar_alloc = 0;
+    desc->local_size_n = 0;
+    desc->gpr_alloc = 8;
+    desc->cstack_size = 0x1000;
+    desc->cb_mask = 1;
+    // cb[0] points at the parameter words (mptr[64])
+    desc->cb[0].address_h = nvk_addr_high(bom->offset + 64 * 4);
+    desc->cb[0].address_l = nvk_addr_low(bom->offset + 64 * 4);
+    desc->cb[0].size_sh4 = 256 >> 4;
+    desc->unk0[4] = 0x40;
+    desc->unk11_0 = 0x04014000;
+
+    if ((r = nouveau_pushbuf_space(moche->nvpushbuf, 1024, 3, 0))) {
+        printf("EE: pushbuf full (%d)\n", r);
+        goto out_bom;
+    }
+    moche_push_refn(moche, bom, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+    if (bor)
+        moche_push_refn(moche, bor, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+    moche_push_refn(moche, tls, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0000, 1));
+    moche_push_data(moche, moche->nvcompute->oclass);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1));
+    moche_push_data(moche, 0x00000000);
+
+    tls_size = tls_size / 16;
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0790, 2));
+    moche_push_data(moche, nvk_addr_high(tls->offset));
+    moche_push_data(moche, nvk_addr_low(tls->offset));
+    moche_push_data(moche, nvk_sq_cmd(1, 0x02e4, 3));
+    moche_push_data(moche, nvk_addr_high(tls_size));
+    moche_push_data(moche, nvk_addr_low(tls_size) & ~0x7fff);
+    moche_push_data(moche, 0x000000ff);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x02f0, 3));
+    moche_push_data(moche, nvk_addr_high(tls_size));
+    moche_push_data(moche, nvk_addr_low(tls_size) & ~0x7fff);
+    moche_push_data(moche, 0x000000ff);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x077c, 1));
+    moche_push_data(moche, 0xff000000);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0214, 1));
+    moche_push_data(moche, 0xfe000000);
+    // address of the shader code copied to mptr[1024]
+    moche_push_data(moche, nvk_sq_cmd(1, 0x1608, 2));
+    moche_push_data(moche, nvk_addr_high(bom->offset + 1024 * 4));
+    moche_push_data(moche, nvk_addr_low(bom->offset + 1024 * 4));
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0310, 1));
+    moche_push_data(moche, 0x00000400);
+    moche_push_data(moche, nvk_ni_cmd(1, 0x0248, 64));
+    // own loop variable: the original re-declared and shadowed `i` here
+    for (int slot = 63; slot >= 0; --slot) {
+        moche_push_data(moche, 0x00038000 | slot);
+    }
+    moche_push_data(moche, nvk_ni_cmd(1, 0x0110, 1));
+    moche_push_data(moche, 0x00000000);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x2608, 1));
+    moche_push_data(moche, 0x00000000);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x1698, 1));
+    moche_push_data(moche, 0x00001000);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x021c, 1));
+    moche_push_data(moche, 0x00001017);
+    // address of the descriptor at mptr[128], shifted right by 8
+    moche_push_data(moche, nvk_sq_cmd(1, 0x02b4, 1));
+    moche_push_data(moche, (bom->offset + 128 * 4) >> 8);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x02bc, 1));
+    moche_push_data(moche, 0x00000003);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1));
+    moche_push_data(moche, 0x00000000);
+
+    // fence write: same method as moche_pushbuf_test(), targeting mptr[0]
+    moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1));
+    moche_push_data(moche, 0x00000000);
+    moche_push_data(moche, nvk_sq_cmd(1, 0x1b00, 4));
+    moche_push_data(moche, nvk_addr_high(bom->offset));
+    moche_push_data(moche, nvk_addr_low(bom->offset));
+    moche_push_data(moche, 0xdeadcafe);
+    moche_push_data(moche, 0x00000000);
+
+    if ((r = moche_push_kick(moche))) {
+        printf("EE: pushbuf kick (%d)\n", r);
+        goto out_bom;
+    }
+
+    for (tries = 0; tries < 10 && mptr[0] != 0xdeadcafe; ++tries) {
+        nouveau_bo_wait(bom, NOUVEAU_BO_RD, moche->nvclient);
+    }
+    if (mptr[0] != 0xdeadcafe) {
+        printf("EE: compute test 0x%08x\n", mptr[0]);
+    } else if (nelem >= 2) {
+        printf("OK: compute test 0x%08x 0x%08x 0x%08x 0x%08x\n",
+               mptr[0], rptr[0], rptr[1], rptr[nelem - 1]);
+    } else {
+        // fewer than two results: rptr[1] / rptr[nelem - 1] do not exist
+        printf("OK: compute test 0x%08x\n", mptr[0]);
+    }
+    // unsigned index and %u: both sides of the comparison are unsigned
+    for (idx = 0; idx < nelem; ++idx) {
+        if (rptr[idx] != idx) {
+            printf("EE: rptr[%u] != %u -> %u\n", idx, idx, rptr[idx]);
+            break;
+        }
+    }
+
+out_bom:
+    nouveau_bo_ref(NULL, &bom);
+out_tls:
+    nouveau_bo_ref(NULL, &tls);
+    return r;
+}
+
+/*
+ * Entry point: initialise the context, wait for Enter, run the push
+ * buffer test and then the compute test.
+ *
+ * The compute test writes either into a GART buffer object or, in the
+ * branch currently compiled in, into malloc()ed process memory whose CPU
+ * address is handed to the GPU as-is.
+ *
+ * Returns 0 when every step succeeded, otherwise the first error code.
+ * Once moche_init() has succeeded, every exit path releases the buffer
+ * object, the context and the host buffer.
+ */
+int main(int argc, char *argv[])
+{
+    struct nouveau_bo *bor = NULL;
+    uint32_t *hostbuf = NULL;
+    unsigned nelem = 512 * 1024;
+    moche_t moche = {0};
+    int r;
+
+    (void)argc;
+    (void)argv;
+
+    if ((r = moche_init(&moche))) {
+        return r;
+    }
+
+    printf("OK: fd %d ok chipset 0x%08x (press enter to run)\n",
+           moche.fd, moche.nvdevice->chipset);
+    getchar();
+
+    if ((r = moche_pushbuf_test(&moche))) {
+        goto out;
+    }
+
+    if (0) {
+        /* Result area backed by a GART buffer object. */
+        if ((r = nouveau_bo_new(moche.nvdevice, NOUVEAU_BO_GART,
+                                0, 4 * nelem, NULL, &bor))) {
+            printf("EE: failed to create nouveau bo (%d)\n", r);
+            goto out;
+        }
+        if ((r = nouveau_bo_map(bor, NOUVEAU_BO_WR, moche.nvclient))) {
+            printf("EE: failed to map nouveau bo (%d)\n", r);
+            goto out;
+        }
+        printf("bor offset 0x%lx\n", (unsigned long)bor->offset);
+
+        r = moche_compute_test(&moche, bor->offset, bor->map, nelem, bor);
+    } else {
+        /* Result area in ordinary process memory. */
+        hostbuf = malloc((size_t)nelem * sizeof(*hostbuf));
+        if (!hostbuf) {
+            printf("EE: failed to allocate result buffer\n");
+            r = -ENOMEM;
+            goto out;
+        }
+        printf("bor offset 0x%lx\n", (unsigned long)hostbuf);
+
+        r = moche_compute_test(&moche, (uint64_t)(uintptr_t)hostbuf,
+                               hostbuf, nelem, NULL);
+    }
+
+out:
+    if (bor)
+        nouveau_bo_ref(NULL, &bor);
+    moche_fini(&moche);
+    /* Freed only after the channel is gone, so the GPU can no longer write it. */
+    free(hostbuf);
+    return r;
+}