summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author    Thierry Reding <treding@nvidia.com> 2017-10-02 15:51:08 +0200
committer Thierry Reding <treding@nvidia.com> 2018-01-11 22:46:54 +0100
commit    1d2ca5ab5153fcd849e37eb1f584336546b720bd (patch)
tree      fc59109a06e0ac01facd07fbd4f31d8deb6805a4
parent    d3cb58831517d4d42869a7c1f518c861baafdc8e (diff)
nouveau: Support fence FDs (branch: nouveau-sync-fd)
Add a new nouveau_pushbuf_kick_fence() function that takes and emits a
sync fence FD. The fence FD can be waited on, or merged with other fence
FDs, or passed back to the kernel as a prerequisite for a subsequent HW
operation.

Based heavily on work by Lauri Peltonen <lpeltonen@nvidia.com>

Signed-off-by: Thierry Reding <treding@nvidia.com>
-rw-r--r--  include/drm/nouveau_drm.h |  23
-rw-r--r--  nouveau/nouveau.h         |   2
-rw-r--r--  nouveau/pushbuf.c         | 130
3 files changed, 121 insertions, 34 deletions
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index cb077821..e7f1a2ae 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -178,6 +178,28 @@ struct drm_nouveau_gem_pushbuf {
__u64 gart_available;
};
+#define NOUVEAU_GEM_PUSHBUF_FENCE_WAIT (1 << 0)
+#define NOUVEAU_GEM_PUSHBUF_FENCE_EMIT (1 << 1)
+#define NOUVEAU_GEM_PUSHBUF_FLAGS (NOUVEAU_GEM_PUSHBUF_FENCE_WAIT | \
+ NOUVEAU_GEM_PUSHBUF_FENCE_EMIT)
+
+struct drm_nouveau_gem_pushbuf2 {
+ uint32_t channel;
+ uint32_t nr_buffers;
+ uint64_t buffers;
+ uint32_t nr_relocs;
+ uint32_t nr_push;
+ uint64_t relocs;
+ uint64_t push;
+ uint32_t suffix0;
+ uint32_t suffix1;
+ uint64_t vram_available;
+ uint64_t gart_available;
+ uint32_t flags;
+ int32_t fence;
+ uint64_t reserved;
+};
+
#define NOUVEAU_GEM_CPU_PREP_NOWAIT 0x00000001
#define NOUVEAU_GEM_CPU_PREP_NOBLOCK 0x00000002
#define NOUVEAU_GEM_CPU_PREP_WRITE 0x00000004
@@ -212,6 +234,7 @@ struct drm_nouveau_sarea {
#define DRM_NOUVEAU_GEM_CPU_PREP 0x42
#define DRM_NOUVEAU_GEM_CPU_FINI 0x43
#define DRM_NOUVEAU_GEM_INFO 0x44
+#define DRM_NOUVEAU_GEM_PUSHBUF2 0x45
#if defined(__cplusplus)
}
diff --git a/nouveau/nouveau.h b/nouveau/nouveau.h
index 335ce77d..70d68070 100644
--- a/nouveau/nouveau.h
+++ b/nouveau/nouveau.h
@@ -226,6 +226,8 @@ void nouveau_pushbuf_reloc(struct nouveau_pushbuf *, struct nouveau_bo *,
int nouveau_pushbuf_validate(struct nouveau_pushbuf *);
uint32_t nouveau_pushbuf_refd(struct nouveau_pushbuf *, struct nouveau_bo *);
int nouveau_pushbuf_kick(struct nouveau_pushbuf *, struct nouveau_object *chan);
+int nouveau_pushbuf_kick_fence(struct nouveau_pushbuf *,
+ struct nouveau_object *chan, int *fence);
struct nouveau_bufctx *
nouveau_pushbuf_bufctx(struct nouveau_pushbuf *, struct nouveau_bufctx *);
diff --git a/nouveau/pushbuf.c b/nouveau/pushbuf.c
index 035e3019..f13804db 100644
--- a/nouveau/pushbuf.c
+++ b/nouveau/pushbuf.c
@@ -33,6 +33,7 @@
#include <string.h>
#include <assert.h>
#include <errno.h>
+#include <unistd.h>
#include <xf86drm.h>
#include <xf86atomic.h>
@@ -77,7 +78,7 @@ nouveau_pushbuf(struct nouveau_pushbuf *push)
}
static int pushbuf_validate(struct nouveau_pushbuf *, bool);
-static int pushbuf_flush(struct nouveau_pushbuf *);
+static int pushbuf_flush(struct nouveau_pushbuf *, int *);
static bool
pushbuf_kref_fits(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
@@ -172,7 +173,7 @@ pushbuf_kref(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
*/
fpush = cli_push_get(push->client, bo);
if (fpush && fpush != push)
- pushbuf_flush(fpush);
+ pushbuf_flush(fpush, NULL);
kref = cli_kref_get(push->client, bo);
if (kref) {
@@ -307,7 +308,8 @@ pushbuf_dump(struct nouveau_pushbuf_krec *krec, int krec_id, int chid)
}
static int
-pushbuf_submit(struct nouveau_pushbuf *push, struct nouveau_object *chan)
+pushbuf_submit(struct nouveau_pushbuf *push, struct nouveau_object *chan,
+ int *fence)
{
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->list;
@@ -315,9 +317,9 @@ pushbuf_submit(struct nouveau_pushbuf *push, struct nouveau_object *chan)
struct nouveau_drm *drm = nouveau_drm(&dev->object);
struct drm_nouveau_gem_pushbuf_bo_presumed *info;
struct drm_nouveau_gem_pushbuf_bo *kref;
- struct drm_nouveau_gem_pushbuf req;
struct nouveau_fifo *fifo = chan->data;
struct nouveau_bo *bo;
+ int fence_out = -1;
int krec_id = 0;
int ret = 0, i;
@@ -330,35 +332,81 @@ pushbuf_submit(struct nouveau_pushbuf *push, struct nouveau_object *chan)
nouveau_pushbuf_data(push, NULL, 0, 0);
while (krec && krec->nr_push) {
- req.channel = fifo->channel;
- req.nr_buffers = krec->nr_buffer;
- req.buffers = (uint64_t)(unsigned long)krec->buffer;
- req.nr_relocs = krec->nr_reloc;
- req.nr_push = krec->nr_push;
- req.relocs = (uint64_t)(unsigned long)krec->reloc;
- req.push = (uint64_t)(unsigned long)krec->push;
- req.suffix0 = nvpb->suffix0;
- req.suffix1 = nvpb->suffix1;
- req.vram_available = 0; /* for valgrind */
- req.gart_available = 0;
-
if (dbg_on(0))
pushbuf_dump(krec, krec_id++, fifo->channel);
+ /* TODO If fence is requested, force kickoff. */
+ if (fence) {
+ struct drm_nouveau_gem_pushbuf2 req;
+
+ memset(&req, 0, sizeof(req));
+ req.channel = fifo->channel;
+ req.nr_buffers = krec->nr_buffer;
+ req.buffers = (uint64_t)(unsigned long)krec->buffer;
+ req.nr_relocs = krec->nr_reloc;
+ req.nr_push = krec->nr_push;
+ req.relocs = (uint64_t)(unsigned long)krec->reloc;
+ req.push = (uint64_t)(unsigned long)krec->push;
+ req.suffix0 = nvpb->suffix0;
+ req.suffix1 = nvpb->suffix1;
+ req.vram_available = 0; /* for valgrind */
+ req.gart_available = 0;
+ req.flags = 0;
+
+ if (*fence >= 0)
+ req.flags |= NOUVEAU_GEM_PUSHBUF_FENCE_WAIT;
+
+ req.flags |= NOUVEAU_GEM_PUSHBUF_FENCE_EMIT;
+
+ req.fence = *fence;
+ req.reserved = 0;
+
#ifndef SIMULATE
- ret = drmCommandWriteRead(drm->fd, DRM_NOUVEAU_GEM_PUSHBUF,
- &req, sizeof(req));
- nvpb->suffix0 = req.suffix0;
- nvpb->suffix1 = req.suffix1;
- dev->vram_limit = (req.vram_available *
- nouveau_device(dev)->vram_limit_percent) / 100;
- dev->gart_limit = (req.gart_available *
- nouveau_device(dev)->gart_limit_percent) / 100;
+ ret = drmCommandWriteRead(drm->fd, DRM_NOUVEAU_GEM_PUSHBUF2,
+ &req, sizeof(req));
+ nvpb->suffix0 = req.suffix0;
+ nvpb->suffix1 = req.suffix1;
+ dev->vram_limit = (req.vram_available *
+ nouveau_device(dev)->vram_limit_percent) / 100;
+ dev->gart_limit = (req.gart_available *
+ nouveau_device(dev)->gart_limit_percent) / 100;
#else
- if (dbg_on(31))
- ret = -EINVAL;
+ if (dbg_on(31))
+ ret = -EINVAL;
#endif
+ if (!ret)
+ fence_out = req.fence;
+ } else {
+ struct drm_nouveau_gem_pushbuf req;
+
+ req.channel = fifo->channel;
+ req.nr_buffers = krec->nr_buffer;
+ req.buffers = (uint64_t)(unsigned long)krec->buffer;
+ req.nr_relocs = krec->nr_reloc;
+ req.nr_push = krec->nr_push;
+ req.relocs = (uint64_t)(unsigned long)krec->reloc;
+ req.push = (uint64_t)(unsigned long)krec->push;
+ req.suffix0 = nvpb->suffix0;
+ req.suffix1 = nvpb->suffix1;
+ req.vram_available = 0; /* for valgrind */
+ req.gart_available = 0;
+
+#ifndef SIMULATE
+ ret = drmCommandWriteRead(drm->fd, DRM_NOUVEAU_GEM_PUSHBUF,
+ &req, sizeof(req));
+ nvpb->suffix0 = req.suffix0;
+ nvpb->suffix1 = req.suffix1;
+ dev->vram_limit = (req.vram_available *
+ nouveau_device(dev)->vram_limit_percent) / 100;
+ dev->gart_limit = (req.gart_available *
+ nouveau_device(dev)->gart_limit_percent) / 100;
+#else
+ if (dbg_on(31))
+ ret = -EINVAL;
+#endif
+ }
+
if (ret) {
err("kernel rejected pushbuf: %s\n", strerror(-ret));
pushbuf_dump(krec, krec_id++, fifo->channel);
@@ -388,11 +436,18 @@ pushbuf_submit(struct nouveau_pushbuf *push, struct nouveau_object *chan)
krec = krec->next;
}
+ if (!ret && fence) {
+ if (*fence >= 0)
+ close(*fence);
+
+ *fence = fence_out;
+ }
+
return ret;
}
static int
-pushbuf_flush(struct nouveau_pushbuf *push)
+pushbuf_flush(struct nouveau_pushbuf *push, int *fence)
{
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
@@ -402,7 +457,7 @@ pushbuf_flush(struct nouveau_pushbuf *push)
int ret = 0, i;
if (push->channel) {
- ret = pushbuf_submit(push, push->channel);
+ ret = pushbuf_submit(push, push->channel, fence);
} else {
nouveau_pushbuf_data(push, NULL, 0, 0);
krec->next = malloc(sizeof(*krec));
@@ -472,7 +527,7 @@ pushbuf_refn(struct nouveau_pushbuf *push, bool retry,
if (ret) {
pushbuf_refn_fail(push, sref, krec->nr_reloc);
if (retry) {
- pushbuf_flush(push);
+ pushbuf_flush(push, NULL);
nouveau_pushbuf_space(push, 0, 0, 0);
return pushbuf_refn(push, false, refs, nr);
}
@@ -524,7 +579,7 @@ pushbuf_validate(struct nouveau_pushbuf *push, bool retry)
if (ret) {
pushbuf_refn_fail(push, sref, srel);
if (retry) {
- pushbuf_flush(push);
+ pushbuf_flush(push, NULL);
return pushbuf_validate(push, false);
}
}
@@ -676,7 +731,7 @@ nouveau_pushbuf_space(struct nouveau_pushbuf *push,
krec->nr_reloc + relocs >= NOUVEAU_GEM_MAX_RELOCS ||
krec->nr_push + pushes >= NOUVEAU_GEM_MAX_PUSH) {
if (nvpb->bo && krec->nr_buffer)
- pushbuf_flush(push);
+ pushbuf_flush(push, NULL);
flushed = true;
}
@@ -772,10 +827,17 @@ nouveau_pushbuf_refd(struct nouveau_pushbuf *push, struct nouveau_bo *bo)
}
int
-nouveau_pushbuf_kick(struct nouveau_pushbuf *push, struct nouveau_object *chan)
+nouveau_pushbuf_kick_fence(struct nouveau_pushbuf *push,
+ struct nouveau_object *chan, int *fence)
{
if (!push->channel)
- return pushbuf_submit(push, chan);
- pushbuf_flush(push);
+ return pushbuf_submit(push, chan, fence);
+ pushbuf_flush(push, fence);
return pushbuf_validate(push, false);
}
+
+int
+nouveau_pushbuf_kick(struct nouveau_pushbuf *pushbuf, struct nouveau_object *chan)
+{
+ return nouveau_pushbuf_kick_fence(pushbuf, chan, NULL);
+}