1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
|
// SPDX-License-Identifier: MIT
#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_chan.h"
#include "nouveau_sched.h"
#include "nouveau_uvmm.h"
/**
* DOC: Overview
*
* Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
* DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
*
* In order to use the UAPI firstly a user client must initialize the VA space
* using the DRM_NOUVEAU_VM_INIT ioctl specifying which region of the VA space
* should be managed by the kernel and which by the UMD.
*
* The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
* userspace-managable portion of the VA space. It provides operations to map
* and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
* backed by a GEM object and the kernel will ignore GEM handles provided
* alongside a sparse mapping.
*
* Userspace may request memory backed mappings either within or outside of the
* bounds (but not crossing those bounds) of a previously mapped sparse
* mapping. Subsequently requested memory backed mappings within a sparse
* mapping will take precedence over the corresponding range of the sparse
* mapping. If such memory backed mappings are unmapped the kernel will make
* sure that the corresponding sparse mapping will take their place again.
* Requests to unmap a sparse mapping that still contains memory backed mappings
* will result in those memory backed mappings being unmapped first.
*
* Unmap requests are not bound to the range of existing mappings and can even
* overlap the bounds of sparse mappings. For such a request the kernel will
* make sure to unmap all memory backed mappings within the given range,
* splitting up memory backed mappings which are only partially contained
* within the given range. Unmap requests with the sparse flag set must match
* the range of a previously mapped sparse mapping exactly though.
*
* While the kernel generally permits arbitrary sequences and ranges of memory
* backed mappings being mapped and unmapped, either within a single or multiple
* VM_BIND ioctl calls, there are some restrictions for sparse mappings.
*
* The kernel does not permit to:
* - unmap non-existent sparse mappings
* - unmap a sparse mapping and map a new sparse mapping overlapping the range
* of the previously unmapped sparse mapping within the same VM_BIND ioctl
* - unmap a sparse mapping and map new memory backed mappings overlapping the
* range of the previously unmapped sparse mapping within the same VM_BIND
* ioctl
*
* When using the VM_BIND ioctl to request the kernel to map memory to a given
* virtual address in the GPU's VA space there is no guarantee that the actual
* mappings are created in the GPU's MMU. If the given memory is swapped out
* at the time the bind operation is executed the kernel will stash the mapping
* details into it's internal alloctor and create the actual MMU mappings once
* the memory is swapped back in. While this is transparent for userspace, it is
* guaranteed that all the backing memory is swapped back in and all the memory
* mappings, as requested by userspace previously, are actually mapped once the
* DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
*
* A VM_BIND job can be executed either synchronously or asynchronously. If
* exectued asynchronously, userspace may provide a list of syncobjs this job
* will wait for and/or a list of syncobj the kernel will signal once the
* VM_BIND job finished execution. If executed synchronously the ioctl will
* block until the bind job is finished. For synchronous jobs the kernel will
* not permit any syncobjs submitted to the kernel.
*
* To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
* jobs are always executed asynchronously, and, equal to VM_BIND jobs, provide
* the option to synchronize them with syncobjs.
*
* Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
*
* Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have
* an up to date view of the VA space. However, the actual mappings might still
* be pending. Hence, EXEC jobs require to have the particular fences - of
* the corresponding VM_BIND jobs they depent on - attached to them.
*/
static int
nouveau_exec_job_submit(struct nouveau_job *job,
struct drm_gpuvm_exec *vme)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
struct nouveau_cli *cli = job->cli;
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
int ret;
/* Create a new fence, but do not emit yet. */
ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
if (ret)
return ret;
nouveau_uvmm_lock(uvmm);
ret = drm_gpuvm_exec_lock(vme);
if (ret) {
nouveau_uvmm_unlock(uvmm);
return ret;
}
nouveau_uvmm_unlock(uvmm);
ret = drm_gpuvm_exec_validate(vme);
if (ret) {
drm_gpuvm_exec_unlock(vme);
return ret;
}
return 0;
}
static void
nouveau_exec_job_armed_submit(struct nouveau_job *job,
struct drm_gpuvm_exec *vme)
{
drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
job->resv_usage, job->resv_usage);
drm_gpuvm_exec_unlock(vme);
}
static struct dma_fence *
nouveau_exec_job_run(struct nouveau_job *job)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
struct nouveau_channel *chan = exec_job->chan;
struct nouveau_fence *fence = exec_job->fence;
int i, ret;
ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
if (ret) {
NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
return ERR_PTR(ret);
}
for (i = 0; i < exec_job->push.count; i++) {
struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;
nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
}
ret = nouveau_fence_emit(fence);
if (ret) {
nouveau_fence_unref(&exec_job->fence);
NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
WIND_RING(chan);
return ERR_PTR(ret);
}
/* The fence was emitted successfully, set the job's fence pointer to
* NULL in order to avoid freeing it up when the job is cleaned up.
*/
exec_job->fence = NULL;
return &fence->base;
}
static void
nouveau_exec_job_free(struct nouveau_job *job)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
nouveau_job_done(job);
nouveau_job_free(job);
kfree(exec_job->fence);
kfree(exec_job->push.s);
kfree(exec_job);
}
static enum drm_gpu_sched_stat
nouveau_exec_job_timeout(struct nouveau_job *job)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
struct nouveau_channel *chan = exec_job->chan;
if (unlikely(!atomic_read(&chan->killed)))
nouveau_channel_kill(chan);
NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
chan->chid);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
static const struct nouveau_job_ops nouveau_exec_job_ops = {
.submit = nouveau_exec_job_submit,
.armed_submit = nouveau_exec_job_armed_submit,
.run = nouveau_exec_job_run,
.free = nouveau_exec_job_free,
.timeout = nouveau_exec_job_timeout,
};
int
nouveau_exec_job_init(struct nouveau_exec_job **pjob,
struct nouveau_exec_job_args *__args)
{
struct nouveau_exec_job *job;
struct nouveau_job_args args = {};
int i, ret;
for (i = 0; i < __args->push.count; i++) {
struct drm_nouveau_exec_push *p = &__args->push.s[i];
if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
NV_PRINTK(err, nouveau_cli(__args->file_priv),
"pushbuf size exceeds limit: 0x%x max 0x%x\n",
p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
return -EINVAL;
}
}
job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
if (!job)
return -ENOMEM;
job->push.count = __args->push.count;
if (__args->push.count) {
job->push.s = kmemdup(__args->push.s,
sizeof(*__args->push.s) *
__args->push.count,
GFP_KERNEL);
if (!job->push.s) {
ret = -ENOMEM;
goto err_free_job;
}
}
args.file_priv = __args->file_priv;
job->chan = __args->chan;
args.sched = __args->sched;
/* Plus one to account for the HW fence. */
args.credits = job->push.count + 1;
args.in_sync.count = __args->in_sync.count;
args.in_sync.s = __args->in_sync.s;
args.out_sync.count = __args->out_sync.count;
args.out_sync.s = __args->out_sync.s;
args.ops = &nouveau_exec_job_ops;
args.resv_usage = DMA_RESV_USAGE_WRITE;
ret = nouveau_job_init(&job->base, &args);
if (ret)
goto err_free_pushs;
return 0;
err_free_pushs:
kfree(job->push.s);
err_free_job:
kfree(job);
*pjob = NULL;
return ret;
}
static int
nouveau_exec(struct nouveau_exec_job_args *args)
{
struct nouveau_exec_job *job;
int ret;
ret = nouveau_exec_job_init(&job, args);
if (ret)
return ret;
ret = nouveau_job_submit(&job->base);
if (ret)
goto err_job_fini;
return 0;
err_job_fini:
nouveau_job_fini(&job->base);
return ret;
}
static int
nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
struct drm_nouveau_exec *req)
{
struct drm_nouveau_sync **s;
u32 inc = req->wait_count;
u64 ins = req->wait_ptr;
u32 outc = req->sig_count;
u64 outs = req->sig_ptr;
u32 pushc = req->push_count;
u64 pushs = req->push_ptr;
int ret;
if (pushc) {
args->push.count = pushc;
args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
if (IS_ERR(args->push.s))
return PTR_ERR(args->push.s);
}
if (inc) {
s = &args->in_sync.s;
args->in_sync.count = inc;
*s = u_memcpya(ins, inc, sizeof(**s));
if (IS_ERR(*s)) {
ret = PTR_ERR(*s);
goto err_free_pushs;
}
}
if (outc) {
s = &args->out_sync.s;
args->out_sync.count = outc;
*s = u_memcpya(outs, outc, sizeof(**s));
if (IS_ERR(*s)) {
ret = PTR_ERR(*s);
goto err_free_ins;
}
}
return 0;
err_free_pushs:
u_free(args->push.s);
err_free_ins:
u_free(args->in_sync.s);
return ret;
}
static void
nouveau_exec_ufree(struct nouveau_exec_job_args *args)
{
u_free(args->push.s);
u_free(args->in_sync.s);
u_free(args->out_sync.s);
}
int
nouveau_exec_ioctl_exec(struct drm_device *dev,
void *data,
struct drm_file *file_priv)
{
struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
struct nouveau_cli *cli = nouveau_cli(file_priv);
struct nouveau_abi16_chan *chan16;
struct nouveau_channel *chan = NULL;
struct nouveau_exec_job_args args = {};
struct drm_nouveau_exec *req = data;
int push_max, ret = 0;
if (unlikely(!abi16))
return -ENOMEM;
/* abi16 locks already */
if (unlikely(!nouveau_cli_uvmm(cli)))
return nouveau_abi16_put(abi16, -ENOSYS);
list_for_each_entry(chan16, &abi16->channels, head) {
if (chan16->chan->chid == req->channel) {
chan = chan16->chan;
break;
}
}
if (!chan)
return nouveau_abi16_put(abi16, -ENOENT);
if (unlikely(atomic_read(&chan->killed)))
return nouveau_abi16_put(abi16, -ENODEV);
if (!chan->dma.ib_max)
return nouveau_abi16_put(abi16, -ENOSYS);
push_max = nouveau_exec_push_max_from_ib_max(chan->dma.ib_max);
if (unlikely(req->push_count > push_max)) {
NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
req->push_count, push_max);
return nouveau_abi16_put(abi16, -EINVAL);
}
ret = nouveau_exec_ucopy(&args, req);
if (ret)
goto out;
args.sched = chan16->sched;
args.file_priv = file_priv;
args.chan = chan;
ret = nouveau_exec(&args);
if (ret)
goto out_free_args;
out_free_args:
nouveau_exec_ufree(&args);
out:
return nouveau_abi16_put(abi16, ret);
}
|