summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_render_state.c
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2017-04-25 16:35:35 +0100
committerMark Brown <broonie@kernel.org>2017-04-25 16:35:35 +0100
commit9095bf25ea08135a5b74875dd0e3eeaddc4218a0 (patch)
treea4e6976eab0e7a5b6258281077cd5853c7e69514 /drivers/gpu/drm/i915/i915_gem_render_state.c
parentcdf4275e957c6bad3756e98942341667f1d7de7d (diff)
parentc1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201 (diff)
Merge tag 'v4.11-rc1' into regulator-arizona
Linux 4.11-rc1
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_render_state.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_render_state.c188
1 files changed, 119 insertions, 69 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 95b7e9afd5f8..7032c542a9b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,17 +28,19 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
-struct render_state {
+struct intel_render_state {
const struct intel_renderstate_rodata *rodata;
struct i915_vma *vma;
- u32 aux_batch_size;
- u32 aux_batch_offset;
+ u32 batch_offset;
+ u32 batch_size;
+ u32 aux_offset;
+ u32 aux_size;
};
static const struct intel_renderstate_rodata *
-render_state_get_rodata(const struct drm_i915_gem_request *req)
+render_state_get_rodata(const struct intel_engine_cs *engine)
{
- switch (INTEL_GEN(req->i915)) {
+ switch (INTEL_GEN(engine->i915)) {
case 6:
return &gen6_null_state;
case 7:
@@ -58,34 +60,31 @@ render_state_get_rodata(const struct drm_i915_gem_request *req)
* this is sufficient as the null state generator makes the final batch
* with two passes to build command and state separately. At this point
* the size of both are known and it compacts them by relocating the state
- * right after the commands taking care of aligment so we should sufficient
+ * right after the commands taking care of alignment so we should sufficient
* space below them for adding new commands.
*/
#define OUT_BATCH(batch, i, val) \
do { \
- if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) { \
- ret = -ENOSPC; \
- goto err_out; \
- } \
+ if ((i) >= PAGE_SIZE / sizeof(u32)) \
+ goto err; \
(batch)[(i)++] = (val); \
} while(0)
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+ struct drm_i915_private *i915)
{
- struct drm_device *dev = so->vma->vm->dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
- const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
+ struct drm_i915_gem_object *obj = so->vma->obj;
unsigned int i = 0, reloc_index = 0;
- struct page *page;
+ unsigned int needs_clflush;
u32 *d;
int ret;
- ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
+ ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
if (ret)
return ret;
- page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
- d = kmap(page);
+ d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
@@ -93,12 +92,10 @@ static int render_state_setup(struct render_state *so)
if (i * 4 == rodata->reloc[reloc_index]) {
u64 r = s + so->vma->node.start;
s = lower_32_bits(r);
- if (has_64bit_reloc) {
+ if (HAS_64BIT_RELOC(i915)) {
if (i + 1 >= rodata->batch_items ||
- rodata->batch[i + 1] != 0) {
- ret = -EINVAL;
- goto err_out;
- }
+ rodata->batch[i + 1] != 0)
+ goto err;
d[i++] = s;
s = upper_32_bits(r);
@@ -110,12 +107,20 @@ static int render_state_setup(struct render_state *so)
d[i++] = s;
}
+ if (rodata->reloc[reloc_index] != -1) {
+ DRM_ERROR("only %d relocs resolved\n", reloc_index);
+ goto err;
+ }
+
+ so->batch_offset = so->vma->node.start;
+ so->batch_size = rodata->batch_items * sizeof(u32);
+
while (i % CACHELINE_DWORDS)
OUT_BATCH(d, i, MI_NOOP);
- so->aux_batch_offset = i * sizeof(u32);
+ so->aux_offset = i * sizeof(u32);
- if (HAS_POOLED_EU(dev)) {
+ if (HAS_POOLED_EU(i915)) {
/*
* We always program 3x6 pool config but depending upon which
* subslice is disabled HW drops down to appropriate config
@@ -143,88 +148,133 @@ static int render_state_setup(struct render_state *so)
}
OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
- so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
-
+ so->aux_size = i * sizeof(u32) - so->aux_offset;
+ so->aux_offset += so->batch_offset;
/*
* Since we are sending length, we need to strictly conform to
* all requirements. For Gen2 this must be a multiple of 8.
*/
- so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
-
- kunmap(page);
-
- ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false);
- if (ret)
- return ret;
-
- if (rodata->reloc[reloc_index] != -1) {
- DRM_ERROR("only %d relocs resolved\n", reloc_index);
- return -EINVAL;
- }
+ so->aux_size = ALIGN(so->aux_size, 8);
- return 0;
+ if (needs_clflush)
+ drm_clflush_virt_range(d, i * sizeof(u32));
+ kunmap_atomic(d);
-err_out:
- kunmap(page);
+ ret = i915_gem_object_set_to_gtt_domain(obj, false);
+out:
+ i915_gem_obj_finish_shmem_access(obj);
return ret;
+
+err:
+ kunmap_atomic(d);
+ ret = -EINVAL;
+ goto out;
}
#undef OUT_BATCH
-int i915_gem_render_state_init(struct drm_i915_gem_request *req)
+int i915_gem_render_state_init(struct intel_engine_cs *engine)
{
- struct render_state so;
+ struct intel_render_state *so;
+ const struct intel_renderstate_rodata *rodata;
struct drm_i915_gem_object *obj;
int ret;
- if (WARN_ON(req->engine->id != RCS))
- return -ENOENT;
+ if (engine->id != RCS)
+ return 0;
- so.rodata = render_state_get_rodata(req);
- if (!so.rodata)
+ rodata = render_state_get_rodata(engine);
+ if (!rodata)
return 0;
- if (so.rodata->batch_items * 4 > 4096)
+ if (rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL;
- obj = i915_gem_object_create(&req->i915->drm, 4096);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
+ so = kmalloc(sizeof(*so), GFP_KERNEL);
+ if (!so)
+ return -ENOMEM;
- so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
- if (IS_ERR(so.vma)) {
- ret = PTR_ERR(so.vma);
- goto err_obj;
+ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto err_free;
}
- ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL);
- if (ret)
+ so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+ if (IS_ERR(so->vma)) {
+ ret = PTR_ERR(so->vma);
goto err_obj;
+ }
+
+ so->rodata = rodata;
+ engine->render_state = so;
+ return 0;
+
+err_obj:
+ i915_gem_object_put(obj);
+err_free:
+ kfree(so);
+ return ret;
+}
+
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
+{
+ struct intel_render_state *so;
+ int ret;
+
+ lockdep_assert_held(&req->i915->drm.struct_mutex);
- ret = render_state_setup(&so);
+ so = req->engine->render_state;
+ if (!so)
+ return 0;
+
+ /* Recreate the page after shrinking */
+ if (!so->vma->obj->mm.pages)
+ so->batch_offset = -1;
+
+ ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (ret)
- goto err_unpin;
+ return ret;
+
+ if (so->vma->node.start != so->batch_offset) {
+ ret = render_state_setup(so, req->i915);
+ if (ret)
+ goto err_unpin;
+ }
- ret = req->engine->emit_bb_start(req, so.vma->node.start,
- so.rodata->batch_items * 4,
+ ret = req->engine->emit_bb_start(req,
+ so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
if (ret)
goto err_unpin;
- if (so.aux_batch_size > 8) {
+ if (so->aux_size > 8) {
ret = req->engine->emit_bb_start(req,
- (so.vma->node.start +
- so.aux_batch_offset),
- so.aux_batch_size,
+ so->aux_offset, so->aux_size,
I915_DISPATCH_SECURE);
if (ret)
goto err_unpin;
}
- i915_vma_move_to_active(so.vma, req, 0);
+ i915_vma_move_to_active(so->vma, req, 0);
err_unpin:
- i915_vma_unpin(so.vma);
-err_obj:
- i915_gem_object_put(obj);
+ i915_vma_unpin(so->vma);
return ret;
}
+
+void i915_gem_render_state_fini(struct intel_engine_cs *engine)
+{
+ struct intel_render_state *so;
+ struct drm_i915_gem_object *obj;
+
+ so = fetch_and_zero(&engine->render_state);
+ if (!so)
+ return;
+
+ obj = so->vma->obj;
+
+ i915_vma_close(so->vma);
+ __i915_gem_object_release_unless_active(obj);
+
+ kfree(so);
+}