author | Chris Wilson <chris@chris-wilson.co.uk> | 2010-01-22 20:48:54 +0000
---|---|---
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2010-01-22 23:01:52 +0000
commit | 77afe8491ed7038a8399c01f10d8f062a7239225 (patch) |
tree | 0af0013ef61936fffb9e4a69605856866da5b677 /src |
parent | b9407af6a4bc792c1bcb52c90aa8a618627bb618 (diff) |
drm: Add backends for i915 and i965.
As a proof of principle, add nearly working demonstrations of using DRM
to render directly on the GPU, bypassing both RENDER and GL for
performance whilst preserving high-quality rendering.
The rationale for developing these chip-specific backends is that this is
the idealised interface we desire for these chips, and thus a target
for cairo-gl as we continue to develop both it and our GL stack.
Note that these backends do not yet fully pass the test suite, so use
them only if you are brave and willing to help develop them further.
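
For orientation, the public entry points this series settles on (see the src/cairo-drm.h hunk below) now hand back a generic cairo_device_t instead of the removed cairo_drm_device_t. The following is a minimal sketch of driving them from a client, assuming a DRM-enabled build and the generic device lifetime calls (cairo_device_status, cairo_device_destroy) that stand in for the deleted cairo_drm_device_status/cairo_drm_device_destroy; error handling is abbreviated:

#include <cairo.h>
#include <cairo-drm.h>

int
main (void)
{
    cairo_device_t *device;
    cairo_surface_t *surface;
    cairo_t *cr;

    /* Open the default DRM device; per cairo convention this returns an
     * inert error object rather than NULL on failure. */
    device = cairo_drm_device_default ();
    if (cairo_device_status (device))
        return 1;

    /* A GPU-backed surface; on Intel hardware this is where the new
     * i915/i965 backends take over. */
    surface = cairo_drm_surface_create (device,
                                        CAIRO_CONTENT_COLOR_ALPHA,
                                        256, 256);

    cr = cairo_create (surface);
    cairo_set_source_rgb (cr, 1, 1, 1);
    cairo_paint (cr);
    cairo_destroy (cr);

    cairo_surface_destroy (surface);
    cairo_device_destroy (device);
    return 0;
}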
Diffstat (limited to 'src')
34 files changed, 22674 insertions, 1115 deletions
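
Among the supporting changes, src/cairo-misc.c gains _cairo_half_from_float(), a single-precision to half-precision conversion (IEEE binary16: one sign bit, five exponent bits, ten mantissa bits) used for the backends' compact 16-bit texture coordinates. A quick sanity check against well-known binary16 encodings — a hypothetical harness for illustration only, since the symbol is cairo_private:

#include <stdint.h>
#include <stdio.h>

uint16_t _cairo_half_from_float (float f); /* added in cairo-misc.c below */

int
main (void)
{
    /* Layout: sign(1) | exponent(5, bias 15) | mantissa(10). */
    printf ("%04x\n", _cairo_half_from_float (1.0f));     /* 3c00 */
    printf ("%04x\n", _cairo_half_from_float (-2.0f));    /* c000 */
    printf ("%04x\n", _cairo_half_from_float (0.5f));     /* 3800 */
    printf ("%04x\n", _cairo_half_from_float (65504.0f)); /* 7bff: largest finite half */
    return 0;
}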
diff --git a/src/Makefile.sources b/src/Makefile.sources index 8d688608..f6d4fe30 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -310,12 +310,29 @@ cairo_directfb_sources = cairo-directfb-surface.c cairo_drm_headers = cairo-drm.h cairo_drm_private = drm/cairo-drm-private.h \ drm/cairo-drm-intel-private.h \ + drm/cairo-drm-i915-private.h \ + drm/cairo-drm-i965-private.h \ + drm/cairo-drm-intel-brw-defines.h \ + drm/cairo-drm-intel-brw-structs.h \ + drm/cairo-drm-intel-brw-eu.h \ drm/cairo-drm-radeon-private.h cairo_drm_sources = drm/cairo-drm.c \ drm/cairo-drm-bo.c \ drm/cairo-drm-surface.c \ drm/cairo-drm-intel.c \ + drm/cairo-drm-intel-debug.c \ drm/cairo-drm-intel-surface.c \ + drm/cairo-drm-i915-surface.c \ + drm/cairo-drm-i915-glyphs.c \ + drm/cairo-drm-i915-shader.c \ + drm/cairo-drm-i915-spans.c \ + drm/cairo-drm-i965-surface.c \ + drm/cairo-drm-i965-glyphs.c \ + drm/cairo-drm-i965-shader.c \ + drm/cairo-drm-i965-spans.c \ + drm/cairo-drm-intel-brw-eu.c \ + drm/cairo-drm-intel-brw-eu-emit.c \ + drm/cairo-drm-intel-brw-eu-util.c \ drm/cairo-drm-radeon.c \ drm/cairo-drm-radeon-surface.c cairo_gallium_sources = drm/cairo-drm-gallium-surface.c diff --git a/src/cairo-drm.h b/src/cairo-drm.h index 1b50b1bd..52b9de21 100644 --- a/src/cairo-drm.h +++ b/src/cairo-drm.h @@ -39,56 +39,41 @@ CAIRO_BEGIN_DECLS -typedef struct _cairo_drm_device cairo_drm_device_t; - struct udev_device; -cairo_public cairo_drm_device_t * +cairo_public cairo_device_t * cairo_drm_device_get (struct udev_device *device); -cairo_public cairo_drm_device_t * +cairo_public cairo_device_t * cairo_drm_device_get_for_fd (int fd); -cairo_public cairo_drm_device_t * +cairo_public cairo_device_t * cairo_drm_device_default (void); -cairo_public cairo_drm_device_t * -cairo_drm_device_reference (cairo_drm_device_t *device); - -cairo_public cairo_status_t -cairo_drm_device_status (cairo_drm_device_t *device); - cairo_public int -cairo_drm_device_get_fd (cairo_drm_device_t *device); +cairo_drm_device_get_fd (cairo_device_t *device); cairo_public void -cairo_drm_device_throttle (cairo_drm_device_t *device); - -cairo_public void -cairo_drm_device_destroy (cairo_drm_device_t *device); - +cairo_drm_device_throttle (cairo_device_t *device); cairo_public cairo_surface_t * -cairo_drm_surface_create (cairo_drm_device_t *device, +cairo_drm_surface_create (cairo_device_t *device, cairo_content_t content, int width, int height); cairo_public cairo_surface_t * -cairo_drm_surface_create_for_name (cairo_drm_device_t *device, +cairo_drm_surface_create_for_name (cairo_device_t *device, unsigned int name, cairo_format_t format, int width, int height, int stride); cairo_public cairo_surface_t * -cairo_drm_surface_create_from_cacheable_image (cairo_drm_device_t *device, +cairo_drm_surface_create_from_cacheable_image (cairo_device_t *device, cairo_surface_t *surface); cairo_public cairo_status_t cairo_drm_surface_enable_scan_out (cairo_surface_t *surface); -cairo_public cairo_drm_device_t * -cairo_drm_surface_get_device (cairo_surface_t *abstract_surface); - cairo_public unsigned int cairo_drm_surface_get_handle (cairo_surface_t *surface); diff --git a/src/cairo-misc.c b/src/cairo-misc.c index cd1032a4..56409afc 100644 --- a/src/cairo-misc.c +++ b/src/cairo-misc.c @@ -674,6 +674,63 @@ _cairo_lround (double d) #undef LSW } +/* Convert a 32-bit IEEE single precision floating point number to a + * 'half' representation (s10.5) + */ +uint16_t +_cairo_half_from_float (float f) +{ + union { + uint32_t ui; + float f; + } u; + int s, e, m; + + 
u.f = f; + s = (u.ui >> 16) & 0x00008000; + e = ((u.ui >> 23) & 0x000000ff) - (127 - 15); + m = u.ui & 0x007fffff; + if (e <= 0) { + if (e < -10) { + /* underflow */ + return 0; + } + + m = (m | 0x00800000) >> (1 - e); + + /* round to nearest, round 0.5 up. */ + if (m & 0x00001000) + m += 0x00002000; + return s | (m >> 13); + } else if (e == 0xff - (127 - 15)) { + if (m == 0) { + /* infinity */ + return s | 0x7c00; + } else { + /* nan */ + m >>= 13; + return s | 0x7c00 | m | (m == 0); + } + } else { + /* round to nearest, round 0.5 up. */ + if (m & 0x00001000) { + m += 0x00002000; + + if (m & 0x00800000) { + m = 0; + e += 1; + } + } + + if (e > 30) { + /* overflow -> infinity */ + return s | 0x7c00; + } + + return s | (e << 10) | (m >> 13); + } +} + #ifdef _WIN32 diff --git a/src/cairoint.h b/src/cairoint.h index 6e059c4c..0892bbbd 100644 --- a/src/cairoint.h +++ b/src/cairoint.h @@ -949,6 +949,8 @@ _cairo_round (double r) cairo_private int _cairo_lround (double d) cairo_const; +cairo_private uint16_t +_cairo_half_from_float (float f) cairo_const; /* cairo-gstate.c */ cairo_private cairo_status_t diff --git a/src/drm/cairo-drm-bo.c b/src/drm/cairo-drm-bo.c index 980484a2..cb74bacc 100644 --- a/src/drm/cairo-drm-bo.c +++ b/src/drm/cairo-drm-bo.c @@ -31,11 +31,14 @@ #include "cairo-drm-private.h" #include "cairo-drm-ioctl-private.h" + #include "cairo-error-private.h" #include <sys/ioctl.h> #include <errno.h> +#define ERR_DEBUG(x) x + struct drm_gem_close { /** Handle of the object to be closed. */ uint32_t handle; @@ -79,8 +82,11 @@ _cairo_drm_bo_open_for_name (const cairo_drm_device_t *dev, do { ret = ioctl (dev->fd, DRM_IOCTL_GEM_OPEN, &open); } while (ret == -1 && errno == EINTR); - if (ret == -1) + if (ret == -1) { + ERR_DEBUG((fprintf (stderr, "Failed to open bo for name %d: %s\n", + name, strerror (errno)))); return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } bo->name = name; bo->size = open.size; @@ -99,8 +105,11 @@ _cairo_drm_bo_flink (const cairo_drm_device_t *dev, memset (&flink, 0, sizeof (flink)); flink.handle = bo->handle; ret = ioctl (dev->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret == -1) + if (ret == -1) { + ERR_DEBUG((fprintf (stderr, "Failed to flink bo: %s\n", + strerror (errno)))); return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } bo->name = flink.name; diff --git a/src/drm/cairo-drm-gallium-surface.c b/src/drm/cairo-drm-gallium-surface.c index c2f331bc..73d75478 100644 --- a/src/drm/cairo-drm-gallium-surface.c +++ b/src/drm/cairo-drm-gallium-surface.c @@ -444,7 +444,6 @@ gallium_surface_create_internal (gallium_device_t *device, _cairo_surface_init (&surface->base.base, &gallium_surface_backend, - NULL, /* device */ content); _cairo_drm_surface_init (&surface->base, &device->base); @@ -551,7 +550,6 @@ gallium_surface_create_for_name (cairo_drm_device_t *base_dev, content = _cairo_content_from_format (format); _cairo_surface_init (&surface->base.base, &gallium_surface_backend, - NULL, /* device */ content); _cairo_drm_surface_init (&surface->base, base_dev); @@ -659,6 +657,7 @@ _cairo_drm_gallium_device_create (int fd, dev_t dev, int vendor_id, int chip_id) device->base.surface.enable_scan_out = NULL; device->base.surface.flink = gallium_surface_flink; + device->base.device.flush = NULL; device->base.device.throttle = NULL; device->base.device.destroy = gallium_device_destroy; diff --git a/src/drm/cairo-drm-i915-glyphs.c b/src/drm/cairo-drm-i915-glyphs.c new file mode 100644 index 00000000..babd59e6 --- /dev/null +++ b/src/drm/cairo-drm-i915-glyphs.c @@ -0,0 +1,534 @@ +/* 
cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Red Hat, Inc. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-composite-rectangles-private.h" +#include "cairo-drm-i915-private.h" +#include "cairo-error-private.h" +#include "cairo-rtree-private.h" + +static void +i915_emit_glyph_rectangle_constant (i915_device_t *device, + i915_shader_t *shader, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph) +{ + float *v; + + /* Each vertex is: + * 2 vertex coordinates + * 2 glyph texture coordinates + */ + + v = i915_add_rectangle (device); + + /* bottom right */ + *v++ = x2; *v++ = y2; + *v++ = glyph->texcoord[0]; + + /* bottom left */ + *v++ = x1; *v++ = y2; + *v++ = glyph->texcoord[1]; + + /* top left */ + *v++ = x1; *v++ = y1; + *v++ = glyph->texcoord[2]; +} + +static void +i915_emit_glyph_rectangle_general (i915_device_t *device, + i915_shader_t *shader, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph) +{ + double s, t; + float *v; + + /* Each vertex is: + * 2 vertex coordinates + * [0-2] source texture coordinates + * 2 glyph texture coordinates + */ + + v = i915_add_rectangle (device); + + /* bottom right */ + *v++ = x2; *v++ = y2; + s = x2, t = y2; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, s, t); + break; + case VS_RADIAL: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + *v++ = glyph->texcoord[0]; + + /* bottom left */ + *v++ = x1; *v++ = y2; + s = x1, t = y2; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, s, t); + break; + case VS_RADIAL: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; 
+ break; + case VS_TEXTURE: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + *v++ = glyph->texcoord[1]; + + /* top left */ + *v++ = x1; *v++ = y1; + s = x1, t = y2; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, s, t); + break; + case VS_RADIAL: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + *v++ = glyph->texcoord[2]; +} + +typedef void +(*i915_emit_glyph_rectangle_func_t) (i915_device_t *device, + i915_shader_t *shader, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph); + +static cairo_status_t +i915_surface_mask_internal (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *source, + i915_surface_t *mask, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + i915_device_t *device; + i915_shader_t shader; + cairo_region_t *clip_region = NULL; + cairo_status_t status; + + i915_shader_init (&shader, dst, op); + + status = i915_shader_acquire_pattern (&shader, &shader.source, + source, &extents->bounded); + if (unlikely (status)) + return status; + + shader.mask.type.vertex = VS_TEXTURE_16; + shader.mask.type.fragment = FS_TEXTURE; + shader.mask.base.content = mask->intel.drm.base.content; + shader.mask.base.texfmt = TEXCOORDFMT_2D_16; + shader.mask.base.n_samplers = 1; + shader.mask.base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_NEAREST); + shader.mask.base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (CAIRO_EXTEND_NONE); + + cairo_matrix_init_translate (&shader.mask.base.matrix, + -extents->bounded.x + NEAREST_BIAS, + -extents->bounded.y + NEAREST_BIAS); + cairo_matrix_scale (&shader.mask.base.matrix, + 1. / mask->intel.drm.width, + 1. 
/ mask->intel.drm.height); + + shader.mask.base.bo = to_intel_bo (mask->intel.drm.bo); + shader.mask.base.offset[0] = 0; + shader.mask.base.map[0] = mask->map0; + shader.mask.base.map[1] = mask->map1; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i915_shader_set_clip (&shader, clip); + } + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SHADER; + + device = i915_device (dst); + + status = i915_shader_commit (&shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + if (clip_region != NULL) { + unsigned int n, num_rectangles; + + num_rectangles = cairo_region_num_rectangles (clip_region); + for (n = 0; n < num_rectangles; n++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, n, &rect); + + shader.add_rectangle (&shader, + rect.x, rect.y, + rect.x + rect.width, rect.y + rect.height); + } + } else { + shader.add_rectangle (&shader, + extents->bounded.x, extents->bounded.y, + extents->bounded.x + extents->bounded.width, + extents->bounded.y + extents->bounded.height); + } + + if ((extents->is_bounded & CAIRO_OPERATOR_BOUND_BY_MASK) == 0) + status = i915_fixup_unbounded (dst, extents, clip); + +CLEANUP_DEVICE: + cairo_device_release (&device->intel.base.base); +CLEANUP_SHADER: + i915_shader_fini (&shader); + return status; +} + +cairo_int_status_t +i915_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + i915_surface_t *surface = abstract_surface; + i915_surface_t *mask = NULL; + i915_device_t *device; + i915_shader_t shader; + cairo_composite_rectangles_t extents; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_bool_t overlap; + cairo_region_t *clip_region = NULL; + intel_bo_t *last_bo = NULL; + i915_emit_glyph_rectangle_func_t emit_func; + cairo_scaled_glyph_t *glyph_cache[64]; + cairo_status_t status; + int mask_x = 0, mask_y = 0; + int i = 0; + + *num_remaining = 0; + status = _cairo_composite_rectangles_init_for_glyphs (&extents, + surface->intel.drm.width, + surface->intel.drm.height, + op, source, + scaled_font, + glyphs, num_glyphs, + clip, + &overlap); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL && extents.is_bounded) { + clip = _cairo_clip_init_copy (&local_clip, clip); + status = _cairo_clip_rectangle (clip, &extents.bounded); + if (unlikely (status)) + return status; + + have_clip = TRUE; + } + + if (overlap || (extents.is_bounded & CAIRO_OPERATOR_BOUND_BY_MASK) == 0) { + cairo_content_t content; + + content = CAIRO_CONTENT_ALPHA; + if (scaled_font->options.antialias == CAIRO_ANTIALIAS_SUBPIXEL) + content |= CAIRO_CONTENT_COLOR; + + mask = (i915_surface_t *) + i915_surface_create_internal (&i915_device (surface)->intel.base, + CAIRO_CONTENT_ALPHA, + extents.bounded.width, + extents.bounded.height, + I915_TILING_DEFAULT, + TRUE); + if (unlikely (mask->intel.drm.base.status)) + return mask->intel.drm.base.status; + + status = _cairo_surface_paint (&mask->intel.drm.base, + CAIRO_OPERATOR_CLEAR, + &_cairo_pattern_clear.base, + NULL); + if (unlikely 
(status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + i915_shader_init (&shader, mask, CAIRO_OPERATOR_ADD); + + status = i915_shader_acquire_pattern (&shader, &shader.source, + &_cairo_pattern_white.base, + &extents.bounded); + if (unlikely (status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + mask_x = -extents.bounded.x; + mask_y = -extents.bounded.y; + } else { + i915_shader_init (&shader, surface, op); + + status = i915_shader_acquire_pattern (&shader, &shader.source, + source, &extents.bounded); + if (unlikely (status)) + return status; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i915_shader_set_clip (&shader, clip); + } + } + + shader.mask.type.fragment = FS_TEXTURE; + shader.mask.base.content = CAIRO_CONTENT_ALPHA; /* XXX */ + shader.mask.base.texfmt = TEXCOORDFMT_2D_16; + shader.mask.base.n_samplers = 1; + shader.mask.base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_NEAREST); + shader.mask.base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (CAIRO_EXTEND_NONE); + + switch (shader.source.type.vertex) { + case VS_CONSTANT: + emit_func = i915_emit_glyph_rectangle_constant; + break; + default: + case VS_LINEAR: + case VS_RADIAL: + case VS_TEXTURE: + case VS_TEXTURE_16: + emit_func = i915_emit_glyph_rectangle_general; + break; + } + + status = cairo_device_acquire (surface->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SHADER; + + device = i915_device (surface); + + _cairo_scaled_font_freeze_cache (scaled_font); + if (scaled_font->surface_private == NULL) { + /* XXX couple into list to remove on context destruction */ + scaled_font->surface_private = device; + scaled_font->surface_backend = surface->intel.drm.base.backend; + } + + memset (glyph_cache, 0, sizeof (glyph_cache)); + + for (i = 0; i < num_glyphs; i++) { + cairo_scaled_glyph_t *scaled_glyph; + int x, y, x1, x2, y1, y2; + int cache_index = glyphs[i].index % ARRAY_LENGTH (glyph_cache); + intel_glyph_t *glyph; + + scaled_glyph = glyph_cache[cache_index]; + if (scaled_glyph == NULL || + _cairo_scaled_glyph_index (scaled_glyph) != glyphs[i].index) + { + status = _cairo_scaled_glyph_lookup (scaled_font, + glyphs[i].index, + CAIRO_SCALED_GLYPH_INFO_METRICS, + &scaled_glyph); + if (unlikely (status)) + goto FINISH; + + glyph_cache[cache_index] = scaled_glyph; + } + + if (unlikely (scaled_glyph->metrics.width == 0 || + scaled_glyph->metrics.height == 0)) + { + continue; + } + + /* XXX glyph images are snapped to pixel locations */ + x = _cairo_lround (glyphs[i].x); + y = _cairo_lround (glyphs[i].y); + + x1 = x + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.x); + y1 = y + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.y); + x2 = x + _cairo_fixed_integer_ceil (scaled_glyph->bbox.p2.x); + y2 = y + _cairo_fixed_integer_ceil (scaled_glyph->bbox.p2.y); + + if (x2 < extents.bounded.x || + y2 < extents.bounded.y || + x1 > extents.bounded.x + extents.bounded.width || + y1 > extents.bounded.y + extents.bounded.height) + { + continue; + } + + if (scaled_glyph->surface_private == NULL) { + status = intel_get_glyph (&device->intel, scaled_font, scaled_glyph); + if (unlikely (status == CAIRO_INT_STATUS_NOTHING_TO_DO)) { + status = 
CAIRO_STATUS_SUCCESS; + continue; + } + if (unlikely (status)) + goto FINISH; + } + glyph = intel_glyph_pin (scaled_glyph->surface_private); + + if (glyph->cache->buffer.bo != last_bo) { + intel_buffer_cache_t *cache = glyph->cache; + + shader.mask.base.bo = cache->buffer.bo; + shader.mask.base.offset[0] = cache->buffer.offset; + shader.mask.base.map[0] = cache->buffer.map0; + shader.mask.base.map[1] = cache->buffer.map1; + shader.mask.base.content = CAIRO_CONTENT_ALPHA; /* XXX */ + + status = i915_shader_commit (&shader, device); + if (unlikely (status)) + goto FINISH; + + last_bo = cache->buffer.bo; + } + + x1 += mask_x; x2 += mask_x; + y1 += mask_y; y2 += mask_y; + + /* XXX clip glyph */ + emit_func (device, &shader, x1, y1, x2, y2, glyph); + } + + status = CAIRO_STATUS_SUCCESS; + FINISH: + _cairo_scaled_font_thaw_cache (scaled_font); + cairo_device_release (surface->intel.drm.base.device); + CLEANUP_SHADER: + i915_shader_fini (&shader); + + if (unlikely (status == CAIRO_INT_STATUS_UNSUPPORTED)) { + cairo_path_fixed_t path; + + _cairo_path_fixed_init (&path); + status = _cairo_scaled_font_glyph_path (scaled_font, + glyphs + i, num_glyphs - i, + &path); + if (mask_x | mask_y) { + _cairo_path_fixed_translate (&path, + _cairo_fixed_from_int (mask_x), + _cairo_fixed_from_int (mask_y)); + } + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = surface->intel.drm.base.backend->fill (shader.target, + shader.op, + mask != NULL ? &_cairo_pattern_white.base : source, + &path, + CAIRO_FILL_RULE_WINDING, + 0, + scaled_font->options.antialias, + clip); + } + _cairo_path_fixed_fini (&path); + } + + if (mask != NULL) { + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = i915_surface_mask_internal (surface, op, source, mask, + clip, &extents); + } + cairo_surface_finish (&mask->intel.drm.base); + cairo_surface_destroy (&mask->intel.drm.base); + } + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} diff --git a/src/drm/cairo-drm-i915-private.h b/src/drm/cairo-drm-i915-private.h new file mode 100644 index 00000000..060c21c8 --- /dev/null +++ b/src/drm/cairo-drm-i915-private.h @@ -0,0 +1,1169 @@ +/* + * Copyright © 2006, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#ifndef CAIRO_DRM_I915_PRIVATE_H +#define CAIRO_DRM_I915_PRIVATE_H + +#include "cairo-types-private.h" + +#include "cairo-drm-private.h" +#include "cairo-drm-intel-private.h" +#include "cairo-drm-intel-command-private.h" +#include "cairo-drm-intel-ioctl-private.h" +#include "cairo-freelist-private.h" + +#define I915_VERBOSE 1 + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_TYPE_SHIFT 4 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 
1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. 
*/ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + +/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic + * operations + */ +#define MASK_X 0x1 +#define MASK_Y 0x2 +#define MASK_Z 0x4 +#define MASK_W 0x8 +#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z) +#define MASK_XYZW (MASK_XYZ | MASK_W) +#define MASK_SATURATE 0x10 + +/* Temporary, undeclared regs. Preserved between phases */ +#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0) +#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1) +#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2) +#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3) + +/* Texture coordinate regs. Must be declared. */ +#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0) +#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1) +#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2) +#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3) +#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4) +#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5) +#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6) +#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7) +#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8) +#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9) +#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10) + +/* Constant values */ +#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0) +#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1) +#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2) +#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3) +#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4) +#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5) +#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6) +#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7) + +/* Sampler regs */ +#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0) +#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1) +#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2) +#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3) + +/* Output color */ +#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0) + +/* Output depth */ +#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0) + +/* Unpreserved temporary regs */ +#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0) +#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1) +#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2) +#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3) + +#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3) +#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 3) +#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 3) +#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 3) + +#define REG_CHANNEL_MASK 0x7 + +#define REG_NR(reg) ((reg) & REG_NR_MASK) +#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK) +#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Y(reg) (((reg) >> 
Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK) + +enum i915_fs_channel { + X_CHANNEL_VAL = 1, + Y_CHANNEL_VAL, + Z_CHANNEL_VAL, + W_CHANNEL_VAL, + ZERO_CHANNEL_VAL, + ONE_CHANNEL_VAL +}; + +#define i915_fs_operand(reg, x, y, z, w) \ + (reg) | \ + (x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \ + (y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \ + (z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \ + (w##_CHANNEL_VAL << W_CHANNEL_SHIFT) + +/** + * Construct an operand description for using a register with no swizzling + */ +#define i915_fs_operand_reg(reg) \ + i915_fs_operand(reg, X, Y, Z, W) + +#define i915_fs_operand_reg_negate(reg) \ + i915_fs_operand(reg, -X, -Y, -Z, -W) + +/** + * Returns an operand containing (0.0, 0.0, 0.0, 0.0). + */ +#define i915_fs_operand_zero() i915_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO) + +/** + * Returns an unused operand + */ +#define i915_fs_operand_none() i915_fs_operand_zero() + +/** + * Returns an operand containing (1.0, 1.0, 1.0, 1.0). + */ +#define i915_fs_operand_one() i915_fs_operand(FS_R0, ONE, ONE, ONE, ONE) + +#define i915_get_hardware_channel_val(val, shift, negate) \ + (((int) val < 0) ? (~val << shift) | negate : (val - 1) << shift) + +/** + * Outputs a fragment shader command to declare a sampler or texture register. + */ +#define i915_fs_dcl(reg) \ +do { \ + OUT_DWORD (D0_DCL | \ + (REG_TYPE(reg) << D0_TYPE_SHIFT) | \ + (REG_NR(reg) << D0_NR_SHIFT) | \ + ((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \ + OUT_DWORD (0); \ + OUT_DWORD (0); \ +} while (0) + +#define i915_fs_texld(dest_reg, sampler_reg, address_reg) \ +do { \ + OUT_DWORD (T0_TEXLD | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_DWORD((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_DWORD (0); \ +} while (0) + +#define i915_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \ + _i915_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2) + +#define i915_fs_arith(op, dest_reg, operand0, operand1, operand2) \ + _i915_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2) + +#define _i915_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \ +do { \ + /* Set up destination register and write mask */ \ + OUT_DWORD (cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \ + (((dest_mask) & MASK_SATURATE) ? 
A0_DEST_SATURATE : 0) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ +} while (0) + +#define _i915_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\ + /* Set up destination register and write mask */ \ + OUT_DWORD (cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (A0_DEST_CHANNEL_ALL) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + 
i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ +} while (0) + +#define i915_fs_mov(dest_reg, operand0) \ + i915_fs_arith(MOV, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +#define i915_fs_mov_masked(dest_reg, dest_mask, operand0) \ + i915_fs_arith_masked (MOV, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + + +#define i915_fs_frc(dest_reg, operand0) \ + i915_fs_arith (FRC, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +/** Add operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_add(dest_reg, operand0, operand1) \ + i915_fs_arith (ADD, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Multiply operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_mul(dest_reg, operand0, operand1) \ + i915_fs_arith (MUL, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */ +#define i915_fs_rsq(dest_reg, dest_mask, operand0) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (RSQ, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } else { \ + i915_fs_arith (RSQ, dest_reg, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } \ +} while (0) + +/** Puts the minimum of operand0 and operand1 in dest_reg */ +#define i915_fs_min(dest_reg, operand0, operand1) \ + i915_fs_arith (MIN, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Puts the maximum of operand0 and operand1 in dest_reg */ +#define i915_fs_max(dest_reg, operand0, operand1) \ + i915_fs_arith (MAX, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +#define i915_fs_cmp(dest_reg, operand0, operand1, operand2) \ + i915_fs_arith (CMP, dest_reg, operand0, operand1, operand2) + +/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */ +#define i915_fs_mad(dest_reg, dest_mask, op0, op1, op2) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (MAD, dest_reg, op0, op1, op2); \ + } \ +} while (0) + +#define i915_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \ + } \ +} while (0) + +/** + * Perform a 3-component dot-product of operand0 and operand1 and put the + * resulting scalar in the channels of dest_reg specified by the dest_mask. 
+ */ +#define i915_fs_dp3(dest_reg, dest_mask, op0, op1) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP3, dest_reg, dest_mask, \ + op0, op1,\ + i915_fs_operand_none()); \ + } else { \ + i915_fs_arith (DP3, dest_reg, op0, op1,\ + i915_fs_operand_none()); \ + } \ +} while (0) + +static inline uint32_t cairo_const +i915_fs_operand_pure_alpha (int pure) +{ + if (pure & (1 << 3)) + return i915_fs_operand_one (); + else + return i915_fs_operand_zero (); +} + +#define I915_TILING_DEFAULT I915_TILING_Y +#define I915_BO_CACHE_BUCKETS 13 /* cache surfaces up to 16 MiB */ + +typedef struct i915_surface i915_surface_t; +typedef struct i915_device i915_device_t; +typedef struct i915_shader i915_shader_t; + +typedef void (*i915_add_rectangle_func_t) (const i915_shader_t *shader, + int x, int y, + int w, int h); + +#define IMAGE_CACHE_WIDTH 1024 +#define IMAGE_CACHE_HEIGHT 1024 + +typedef struct i915_image_private { + cairo_rtree_node_t node; + intel_buffer_cache_t *container; +} i915_image_private_t; + +#define I915_BATCH_SIZE (64*1024) +#define I915_VBO_SIZE (512*1024) +#define I915_MAX_RELOCS 2048 + +enum { + I915_DEBUG_EXEC = 0x1, + I915_DEBUG_SYNC = 0x2, + I915_DEBUG_BATCH = 0x4, + I915_DEBUG_BUFFER = 0x8, + I915_DEBUG_BUFFER_CACHE = 0x10, + I915_DEBUG_BUFFER_ALLOC = 0x20, + I915_DEBUG_GLYPHS = 0x40, + I915_DEBUG_MAP = 0x80, + I915_DEBUG_THROTTLE = 0x100, +}; + +struct i915_device { + intel_device_t intel; + + cairo_bool_t debug; + + struct i915_batch { + intel_bo_t *target_bo[I915_MAX_RELOCS]; + size_t gtt_size; + + struct drm_i915_gem_exec_object2 exec[I915_MAX_RELOCS]; + int exec_count; + + struct drm_i915_gem_relocation_entry reloc[I915_MAX_RELOCS]; + uint16_t reloc_count; + + uint16_t used; + } batch; + + uint32_t vbo; + uint32_t vbo_offset; + uint32_t vbo_used; + uint32_t vbo_max_index; + uint32_t vertex_index; + uint32_t vertex_count; + uint32_t floats_per_vertex; + uint32_t rectangle_size; + intel_bo_t *last_vbo; + uint32_t last_vbo_offset; + uint32_t last_vbo_space; + + i915_shader_t *current_shader; + + i915_surface_t *current_target; + uint32_t current_size; + uint32_t current_diffuse; + uint32_t current_colorbuf; + uint32_t *current_source; + uint32_t *current_mask; + uint32_t *current_clip; + uint32_t current_program; + uint32_t current_texcoords; + uint32_t current_blend; + uint32_t current_constants[8*4]; + uint32_t current_n_constants; + uint32_t current_samplers[2*(3+3*4)]; + uint32_t current_n_samplers; + uint32_t last_source_fragment; + + cairo_list_t image_caches[2]; + + uint32_t batch_header[18]; + uint32_t batch_base[I915_BATCH_SIZE / sizeof (uint32_t)]; + uint8_t vbo_base[I915_VBO_SIZE]; +}; + +enum { + CURRENT_SOURCE = 0x1, + CURRENT_MASK = 0x2, + CURRENT_CLIP = 0x4 +}; + +typedef enum { + VS_CONSTANT, + VS_LINEAR, + VS_RADIAL, + VS_TEXTURE, + VS_TEXTURE_16, +} i915_vertex_shader_t; + +typedef enum { + FS_ZERO, + FS_ONE, + FS_PURE, + FS_CONSTANT, + FS_DIFFUSE, + FS_LINEAR, + FS_RADIAL, + FS_TEXTURE, + FS_YUV, + FS_SPANS, +} i915_fragment_shader_t; + +#define FS_DETAILS_SHIFT 4 + +typedef enum { + PATTERN_BASE, + PATTERN_CONSTANT, + PATTERN_LINEAR, + PATTERN_RADIAL, + PATTERN_TEXTURE, +} i915_shader_channel_t; + +struct i915_surface { + intel_surface_t intel; + + uint32_t map0, map1; + uint32_t colorbuf; + + uint32_t offset; + uint32_t is_current_texture; + + i915_image_private_t *cache; + + intel_bo_t *stencil; + uint32_t stencil_stride; + uint32_t stencil_offset; +}; + +typedef enum { + NONE = 0, + YUV_I420, + /* XXX */ + YUV_YV12, + YUV_YUY2, + YUV_UYVY, +} 
i915_packed_pixel_t; + +/* read-only container */ +#define I915_PACKED_PIXEL_SURFACE_TYPE 0x1000 +typedef struct i915_packed_pixel_surface { + cairo_surface_t base; + + i915_packed_pixel_t pixel; + + i915_device_t *device; + intel_bo_t *bo; + uint32_t is_current_texture; + + uint32_t offset[4]; + uint32_t stride[4]; + uint32_t width[4]; + uint32_t height[4]; + uint32_t map0[4], map1[4]; +} i915_packed_pixel_surface_t; + +struct i915_shader { + i915_device_t *device; + i915_surface_t *target; + + cairo_operator_t op; + uint32_t blend; + cairo_content_t content; + + cairo_bool_t need_combine; + + i915_add_rectangle_func_t add_rectangle; + + union i915_shader_channel { + struct { + i915_vertex_shader_t vertex; + i915_fragment_shader_t fragment; + i915_shader_channel_t pattern; + } type; + struct i915_shader_base { + i915_vertex_shader_t vertex; + i915_fragment_shader_t fragment; + i915_shader_channel_t pattern; + uint32_t texfmt; + cairo_content_t content; + uint32_t mode; + intel_bo_t *bo; + uint32_t n_samplers; + uint32_t offset[4]; + uint32_t map[2*4]; + uint32_t sampler[2]; + cairo_matrix_t matrix; + } base; + struct i915_shader_solid { + struct i915_shader_base base; + cairo_color_t color; + int pure; + } solid; + struct i915_shader_linear { + struct i915_shader_base base; + struct { + float red, green, blue, alpha; + } color0, color1; + float dx, dy, offset; + } linear; + struct i915_shader_radial { + struct i915_shader_base base; + float constants[8]; + } radial; + struct i915_shader_surface { + struct i915_shader_base base; + i915_packed_pixel_t pixel; + } surface; + } source, mask, clip, dst; +}; + +enum i915_shader_linear_mode { + /* XXX REFLECT */ + LINEAR_TEXTURE, + LINEAR_NONE, + LINEAR_REPEAT, + LINEAR_PAD, +}; + +enum i915_shader_radial_mode { + RADIAL_ONE, + RADIAL_TWO +}; + +typedef cairo_status_t +(*i915_spans_func_t) (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents); + +cairo_private cairo_status_t +i915_clip_and_composite_spans (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + i915_spans_func_t draw_func, + void *draw_closure, + const cairo_composite_rectangles_t*extents, + cairo_clip_t *clip); + +cairo_private cairo_surface_t * +i915_surface_create_internal (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height, + uint32_t tiling, + cairo_bool_t gpu_target); + +cairo_private i915_surface_t * +i915_surface_create_from_cacheable_image_internal (i915_device_t *device, + cairo_image_surface_t *image); + +cairo_private void +i915_surface_scaled_font_fini (cairo_scaled_font_t *scaled_font); + +cairo_private cairo_int_status_t +i915_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining); + +static inline int cairo_const +i915_tiling_height (uint32_t tiling, int height) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return (height + 1) & -2; + case I915_TILING_X: return (height + 7) & -8; + case I915_TILING_Y: return (height + 31) & -32; + } +} + +static inline uint32_t cairo_const +i915_tiling_stride (int format, uint32_t stride) +{ + uint32_t tile_width; + + if (format == I915_TILING_NONE) + return (stride + 31) & -32; + + tile_width = 512; + /* XXX Currently the kernel enforces a tile_width of 512 for TILING_Y. 
+ + <jbarnes> the docs are a bit confused on that front + <jbarnes> once we enable it on 915 we'll find out what the tile width size should be in the fence setup + <jbarnes> it could be that 915 has y tiling but that the minimum width is 512 or something + <jbarnes> yeah it's probably 128 on 915 also + <jbarnes> it's just that we haven't tested + <jbarnes> but I wasn't thinking that the tile widths were the same + <jbarnes> only that in order to fence y tiles on 915 you needed pitch to be a multiple of 4 y tiles (or something like that) + + tile_width = format == I915_TILING_Y ? 128 : 512; + */ + + /* needs a pot tile width */ + while (tile_width < stride) + tile_width <<= 1; + + return tile_width; +} + +static inline uint32_t cairo_const +i915_tiling_size (uint32_t tiling, uint32_t size) +{ + uint32_t fence; + + if (tiling == I915_TILING_NONE) + return (size + 4095) & -4096; + + fence = 1024 * 1024; /* 1 MiB */ + while (fence < size) + fence <<= 1; + + return fence; +} + +static inline cairo_bool_t cairo_pure +i915_texture_filter_is_nearest (cairo_filter_t filter) +{ + switch (filter) { + case CAIRO_FILTER_BEST: + case CAIRO_FILTER_GOOD: + case CAIRO_FILTER_BILINEAR: + case CAIRO_FILTER_GAUSSIAN: + return FALSE; + default: + case CAIRO_FILTER_FAST: + case CAIRO_FILTER_NEAREST: + return TRUE; + } +} + +static inline uint32_t cairo_pure +i915_texture_filter (cairo_filter_t filter) +{ + switch (filter) { + case CAIRO_FILTER_BEST: + case CAIRO_FILTER_GOOD: + case CAIRO_FILTER_BILINEAR: + case CAIRO_FILTER_GAUSSIAN: + return + (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT); + default: + case CAIRO_FILTER_FAST: + case CAIRO_FILTER_NEAREST: + return + (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) | + (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); + } +} + +static inline uint32_t cairo_pure +i915_texture_extend (cairo_extend_t extend) +{ + switch (extend) { + default: + case CAIRO_EXTEND_NONE: + return + (TEXCOORDMODE_CLAMP_BORDER << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_BORDER << SS3_TCY_ADDR_MODE_SHIFT); + case CAIRO_EXTEND_REPEAT: + return + (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT); + case CAIRO_EXTEND_PAD: + return + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT); + case CAIRO_EXTEND_REFLECT: + return + (TEXCOORDMODE_MIRROR << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_MIRROR << SS3_TCY_ADDR_MODE_SHIFT); + } +} + +static inline uint32_t cairo_pure +BUF_tiling (uint32_t tiling) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return 0; + case I915_TILING_X: return BUF_3D_TILED_SURFACE | BUF_3D_TILE_WALK_X; + case I915_TILING_Y: return BUF_3D_TILED_SURFACE | BUF_3D_TILE_WALK_Y; + } +} + +#define OUT_DWORD(dword) i915_batch_emit_dword (device, dword) +#define OUT_RELOC(surface, read, write) i915_batch_emit_reloc (device, to_intel_bo (surface->intel.drm.bo), surface->offset, read, write) + +#define FS_LOCALS \ + uint32_t *_shader_start + +#define FS_BEGIN() \ +do { \ + _shader_start = BATCH_PTR (device); \ + OUT_DWORD (_3DSTATE_PIXEL_SHADER_PROGRAM); \ +} while (0) + +#define FS_END() \ +do { \ + *_shader_start |= BATCH_PTR (device) - _shader_start - 2; \ +} while (0); + +static inline int32_t +i915_batch_space (i915_device_t *device) +{ + /* leave room for RECTLIST(4) + MI_BUFFER_END + MI_NOOP */ + return sizeof (device->batch_base) - (device->batch.used << 2) - 32; +} + +static inline cairo_bool_t +i915_check_aperture_size (const 
i915_device_t *device, int relocs, size_t size) +{ + return device->batch.reloc_count + relocs < I915_MAX_RELOCS && + device->batch.gtt_size + size <= device->intel.gtt_avail_size; +} + +static inline cairo_bool_t +i915_check_aperture (const i915_device_t *device, intel_bo_t **bo_array, int count) +{ + uint32_t relocs = 0, size = 0; + + while (count--) { + const intel_bo_t *bo = *bo_array++; + if (bo->exec == NULL) { + relocs++; + size += bo->base.size; + } + } + + return i915_check_aperture_size (device, relocs, size); +} + +#define BATCH_PTR(device) &(device)->batch_base[(device)->batch.used] +static inline void +i915_batch_emit_dword (i915_device_t *device, uint32_t dword) +{ + device->batch_base[device->batch.used++] = dword; +} + +cairo_private void +i915_batch_add_reloc (i915_device_t *device, uint32_t pos, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain); + +static inline void +i915_batch_fill_reloc (i915_device_t *device, uint32_t pos, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain) +{ + i915_batch_add_reloc (device, pos, + bo, offset, + read_domains, write_domain); + device->batch_base[pos] = bo->offset + offset; +} + +static inline void +i915_batch_emit_reloc (i915_device_t *device, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain) +{ + i915_batch_add_reloc (device, device->batch.used, + bo, offset, + read_domains, write_domain); + i915_batch_emit_dword (device, bo->offset + offset); +} + +cairo_private cairo_status_t +i915_vbo_flush (i915_device_t *device); + +cairo_private void +i915_vbo_finish (i915_device_t *device); + +cairo_private cairo_status_t +i915_batch_flush (i915_device_t *device); + +static inline float * +i915_add_rectangle (i915_device_t *device) +{ + float *vertices; + uint32_t size; + + assert (device->floats_per_vertex); + + size = device->rectangle_size; + if (unlikely (device->vbo_offset + size > I915_VBO_SIZE)) + i915_vbo_finish (device); + + vertices = (float *) (device->vbo_base + device->vbo_offset); + device->vbo_used = device->vbo_offset += size; + device->vertex_count += 3; + return vertices; +} + +static inline i915_device_t * +i915_device (i915_surface_t *surface) +{ + return (i915_device_t *) surface->intel.drm.base.device; +} + +cairo_private void +i915_shader_init (i915_shader_t *shader, + i915_surface_t *dst, + cairo_operator_t op); + +cairo_private cairo_status_t +i915_shader_acquire_pattern (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_pattern_t *pattern, + const cairo_rectangle_int_t *extents); + +cairo_private void +i915_shader_set_clip (i915_shader_t *shader, + cairo_clip_t *clip); + +cairo_private int +i915_shader_num_texcoords (const i915_shader_t *shader); + +static inline double cairo_const +i915_shader_linear_texcoord (const struct i915_shader_linear *l, + double src_x, double src_y) +{ + return l->dx * src_x + l->dy * src_y + l->offset; +} + +cairo_private cairo_status_t +i915_shader_commit (i915_shader_t *shader, + i915_device_t *device); + +cairo_private void +i915_shader_fini (i915_shader_t *shader); + +cairo_private cairo_status_t +i915_fixup_unbounded (i915_surface_t *dst, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip); + +#endif /* CAIRO_DRM_I915_PRIVATE_H */ diff --git a/src/drm/cairo-drm-i915-shader.c b/src/drm/cairo-drm-i915-shader.c new file mode 100644 index 00000000..afcbd42b --- /dev/null +++ b/src/drm/cairo-drm-i915-shader.c @@ -0,0 +1,2674 @@ +/* cairo - a 
vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-error-private.h" +#include "cairo-drm-i915-private.h" +#include "cairo-surface-subsurface-private.h" +#include "cairo-surface-snapshot-private.h" + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS +/* for DRI2/DRM interoperability */ +#include "cairo-xcb-private.h" +#endif + +#if 0 +static cairo_status_t +i915_packed_pixel_surface_finish (void *abstract_surface) +{ + i915_packed_pixel_surface_t *surface = abstract_surface; + i915_device_t *device; + + device = i915_device_acquire (&surface->device->intel.base); + + intel_bo_destroy (&device->intel, surface->bo); + + if (surface->is_current_texture) { + if (surface->is_current_texture & CURRENT_SOURCE) + device->current_source = NULL; + if (surface->is_current_texture & CURRENT_MASK) + device->current_mask = NULL; + device->current_n_samplers = 0; + } + + i915_device_release (device); + + return CAIRO_STATUS_SUCCESS; +} + +static const cairo_surface_backend_t i915_packed_pixel_surface_backend = { + I915_PACKED_PIXEL_SURFACE_TYPE, + i915_packed_pixel_surface_finish, +}; + +static cairo_surface_t * +i915_packed_pixel_surface_create (i915_device_t *device, + i915_packed_pixel_t pixel, + const uint8_t *data, + uint32_t length, + uint32_t width, uint32_t height) +{ + i915_packed_pixel_surface_t *surface; + cairo_content_t content; + uint32_t tiling, size; + uint32_t stride, half_stride; + uint32_t i; + + if (width > 2048 || height > 2048) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_SIZE)); + + surface = malloc (sizeof (i915_packed_pixel_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + tiling = I915_TILING_NONE; /* XXX */ + half_stride = stride = i915_tiling_stride (tiling, width/2); + if (stride < width) + stride *= 2 ; + height = i915_tiling_height (tiling, height); + + switch (surface->pixel = pixel) { + case YUV_I420: + content = CAIRO_CONTENT_COLOR; + + surface->offset[0] = 0; + surface->width[0] = width; + surface->height[0] = height; + 
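+        /* Layout sketch (illustrative only): the full-resolution 8bpp Y
+         * plane sits at offset 0, followed by the quarter-size U and V
+         * planes; V is placed either alongside U within the same rows
+         * (when width < half_stride) or in its own block after U, which
+         * is what the offset[2]/size computation below selects. */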
surface->stride[0] = stride; + surface->map0[0] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling); + surface->map0[0] |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + surface->map1[0] = (stride / 4 - 1) << MS4_PITCH_SHIFT; + + surface->offset[1] = stride * height; + surface->width[1] = width / 2; + surface->height[1] = height / 2; + surface->stride[1] = half_stride; + surface->map0[1] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling); + surface->map0[1] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) | + ((width/2 - 1) << MS3_WIDTH_SHIFT); + surface->map1[1] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT; + + if (width < half_stride) { + surface->offset[2] = stride * height + half_stride / 2; + size = stride * height + half_stride * height / 2; + } else { + surface->offset[2] = stride * height + half_stride * height / 2; + size = stride * height + half_stride * height; + } + surface->width[2] = width / 2; + surface->height[2] = height / 2; + surface->stride[2] = half_stride; + surface->map0[2] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling); + surface->map0[2] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) | + ((width/2 - 1) << MS3_WIDTH_SHIFT); + surface->map1[2] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT; + break; + + case NONE: + case YUV_YV12: + case YUV_YUY2: + case YUV_UYVY: + ASSERT_NOT_REACHED; + break; + } + + _cairo_surface_init (&surface->base, + &i915_packed_pixel_surface_backend, + content); + + surface->bo = intel_bo_create (&device->intel, size, FALSE); + assert (tiling == I915_TILING_NONE); + if (unlikely (surface->bo == NULL)) { + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + if (tiling == I915_TILING_NONE) { + intel_bo_t *bo = surface->bo; + uint32_t dst; + int uv; + + dst = surface->offset[0]; + if (width == stride) { + size = stride * height; + intel_bo_write (&device->intel, bo, dst, size, data); + data += size; + } else { + for (i = 0; i < height; i++) { + intel_bo_write (&device->intel, bo, dst, width, data); + dst += stride; + data += width; + } + } + + for (uv = 1; uv <= 2; uv++) { + dst = surface->offset[uv]; + if (width / 2 == half_stride) { + size = half_stride * height / 2; + intel_bo_write (&device->intel, bo, dst, size, data); + data += size; + } else { + size = width / 2; + for (i = 0; i < height / 2; i++) { + intel_bo_write (&device->intel, bo, dst, size, data); + dst += half_stride; + data += size; + } + } + } + } else { + uint8_t *dst, *base; + + base = intel_bo_map (&device->intel, surface->bo); + + dst = base + surface->offset[0]; + if (width == stride) { + size = stride * height; + memcpy (dst, data, size); + data += size; + } else { + for (i = 0; i < height; i++) { + memcpy (dst, data, width); + dst += stride; + data += width; + } + } + + dst = base + surface->offset[1]; + if (width / 2 == half_stride) { + size = half_stride * height / 2; + memcpy (dst, data, size); + data += size; + } else { + size = width / 2; + for (i = 0; i < height / 2; i++) { + memcpy (dst, data, size); + dst += half_stride; + data += size; + } + } + + dst = base + surface->offset[2]; + if (width / 2 == half_stride) { + size = half_stride * height / 2; + memcpy (dst, data, size); + data += size; + } else { + size = width / 2; + for (i = 0; i < height / 2; i++) { + memcpy (dst, data, size); + dst += half_stride; + data += size; + } + } + + intel_bo_unmap (surface->bo); + } + + surface->device = device; + surface->is_current_texture = 0; + + return &surface->base; +} + +static cairo_int_status_t +i915_clone_yuv 
(i915_surface_t *surface, + cairo_surface_t *source, + int width, int height, + cairo_surface_t **clone_out) +{ + const uint8_t *mime_data = NULL; + unsigned int mime_data_length; + cairo_surface_t *clone; + + cairo_surface_get_mime_data (source, "video/x-raw-yuv/i420", + &mime_data, &mime_data_length); + if (mime_data == NULL) + return CAIRO_INT_STATUS_UNSUPPORTED; + + clone = + i915_packed_pixel_surface_create ((i915_device_t *) surface->base.device, + YUV_I420, + mime_data, mime_data_length, + width, height); + if (clone == NULL) + return CAIRO_INT_STATUS_UNSUPPORTED; + if (unlikely (clone->status)) + return clone->status; + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} +#endif + +/* Max instruction count: 4 */ +static void +i915_shader_linear_color (i915_device_t *device, + enum i915_shader_linear_mode mode, + int in, int c0, int c1, int out) +{ + int tmp = FS_U0; + + switch (mode) { + case LINEAR_TEXTURE: + ASSERT_NOT_REACHED; + case LINEAR_NONE: + tmp = in; + break; + + case LINEAR_REPEAT: + i915_fs_frc (tmp, i915_fs_operand (in, X, X, X, X)); + break; +#if 0 + case LINEAR_REFLECT: + /* XXX needs an extra constant: C2 [0.5, 2.0, x, x] */ + i915_fs_mul (tmp, in, 0.5); + i915_fs_frc (tmp, i915_fs_operand_reg (tmp)); + i915_fs_mul (tmp, tmp, 2.0); + i915_fs_add (tmp, i915_fs_operand_one (), + i915_fs_operand_reg_negate (tmp)); + i915_fs_cmp (tmp, + i915_fs_operand_reg (tmp), + i915_fs_operand_reg (tmp), + i915_fs_operand_reg_negate (tmp)); + i915_fs_add (tmp, i915_fs_operand_one (), + i915_fs_operand_reg_negate (tmp)); +#endif + case LINEAR_PAD: + i915_fs_max (tmp, + i915_fs_operand_zero (), + i915_fs_operand (in, X, X, X, X)); + i915_fs_min (tmp, + i915_fs_operand_one (), + i915_fs_operand_reg (tmp)); + break; + } + + /* interpolate */ + i915_fs_mad (out, 0, + i915_fs_operand (tmp, -X, -X, -X, -X), + i915_fs_operand_reg (c0), + i915_fs_operand_reg (c0)); + i915_fs_mad (out, 0, + i915_fs_operand (tmp, X, X, X, X), + i915_fs_operand_reg (c1), + i915_fs_operand_reg (out)); +} + +static void +i915_shader_radial_init (struct i915_shader_radial *r, + const cairo_radial_pattern_t *radial) +{ + double dx, dy, dr, r1; + + dx = _cairo_fixed_to_double (radial->c2.x - radial->c1.x); + dy = _cairo_fixed_to_double (radial->c2.y - radial->c1.y); + dr = _cairo_fixed_to_double (radial->r2 - radial->r1); + + r1 = _cairo_fixed_to_double (radial->r1); + + if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) { + /* XXX dr == 0, meaningless with anything other than PAD */ + r->constants[0] = _cairo_fixed_to_double (radial->c1.x) / dr; + r->constants[1] = _cairo_fixed_to_double (radial->c1.y) / dr; + r->constants[2] = 1. 
/ dr; + r->constants[3] = -r1 / dr; + + r->constants[4] = 0; + r->constants[5] = 0; + r->constants[6] = 0; + r->constants[7] = 0; + + r->base.mode = RADIAL_ONE; + } else { + r->constants[0] = -_cairo_fixed_to_double (radial->c1.x); + r->constants[1] = -_cairo_fixed_to_double (radial->c1.y); + r->constants[2] = r1; + r->constants[3] = -4 * (dx*dx + dy*dy - dr*dr); + + r->constants[4] = -2 * dx; + r->constants[5] = -2 * dy; + r->constants[6] = -2 * r1 * dr; + r->constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr)); + + r->base.mode = RADIAL_TWO; + } + + r->base.matrix = radial->base.base.matrix; +} + +/* Max instruction count: 10 */ +static void +i915_shader_radial_coord (i915_device_t *device, + enum i915_shader_radial_mode mode, + int in, int g0, int g1, int out) +{ + switch (mode) { + case RADIAL_ONE: + /* + pdx = (x - c1x) / dr, pdy = (y - c1y) / dr; + r² = pdx*pdx + pdy*pdy + t = r²/sqrt(r²) - r1/dr; + */ + i915_fs_mad (FS_U0, MASK_X | MASK_Y, + i915_fs_operand (in, X, Y, ZERO, ZERO), + i915_fs_operand (g0, Z, Z, ZERO, ZERO), + i915_fs_operand (g0, -X, -Y, ZERO, ZERO)); + i915_fs_dp2add (FS_U0, MASK_X, + i915_fs_operand (FS_U0, X, Y, ZERO, ZERO), + i915_fs_operand (FS_U0, X, Y, ZERO, ZERO), + i915_fs_operand_zero ()); + i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U0, X, X, X, X)); + i915_fs_mad (out, MASK_X, + i915_fs_operand (FS_U0, X, ZERO, ZERO, ZERO), + i915_fs_operand (out, X, ZERO, ZERO, ZERO), + i915_fs_operand (g0, W, ZERO, ZERO, ZERO)); + break; + + case RADIAL_TWO: + /* + pdx = x - c1x, pdy = y - c1y; + A = dx² + dy² - dr² + B = -2*(pdx*dx + pdy*dy + r1*dr); + C = pdx² + pdy² - r1²; + det = B*B - 4*A*C; + t = (-B + sqrt (det)) / (2 * A) + */ + + /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */ + i915_fs_add (FS_U0, + i915_fs_operand (in, X, Y, ZERO, ZERO), + i915_fs_operand (g0, X, Y, Z, ZERO)); + /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */ + i915_fs_dp3 (FS_U0, MASK_W, + i915_fs_operand (FS_U0, X, Y, ONE, ZERO), + i915_fs_operand (g1, X, Y, Z, ZERO)); + /* u1.x = pdx² + pdy² - r1²; [C] */ + i915_fs_dp3 (FS_U1, MASK_X, + i915_fs_operand (FS_U0, X, Y, Z, ZERO), + i915_fs_operand (FS_U0, X, Y, -Z, ZERO)); + /* u1.x = C, u1.y = B, u1.z=-4*A; */ + i915_fs_mov_masked (FS_U1, MASK_Y, i915_fs_operand (FS_U0, W, W, W, W)); + i915_fs_mov_masked (FS_U1, MASK_Z, i915_fs_operand (g0, W, W, W, W)); + /* u1.x = B² - 4*A*C */ + i915_fs_dp2add (FS_U1, MASK_X, + i915_fs_operand (FS_U1, X, Y, ZERO, ZERO), + i915_fs_operand (FS_U1, Z, Y, ZERO, ZERO), + i915_fs_operand_zero ()); + /* out.x = -B + sqrt (B² - 4*A*C), + * out.y = -B - sqrt (B² - 4*A*C), + */ + i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U1, X, X, X, X)); + i915_fs_mad (out, MASK_X | MASK_Y, + i915_fs_operand (out, X, X, ZERO, ZERO), + i915_fs_operand (FS_U1, X, -X, ZERO, ZERO), + i915_fs_operand (FS_U0, -W, -W, ZERO, ZERO)); + /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), + * out.y = (-B - sqrt (B² - 4*A*C)) / (2 * A) + */ + i915_fs_mul (out, + i915_fs_operand (out, X, Y, ZERO, ZERO), + i915_fs_operand (g1, W, W, ZERO, ZERO)); + /* if (A > 0) + * out = (-B + sqrt (B² - 4*A*C)) / (2 * A), + * else + * out = (-B - sqrt (B² - 4*A*C)) / (2 * A) + */ + i915_fs_cmp (out, + i915_fs_operand (g1, W, ZERO, ZERO, ZERO), + i915_fs_operand (out, X, ZERO, ZERO, ZERO), + i915_fs_operand (out, Y, ZERO, ZERO, ZERO)); + break; + } +} + +/* Max instruction count: 7 */ +static inline void +i915_shader_yuv_color (i915_device_t *device, + int y, int u, int v, + int c0, int c1, int c2, + int out) +{ + i915_fs_mov_masked (FS_U0, MASK_X, 
i915_fs_operand_reg (y)); + i915_fs_mov_masked (FS_U0, MASK_Y, i915_fs_operand_reg (u)); + i915_fs_mov_masked (FS_U0, MASK_Z, i915_fs_operand_reg (v)); + + i915_fs_add (FS_U0, + i915_fs_operand_reg (FS_U0), + i915_fs_operand_reg (c0)); + i915_fs_dp3 (out, MASK_X, + i915_fs_operand_reg (FS_U0), + i915_fs_operand (c1, X, ZERO, Y, ZERO)); + i915_fs_dp3 (out, MASK_Z, + i915_fs_operand_reg (FS_U0), + i915_fs_operand (c1, Z, W, ZERO, ZERO)); + i915_fs_dp3 (out, MASK_Y, + i915_fs_operand_reg (FS_U0), + i915_fs_operand_reg (c2)); +} + +static inline uint32_t +i915_shader_channel_key (const union i915_shader_channel *channel) +{ + return (channel->type.fragment & 0x0f) | (channel->base.mode << FS_DETAILS_SHIFT); +} + +static uint32_t +i915_shader_channel_get_num_tex_coords (const union i915_shader_channel *channel) +{ + switch (channel->type.fragment) { + default: + case FS_ZERO: + case FS_ONE: + case FS_CONSTANT: + case FS_PURE: + case FS_DIFFUSE: + return 0; + + case FS_LINEAR: + case FS_RADIAL: + case FS_TEXTURE: + case FS_SPANS: + case FS_YUV: + return 1; + } +} + +static uint32_t +i915_shader_get_num_tex_coords (const i915_shader_t *shader) +{ + uint32_t num_tex_coords; + + num_tex_coords = 0; + + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->source); + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->mask); + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->clip); + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->dst); + + return num_tex_coords; +} + +#define i915_fs_operand_impure(reg, channel, pure) \ + (reg | \ + (((pure & (1 << 0)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \ + (((pure & (1 << 1)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \ + (((pure & (1 << 2)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \ + (((pure & (1 << 3)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT)) + +#define i915_fs_operand_pure(pure) \ + (FS_R0 | \ + (((pure & (1 << 0)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \ + (((pure & (1 << 1)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \ + (((pure & (1 << 2)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \ + (((pure & (1 << 3)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT)) + +#define i915_fs_operand_reg_pure(reg, pure) \ + (reg | \ + (((pure & (1 << 0)) ? X_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \ + (((pure & (1 << 1)) ? Y_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \ + (((pure & (1 << 2)) ? Z_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \ + (((pure & (1 << 3)) ? 
W_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT)) + +static void +i915_set_shader_program (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t num_tex_coords; + uint32_t num_samplers; + uint32_t n; + uint32_t texture_offset = 0; + uint32_t constant_offset = 0; + uint32_t sampler_offset = 0; + uint32_t source_reg; + uint32_t source_pure; + uint32_t mask_reg; + uint32_t out_reg; + uint32_t dest_reg; + FS_LOCALS; + + n = (i915_shader_channel_key (&shader->source) << 0) | + (i915_shader_channel_key (&shader->mask) << 8) | + (i915_shader_channel_key (&shader->clip) << 16) | + (shader->op << 24) | + (((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31); + if (n == device->current_program) + return; + device->current_program = n; + + FS_BEGIN (); + + if (shader->source.type.fragment == FS_ZERO) { + if (shader->clip.type.fragment == FS_TEXTURE) { + /* XXX need_combine */ + assert (shader->mask.type.fragment == (i915_fragment_shader_t) -1); + i915_fs_dcl (FS_T0); + i915_fs_texld (FS_U0, FS_S0, FS_T0); + if ((shader->content & CAIRO_CONTENT_COLOR) == 0) + i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, W, W, W, W)); + else + i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, ZERO, ZERO, ZERO, W)); + } else { + i915_fs_mov (FS_OC, i915_fs_operand_zero ()); + } + + FS_END (); + return; + } + + num_tex_coords = i915_shader_get_num_tex_coords (shader); + for (n = 0; n < num_tex_coords; n++) + i915_fs_dcl (FS_T0 + n); + + num_samplers = + shader->source.base.n_samplers + + shader->mask.base.n_samplers + + shader->clip.base.n_samplers + + shader->dst.base.n_samplers; + for (n = 0; n < num_samplers; n++) + i915_fs_dcl (FS_S0 + n); + + source_reg = ~0; + source_pure = 0; + out_reg = FS_R0; + if (! shader->need_combine && + shader->mask.type.fragment == (i915_fragment_shader_t) -1 && + shader->clip.type.fragment != FS_TEXTURE && + shader->content != CAIRO_CONTENT_ALPHA) + { + out_reg = FS_OC; + } + + switch (shader->source.type.fragment) { + default: + case FS_ZERO: + case FS_SPANS: + ASSERT_NOT_REACHED; + + case FS_PURE: + source_pure = shader->source.solid.pure; + case FS_ONE: + break; + + case FS_CONSTANT: + source_reg = FS_C0; + constant_offset += 1; + break; + + case FS_DIFFUSE: + i915_fs_dcl (FS_T8); + source_reg = FS_T8; + break; + + case FS_LINEAR: + i915_shader_linear_color (device, shader->source.base.mode, + FS_T0, /* input */ + FS_C0, FS_C1, /* colour ramp */ + FS_U3); /* unpremultiplied output */ + /* XXX can we defer premultiplication? */ + i915_fs_mul (out_reg, + i915_fs_operand_reg (FS_U3), + i915_fs_operand (FS_U3, W, W, W, W)); + + constant_offset += 2; + texture_offset += 1; + source_reg = out_reg; + break; + + case FS_RADIAL: + i915_shader_radial_coord (device, shader->source.base.mode, + FS_T0, /* input */ + FS_C0, FS_C1, /* gradient constants */ + FS_U3); /* coordinate */ + + i915_fs_texld (out_reg, FS_S0, FS_U3); + constant_offset += 2; + texture_offset += 1; + sampler_offset += 1; + source_reg = out_reg; + break; + + case FS_TEXTURE: + i915_fs_texld (out_reg, FS_S0, FS_T0); + texture_offset += 1; + sampler_offset += 1; + source_reg = out_reg; + break; + + case FS_YUV: + /* Load samplers to temporaries. 
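+         * Each of the Y, U and V planes is bound to its own sampler and
+         * fetched into a separate temporary; i915_shader_yuv_color ()
+         * then combines them using the conversion constants uploaded to
+         * FS_C0..FS_C2.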
+         */
+        i915_fs_texld (FS_R0, FS_S0, FS_T0);
+        i915_fs_texld (FS_R1, FS_S1, FS_T0);
+        i915_fs_texld (FS_R2, FS_S2, FS_T0);
+
+        i915_shader_yuv_color (device,
+                               FS_R0, FS_R1, FS_R2, /* y, u, v */
+                               FS_C0, FS_C1, FS_C2, /* coefficients */
+                               out_reg);
+
+        constant_offset += 3;
+        texture_offset += 1;
+        sampler_offset += 3;
+        source_reg = out_reg;
+        break;
+    }
+
+    mask_reg = ~0;
+    switch (shader->mask.type.fragment) {
+    case FS_PURE:
+    case FS_ZERO:
+    case FS_YUV:
+    case FS_DIFFUSE:
+        ASSERT_NOT_REACHED;
+    case FS_ONE:
+    default:
+        break;
+
+    case FS_SPANS:
+        mask_reg = FS_T0 + texture_offset;
+        texture_offset += 1;
+        break;
+
+    case FS_CONSTANT:
+        mask_reg = FS_C0 + constant_offset;
+        constant_offset += 1;
+        break;
+
+    case FS_LINEAR:
+        i915_shader_linear_color (device, shader->mask.base.mode,
+                                  FS_T0 + texture_offset, /* input */
+                                  FS_C0 + constant_offset,
+                                  FS_C0 + constant_offset + 1, /* colour ramp */
+                                  FS_U3); /* unpremultiplied output */
+        i915_fs_mul (FS_R1,
+                     i915_fs_operand_reg (FS_U3),
+                     i915_fs_operand (source_reg, W, W, W, W));
+
+        constant_offset += 2;
+        texture_offset += 1;
+        mask_reg = FS_R1;
+        break;
+
+    case FS_RADIAL:
+        i915_shader_radial_coord (device, shader->mask.base.mode,
+                                  FS_T0 + texture_offset, /* input */
+                                  FS_C0 + constant_offset,
+                                  FS_C0 + constant_offset + 1, /* gradient constants */
+                                  FS_U3); /* coordinate */
+
+        i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_U3);
+        constant_offset += 2;
+        texture_offset += 1;
+        sampler_offset += 1;
+        mask_reg = FS_R1;
+        break;
+
+    case FS_TEXTURE:
+        i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
+        texture_offset += 1;
+        sampler_offset += 1;
+        mask_reg = FS_R1;
+        break;
+    }
+
+    if (mask_reg != ~0U) {
+        if (! shader->need_combine &&
+            shader->clip.type.fragment != FS_TEXTURE &&
+            shader->content != CAIRO_CONTENT_ALPHA)
+        {
+            out_reg = FS_OC;
+        }
+        if (source_reg == ~0U) {
+            if (source_pure) {
+                if (shader->mask.type.fragment == FS_SPANS) {
+                    i915_fs_mov (out_reg,
+                                 i915_fs_operand_impure (mask_reg, X, source_pure));
+                } else {
+                    /* XXX ComponentAlpha
+                    i915_fs_mov (out_reg,
+                                 i915_fs_operand_pure (mask_reg,
+                                                       shader->source.solid.pure));
+                    */
+                    i915_fs_mov (out_reg,
+                                 i915_fs_operand_impure (mask_reg, W, source_pure));
+                }
+                source_reg = out_reg;
+            } else if (shader->mask.type.fragment == FS_SPANS) {
+                i915_fs_mov (out_reg,
+                             i915_fs_operand (mask_reg, X, X, X, X));
+                source_reg = out_reg;
+            } else {
+                source_reg = mask_reg;
+            }
+        } else {
+            if (shader->mask.type.fragment == FS_SPANS) {
+                i915_fs_mul (out_reg,
+                             i915_fs_operand_reg (source_reg),
+                             i915_fs_operand (mask_reg, X, X, X, X));
+            } else {
+                /* XXX ComponentAlpha
+                i915_fs_mul (FS_R0,
+                             i915_fs_operand_reg (source_reg),
+                             i915_fs_operand_reg (mask_reg));
+                */
+                i915_fs_mul (out_reg,
+                             i915_fs_operand_reg (source_reg),
+                             i915_fs_operand (mask_reg, W, W, W, W));
+            }
+
+            source_reg = out_reg;
+        }
+    }
+
+    /* need to preserve order of src, mask, clip, dst */
+    mask_reg = ~0;
+    if (shader->clip.type.fragment == FS_TEXTURE) {
+        i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
+        texture_offset += 1;
+        sampler_offset += 1;
+        mask_reg = FS_R1;
+    }
+
+    if (shader->need_combine) {
+        assert (shader->dst.type.fragment == FS_TEXTURE);
+
+        i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset);
+        texture_offset += 1;
+        sampler_offset += 1;
+        dest_reg = FS_R2;
+
+        switch (shader->op) {
+        case CAIRO_OPERATOR_CLEAR:
+        case CAIRO_OPERATOR_SOURCE:
+            ASSERT_NOT_REACHED;
+
+        case CAIRO_OPERATOR_OVER:
+            if (source_reg == ~0U) {
+                /*
XXX shader->source.type.fragment == FS_PURE */ + dest_reg = FS_OC; + } else { + i915_fs_add (FS_U0, + i915_fs_operand (source_reg, -W, -W, -W, -W), + i915_fs_operand_one ()); + i915_fs_mul (FS_U0, + i915_fs_operand_reg (FS_U0), + dest_reg); + i915_fs_add (FS_R3, + i915_fs_operand_reg (source_reg), + i915_fs_operand_reg (FS_U0)); + source_reg = FS_R3; + } + break; + + case CAIRO_OPERATOR_IN: + if (source_reg == ~0U) { + /* XXX shader->source.type.fragment == FS_PURE */ + source_reg = dest_reg; + } else { + i915_fs_mul (FS_R3, + i915_fs_operand_reg (source_reg), + dest_reg); + source_reg = FS_R3; + } + break; + + case CAIRO_OPERATOR_OUT: + if (source_reg == ~0U) { + /* XXX shader->source.type.fragment == FS_PURE */ + i915_fs_mov (FS_R3, i915_fs_operand_zero ()); + source_reg = FS_R3; + } else { + i915_fs_add (FS_U0, + i915_fs_operand (source_reg, -W, -W, -W, -W), + i915_fs_operand_one ()); + i915_fs_mul (FS_R3, + i915_fs_operand_reg (FS_U0), + dest_reg); + source_reg = FS_R3; + } + break; + + case CAIRO_OPERATOR_ATOP: + + case CAIRO_OPERATOR_DEST: + case CAIRO_OPERATOR_DEST_OVER: + case CAIRO_OPERATOR_DEST_IN: + case CAIRO_OPERATOR_DEST_OUT: + case CAIRO_OPERATOR_DEST_ATOP: + + case CAIRO_OPERATOR_XOR: + case CAIRO_OPERATOR_ADD: + case CAIRO_OPERATOR_SATURATE: + + case CAIRO_OPERATOR_MULTIPLY: + case CAIRO_OPERATOR_SCREEN: + case CAIRO_OPERATOR_OVERLAY: + case CAIRO_OPERATOR_DARKEN: + case CAIRO_OPERATOR_LIGHTEN: + case CAIRO_OPERATOR_COLOR_DODGE: + case CAIRO_OPERATOR_COLOR_BURN: + case CAIRO_OPERATOR_HARD_LIGHT: + case CAIRO_OPERATOR_SOFT_LIGHT: + case CAIRO_OPERATOR_DIFFERENCE: + case CAIRO_OPERATOR_EXCLUSION: + case CAIRO_OPERATOR_HSL_HUE: + case CAIRO_OPERATOR_HSL_SATURATION: + case CAIRO_OPERATOR_HSL_COLOR: + case CAIRO_OPERATOR_HSL_LUMINOSITY: + ASSERT_NOT_REACHED; + break; + } + } + + if (shader->clip.type.fragment == FS_TEXTURE) { + assert (mask_reg != ~0U); + + if (! 
shader->need_combine) { + /* (source IN clip) */ + if (source_reg == ~0U) { + if (source_pure == 0) { + source_reg = mask_reg; + } else { + out_reg = FS_OC; + if (shader->content == CAIRO_CONTENT_ALPHA) + out_reg = FS_U0; + i915_fs_mov (out_reg, + i915_fs_operand_reg_pure (mask_reg, source_pure)); + source_reg = out_reg; + } + } else if (mask_reg) { + out_reg = FS_OC; + if (shader->content == CAIRO_CONTENT_ALPHA) + out_reg = FS_U0; + i915_fs_mul (out_reg, + i915_fs_operand_reg (source_reg), + i915_fs_operand (mask_reg, W, W, W, W)); + + source_reg = out_reg; + } + } else { + /* (source OP dest) LERP_clip dest */ + if (source_reg == ~0U) { + if (source_pure == 0) { + i915_fs_mov (FS_U0, + i915_fs_operand (mask_reg, W, W, W, W)); + } else { + i915_fs_mov (FS_U0, + i915_fs_operand_impure (mask_reg, W, source_pure)); + } + } else { + i915_fs_mul (FS_U0, + i915_fs_operand_reg (source_reg), + i915_fs_operand (mask_reg, W, W, W, W)); + } + + i915_fs_add (mask_reg, + i915_fs_operand_one (), + i915_fs_operand (mask_reg, -W, -W, -W, -W)); + + if (dest_reg != FS_OC) { + if (dest_reg == ~0U) { + assert (shader->dst.type.fragment == FS_TEXTURE); + + i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset); + texture_offset += 1; + sampler_offset += 1; + dest_reg = FS_R2; + } + + i915_fs_mul (FS_U1, + i915_fs_operand_reg (dest_reg), + i915_fs_operand_reg (mask_reg)); + mask_reg = FS_U1; + } + + source_reg = FS_OC; + if (shader->content != CAIRO_CONTENT_COLOR_ALPHA) + source_reg = FS_U0; + i915_fs_add (source_reg, + i915_fs_operand_reg (FS_U0), + i915_fs_operand_reg (mask_reg)); + } + } + + if (source_reg != FS_OC) { + if (source_reg == ~0U) + if (source_pure) + i915_fs_mov (FS_OC, i915_fs_operand_pure (source_pure)); + else + i915_fs_mov (FS_OC, i915_fs_operand_one ()); + else if ((shader->content & CAIRO_CONTENT_COLOR) == 0) + i915_fs_mov (FS_OC, i915_fs_operand (source_reg, W, W, W, W)); + else + i915_fs_mov (FS_OC, i915_fs_operand_reg (source_reg)); + } else { + if ((shader->content & CAIRO_CONTENT_COLOR) == 0) + i915_fs_mov (FS_OC, i915_fs_operand (FS_OC, W, W, W, W)); + } + + FS_END (); +} + +static void +i915_shader_linear_init (struct i915_shader_linear *l, + const cairo_linear_pattern_t *linear) +{ + double x0, y0, sf; + double dx, dy, offset; + + dx = _cairo_fixed_to_double (linear->p2.x - linear->p1.x); + dy = _cairo_fixed_to_double (linear->p2.y - linear->p1.y); + sf = 1. / (dx * dx + dy * dy); + dx *= sf; + dy *= sf; + + x0 = _cairo_fixed_to_double (linear->p1.x); + y0 = _cairo_fixed_to_double (linear->p1.y); + offset = dx*x0 + dy*y0; + + if (_cairo_matrix_is_identity (&linear->base.base.matrix)) { + l->dx = dx; + l->dy = dy; + l->offset = -offset; + } else { + cairo_matrix_t m; + + cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0); + cairo_matrix_multiply (&m, &linear->base.base.matrix, &m); + l->dx = m.xx; + l->dy = m.xy; + l->offset = m.x0; + } +} + +static cairo_bool_t +i915_shader_linear_contains_rectangle (struct i915_shader_linear *l, + const cairo_rectangle_int_t *extents) +{ + double v; + + v = i915_shader_linear_texcoord (l, + extents->x, + extents->y); + if (v < 0.) + return FALSE; + if (v > 1.) + return FALSE; + + v = i915_shader_linear_texcoord (l, + extents->x + extents->width, + extents->y); + if (v < 0.) + return FALSE; + if (v > 1.) + return FALSE; + + v = i915_shader_linear_texcoord (l, + extents->x, + extents->y + extents->height); + if (v < 0.) + return FALSE; + if (v > 1.) 
+ return FALSE; + + v = i915_shader_linear_texcoord (l, + extents->x + extents->width, + extents->y + extents->height); + if (v < 0.) + return FALSE; + if (v > 1.) + return FALSE; + + return TRUE; +} + +#define is_pure(C,mask) (((mask) == 0) || (C) <= 0x00ff || (C) >= 0xff00) +#define is_one(C,mask) (((mask) != 0) && (C) >= 0xff00) +#define is_zero(C,mask) (((mask) != 0) && (C) <= 0x00ff) + +static cairo_status_t +i915_shader_acquire_solid (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_solid_pattern_t *solid, + const cairo_rectangle_int_t *extents) +{ + cairo_content_t content; + + content = solid->content; + src->solid.color = solid->color; + if (content == 0 || solid->color.alpha_short <= 0x00ff) + { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ZERO; + } + else if ((((content & CAIRO_CONTENT_COLOR) == 0) || + (solid->color.red_short >= 0xff00 && + solid->color.green_short >= 0xff00 && + solid->color.blue_short >= 0xff00)) && + ((content & CAIRO_CONTENT_ALPHA) == 0 || + solid->color.alpha_short >= 0xff00)) + { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ONE; + } + else if (is_pure (solid->color.red_short, content & CAIRO_CONTENT_COLOR) && + is_pure (solid->color.green_short, content & CAIRO_CONTENT_COLOR) && + is_pure (solid->color.blue_short, content & CAIRO_CONTENT_COLOR) && + is_pure (solid->color.alpha_short, content & CAIRO_CONTENT_ALPHA)) + { + src->solid.pure = 0; + src->solid.pure |= is_one (solid->color.red_short, content & CAIRO_CONTENT_COLOR) << 0; + src->solid.pure |= is_one (solid->color.green_short, content & CAIRO_CONTENT_COLOR) << 1; + src->solid.pure |= is_one (solid->color.blue_short, content & CAIRO_CONTENT_COLOR) << 2; + src->solid.pure |= (! is_zero (solid->color.alpha_short, content & CAIRO_CONTENT_ALPHA)) << 3; + + if (src->solid.pure == 0) { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ZERO; + } else if (src->solid.pure == 0x7) { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ONE; + } else { + src->base.content = content; + src->type.fragment = FS_PURE; + src->base.mode = src->solid.pure; + } + } + else + { + src->base.content = content; + src->type.fragment = src == &shader->source ? 
FS_DIFFUSE : FS_CONSTANT; + } + src->type.vertex = VS_CONSTANT; + src->type.pattern = PATTERN_CONSTANT; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_linear (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_linear_pattern_t *linear, + const cairo_rectangle_int_t *extents) +{ + cairo_bool_t mode = LINEAR_TEXTURE; + cairo_status_t status; + + i915_shader_linear_init (&src->linear, linear); + if (linear->base.n_stops == 2 && + linear->base.stops[0].offset == 0.0 && + linear->base.stops[1].offset == 1.0) + { + if (i915_shader_linear_contains_rectangle (&src->linear, + extents)) + { + /* XXX can also lerp if contained within offset range */ + mode = LINEAR_NONE; + } + else switch (linear->base.base.extend) { + case CAIRO_EXTEND_REPEAT: + mode = LINEAR_REPEAT; + break; + case CAIRO_EXTEND_PAD: + mode = LINEAR_PAD; + break; + case CAIRO_EXTEND_NONE: + break; + case CAIRO_EXTEND_REFLECT: + break; + default: + ASSERT_NOT_REACHED; + break; + } + } + + src->type.vertex = VS_LINEAR; + src->type.pattern = PATTERN_LINEAR; + src->base.texfmt = TEXCOORDFMT_1D; + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.mode = mode; + if (mode == LINEAR_TEXTURE) { + intel_buffer_t buffer; + + status = intel_gradient_render ((intel_device_t *) shader->target->intel.drm.base.device, + &linear->base, &buffer); + if (unlikely (status)) + return status; + + src->type.fragment = FS_TEXTURE; + src->base.bo = intel_bo_reference (buffer.bo); + src->base.n_samplers = 1; + src->base.offset[0] = buffer.offset; + src->base.map[0] = buffer.map0; + src->base.map[1] = buffer.map1; + src->base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_BILINEAR); + src->base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (linear->base.base.extend); + } else { + src->type.fragment = FS_LINEAR; + src->linear.color0.red = linear->base.stops[0].color.red; + src->linear.color0.green = linear->base.stops[0].color.green; + src->linear.color0.blue = linear->base.stops[0].color.blue; + src->linear.color0.alpha = linear->base.stops[0].color.alpha; + + src->linear.color1.red = linear->base.stops[1].color.red; + src->linear.color1.green = linear->base.stops[1].color.green; + src->linear.color1.blue = linear->base.stops[1].color.blue; + src->linear.color1.alpha = linear->base.stops[1].color.alpha; + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_radial (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_radial_pattern_t *radial, + const cairo_rectangle_int_t *extents) +{ + intel_buffer_t buffer; + cairo_status_t status; + + status = intel_gradient_render ((intel_device_t *) shader->target->intel.drm.base.device, + &radial->base, &buffer); + if (unlikely (status)) + return status; + + i915_shader_radial_init (&src->radial, radial); + + src->type.vertex = VS_RADIAL; + src->type.fragment = FS_RADIAL; + src->type.pattern = PATTERN_RADIAL; + src->base.texfmt = TEXCOORDFMT_2D; + + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.bo = intel_bo_reference (buffer.bo); + src->base.n_samplers = 1; + src->base.offset[0] = buffer.offset; + src->base.map[0] = buffer.map0; + src->base.map[1] = buffer.map1; + src->base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_BILINEAR); + src->base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (radial->base.base.extend); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t 
+i915_surface_clone (i915_device_t *device, + cairo_image_surface_t *image, + i915_surface_t **clone_out) +{ + i915_surface_t *clone; + cairo_status_t status; + + clone = + i915_surface_create_from_cacheable_image_internal (device, image); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = _cairo_surface_attach_snapshot (&image->base, + &clone->intel.drm.base, + intel_surface_detach_snapshot); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = intel_snapshot_cache_insert (&device->intel, &clone->intel); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_surface_clone_subimage (i915_device_t *device, + cairo_image_surface_t *image, + const cairo_rectangle_int_t *extents, + i915_surface_t **clone_out) +{ + i915_surface_t *clone; + cairo_status_t status; + + clone = (i915_surface_t *) + i915_surface_create_internal (&device->intel.base, + image->base.content, + extents->width, + extents->height, + I915_TILING_DEFAULT, + FALSE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = intel_bo_put_image (to_intel_device (clone->intel.drm.base.device), + to_intel_bo (clone->intel.drm.bo), + clone->intel.drm.stride, + image, + extents->x, extents->y, + extents->width, extents->height, + 0, 0); + + if (unlikely (status)) + return status; + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_solid_surface (i915_shader_t *shader, + union i915_shader_channel *src, + cairo_surface_t *surface, + const cairo_rectangle_int_t *extents) +{ + cairo_surface_pattern_t pattern; + cairo_surface_t *pixel; + cairo_image_surface_t *image; + void *image_extra; + cairo_status_t status; + uint32_t argb; + + status = _cairo_surface_acquire_source_image (surface, &image, &image_extra); + if (unlikely (status)) + return status; + + /* extract the pixel as argb32 */ + pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1); + _cairo_pattern_init_for_surface (&pattern, &image->base); + cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y); + pattern.base.filter = CAIRO_FILTER_NEAREST; + status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL); + _cairo_pattern_fini (&pattern.base); + + _cairo_surface_release_source_image (surface, image, image_extra); + + if (unlikely (status)) { + cairo_surface_destroy (pixel); + return status; + } + + image = (cairo_image_surface_t *) pixel; + argb = *(uint32_t *) image->data; + cairo_surface_destroy (pixel); + + if (argb >> 24 == 0) { + _cairo_color_init_rgba (&src->solid.color, 0, 0, 0, 0); + } else { + uint8_t alpha = argb >> 24; + + _cairo_color_init_rgba (&src->solid.color, + ((((argb >> 16) & 0xff) * 255 + alpha / 2) / alpha) / 255., + ((((argb >> 8) & 0xff) * 255 + alpha / 2) / alpha) / 255., + ((((argb >> 0) & 0xff) * 255 + alpha / 2) / alpha) / 255., + alpha / 255.); + } + + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->type.fragment = FS_CONSTANT; + src->type.vertex = VS_CONSTANT; + src->type.pattern = PATTERN_CONSTANT; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_surface (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_surface_pattern_t *pattern, + const cairo_rectangle_int_t *extents) +{ + int surface_width, surface_height; + cairo_surface_t *surface, *drm; + cairo_extend_t 
extend; + cairo_filter_t filter; + cairo_matrix_t m; + int src_x = 0, src_y = 0; + + assert (src->type.fragment == (i915_fragment_shader_t) -1); + drm = surface = pattern->surface; + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS + if (surface->type == CAIRO_SURFACE_TYPE_XCB) { + cairo_surface_t *xcb = surface; + + if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + xcb = ((cairo_surface_subsurface_t *) surface)->target; + } else if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + xcb = ((cairo_surface_snapshot_t *) surface)->target; + } + + /* XXX copy windows (IncludeInferiors) to a pixmap/drm surface + * xcb = _cairo_xcb_surface_to_drm (xcb) + */ + xcb = ((cairo_xcb_surface_t *) xcb)->drm; + if (xcb != NULL) + drm = xcb; + } +#endif + + if (surface->type == CAIRO_SURFACE_TYPE_DRM) { + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + drm = ((cairo_surface_subsurface_t *) surface)->target; + } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + drm = ((cairo_surface_snapshot_t *) surface)->target; + } + } + + if (drm->type == CAIRO_SURFACE_TYPE_DRM) { + i915_surface_t *s = (i915_surface_t *) drm; + + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + if (s->intel.drm.base.device == shader->target->intel.drm.base.device && + s != shader->target) + { + cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface; + int x; + + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) { + /* XXX pipelined flush of RENDER/TEXTURE cache */ + } + + src->type.fragment = FS_TEXTURE; + src->surface.pixel = NONE; + surface_width = sub->extents.width; + surface_height = sub->extents.height; + + src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo)); + src->base.n_samplers = 1; + + x = sub->extents.x; + if (s->intel.drm.format != CAIRO_FORMAT_A8) + x *= 4; + + /* XXX tiling restrictions upon offset? */ + src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x; + src->base.map[0] = s->map0; + src->base.map[0] &= ~((2047 << MS3_HEIGHT_SHIFT) | (2047 << MS3_WIDTH_SHIFT)); + src->base.map[0] |= + ((sub->extents.height - 1) << MS3_HEIGHT_SHIFT) | + ((sub->extents.width - 1) << MS3_WIDTH_SHIFT); + src->base.map[1] = (s->intel.drm.stride / 4 - 1) << MS4_PITCH_SHIFT; + } + } else { + /* XXX if s == shader->dst allow if FILTER_NEAREST, EXTEND_NONE? 
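+             * (Sampling the current render target is normally a read-after-
+             * write hazard; the idea would be that with FILTER_NEAREST and
+             * EXTEND_NONE each destination texel is fetched exactly once,
+             * so the feedback might be benign.)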
+             */
+            if (s->intel.drm.base.device == shader->target->intel.drm.base.device &&
+                s != shader->target)
+            {
+                src->type.fragment = FS_TEXTURE;
+                src->surface.pixel = NONE;
+                surface_width = s->intel.drm.width;
+                surface_height = s->intel.drm.height;
+
+                src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
+                src->base.n_samplers = 1;
+                src->base.offset[0] = s->offset;
+                src->base.map[0] = s->map0;
+                src->base.map[1] = s->map1;
+            }
+        }
+    }
+
+    if (src->type.fragment == (i915_fragment_shader_t) -1) {
+        i915_surface_t *s;
+
+        if (extents->width == 1 && extents->height == 1) {
+            return i915_shader_acquire_solid_surface (shader, src,
+                                                      surface, extents);
+        }
+
+        s = (i915_surface_t *)
+            _cairo_surface_has_snapshot (surface,
+                                         shader->target->intel.drm.base.backend);
+        if (s == NULL) {
+            cairo_image_surface_t *image;
+            void *image_extra;
+            cairo_status_t status;
+
+#if 0
+            /* XXX hackity hack hack */
+            status = i915_clone_yuv (surface, src,
+                                     image->width, image->height,
+                                     clone_out);
+#endif
+
+            status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
+            if (unlikely (status))
+                return status;
+
+            if (image->width < 2048 && image->height < 2048) {
+                status = i915_surface_clone ((i915_device_t *) shader->target->intel.drm.base.device,
+                                             image, &s);
+            } else {
+                status = i915_surface_clone_subimage ((i915_device_t *) shader->target->intel.drm.base.device,
+                                                      image, extents, &s);
+                src_x = -extents->x;
+                src_y = -extents->y;
+            }
+
+            surface_width = image->width;
+            surface_height = image->height;
+
+            _cairo_surface_release_source_image (surface, image, image_extra);
+
+            if (unlikely (status))
+                return status;
+        } else {
+            /* take the dimensions from the cached snapshot */
+            surface_width = s->intel.drm.width;
+            surface_height = s->intel.drm.height;
+        }
+
+        src->type.fragment = FS_TEXTURE;
+        src->surface.pixel = NONE;
+
+        src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
+        src->base.n_samplers = 1;
+        src->base.offset[0] = s->offset;
+        src->base.map[0] = s->map0;
+        src->base.map[1] = s->map1;
+
+        drm = &s->intel.drm.base;
+    }
+
+    /* XXX transform nx1 or 1xn surfaces to 1D */
+
+    src->type.pattern = PATTERN_TEXTURE;
+    extend = pattern->base.extend;
+    if (extend != CAIRO_EXTEND_NONE &&
+        extents->x >= 0 && extents->y >= 0 &&
+        extents->x + extents->width <= surface_width &&
+        extents->y + extents->height <= surface_height)
+    {
+        extend = CAIRO_EXTEND_NONE;
+    }
+    if (extend == CAIRO_EXTEND_NONE) {
+        src->type.vertex = VS_TEXTURE_16;
+        src->base.texfmt = TEXCOORDFMT_2D_16;
+    } else {
+        src->type.vertex = VS_TEXTURE;
+        src->base.texfmt = TEXCOORDFMT_2D;
+    }
+    src->base.content = drm->content;
+
+    filter = pattern->base.filter;
+    if (_cairo_matrix_is_pixel_exact (&pattern->base.matrix))
+        filter = CAIRO_FILTER_NEAREST;
+
+    src->base.sampler[0] =
+        (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
+        i915_texture_filter (filter);
+    src->base.sampler[1] =
+        SS3_NORMALIZED_COORDS |
+        i915_texture_extend (extend);
+
+    /* tweak the src matrix to map from dst to texture coordinates */
+    src->base.matrix = pattern->base.matrix;
+    if (src_x | src_y)
+        cairo_matrix_translate (&src->base.matrix, src_x, src_y);
+    if (i915_texture_filter_is_nearest (filter))
+        cairo_matrix_translate (&src->base.matrix, NEAREST_BIAS, NEAREST_BIAS);
+    cairo_matrix_init_scale (&m, 1. / surface_width, 1.
/ surface_height); + cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m); + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +i915_shader_acquire_pattern (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_pattern_t *pattern, + const cairo_rectangle_int_t *extents) +{ + switch (pattern->type) { + case CAIRO_PATTERN_TYPE_SOLID: + return i915_shader_acquire_solid (shader, src, + (cairo_solid_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_LINEAR: + return i915_shader_acquire_linear (shader, src, + (cairo_linear_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_RADIAL: + return i915_shader_acquire_radial (shader, src, + (cairo_radial_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_SURFACE: + return i915_shader_acquire_surface (shader, src, + (cairo_surface_pattern_t *) pattern, + extents); + + default: + ASSERT_NOT_REACHED; + return CAIRO_STATUS_SUCCESS; + } +} + +static uint32_t +i915_get_blend (cairo_operator_t op, + i915_surface_t *dst) +{ +#define SBLEND(X) ((BLENDFACT_##X) << S6_CBUF_SRC_BLEND_FACT_SHIFT) +#define DBLEND(X) ((BLENDFACT_##X) << S6_CBUF_DST_BLEND_FACT_SHIFT) + static const struct blendinfo { + cairo_bool_t dst_alpha; + uint32_t src_blend; + uint32_t dst_blend; + enum { + BOUNDED, + SIMPLE, + XRENDER, + } kind; + } i915_blend_op[] = { + {0, SBLEND (ZERO), DBLEND (ZERO), BOUNDED}, /* Clear */ + {0, SBLEND (ONE), DBLEND (ZERO), BOUNDED}, /* Src */ + + {0, SBLEND (ONE), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Over */ + {1, SBLEND (DST_ALPHA), DBLEND (ZERO), XRENDER}, /* In */ + {1, SBLEND (INV_DST_ALPHA), DBLEND (ZERO), XRENDER}, /* Out */ + {1, SBLEND (DST_ALPHA), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Atop */ + + {0, SBLEND (ZERO), DBLEND (ONE), SIMPLE}, /* Dst */ + {1, SBLEND (INV_DST_ALPHA), DBLEND (ONE), SIMPLE}, /* OverReverse */ + {0, SBLEND (ZERO), DBLEND (SRC_ALPHA), XRENDER}, /* InReverse */ + {0, SBLEND (ZERO), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* OutReverse */ + {1, SBLEND (INV_DST_ALPHA), DBLEND (SRC_ALPHA), XRENDER}, /* AtopReverse */ + + {1, SBLEND (INV_DST_ALPHA), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Xor */ + {0, SBLEND (ONE), DBLEND (ONE), SIMPLE}, /* Add */ + //{0, 0, SBLEND (SRC_ALPHA_SATURATE), DBLEND (ONE), SIMPLE}, /* XXX Saturate */ + }; + uint32_t sblend, dblend; + + if (op >= ARRAY_LENGTH (i915_blend_op)) + return 0; + + if (i915_blend_op[op].kind == BOUNDED) + return 0; + + sblend = i915_blend_op[op].src_blend; + dblend = i915_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if ((dst->intel.drm.base.content & CAIRO_CONTENT_ALPHA) == 0 && + i915_blend_op[op].dst_alpha) + { + if (sblend == SBLEND (DST_ALPHA)) + sblend = SBLEND (ONE); + else if (sblend == SBLEND (INV_DST_ALPHA)) + sblend = SBLEND (ZERO); + } + + /* i915 engine reads 8bit color buffer into green channel in cases + like color buffer blending etc., and also writes back green channel. + So with dst_alpha blend we should use color factor. 
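+       That is, DST_ALPHA is rewritten as DST_COLR and INV_DST_ALPHA as
+       INV_DST_COLR below, so the blend reads the (green) colour channel
+       that actually holds the A8 data.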
See spec on + "8-bit rendering" */ + if (dst->intel.drm.format == CAIRO_FORMAT_A8 && i915_blend_op[op].dst_alpha) { + if (sblend == SBLEND (DST_ALPHA)) + sblend = SBLEND (DST_COLR); + else if (sblend == SBLEND (INV_DST_ALPHA)) + sblend = SBLEND (INV_DST_COLR); + } + + return sblend | dblend; +#undef SBLEND +#undef DBLEND +} + +static void +i915_shader_channel_init (union i915_shader_channel *channel) +{ + channel->type.vertex = (i915_vertex_shader_t) -1; + channel->type.fragment = (i915_fragment_shader_t) -1; + channel->type.pattern = (i915_shader_channel_t) -1; + channel->base.texfmt = TEXCOORDFMT_NOT_PRESENT; + channel->base.bo = NULL; + channel->base.n_samplers = 0; + channel->base.mode = 0; +} + +void +i915_shader_init (i915_shader_t *shader, + i915_surface_t *dst, + cairo_operator_t op) +{ + shader->device = i915_device (dst); + shader->target = dst; + shader->op = op; + + shader->blend = i915_get_blend (op, dst); + shader->need_combine = FALSE; + + shader->content = dst->intel.drm.base.content; + + i915_shader_channel_init (&shader->source); + i915_shader_channel_init (&shader->mask); + i915_shader_channel_init (&shader->clip); + i915_shader_channel_init (&shader->dst); +} + +static void +i915_set_shader_samplers (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t n_samplers, n; + uint32_t samplers[4 * (1+2+8)]; + uint32_t mask, tu; + + n_samplers = + shader->source.base.n_samplers + + shader->mask.base.n_samplers + + shader->clip.base.n_samplers + + shader->dst.base.n_samplers; + assert (n_samplers <= 4); + + if (n_samplers == 0) + return; + + mask = (1 << n_samplers) - 1; + + /* We check for repeated setting of sample state mainly to catch + * continuation of text strings across multiple show-glyphs. + */ + tu = 0; + if (shader->source.base.bo != NULL) { + samplers[tu++] = shader->source.base.bo->base.handle; + samplers[tu++] = shader->source.base.sampler[0]; + samplers[tu++] = shader->source.base.sampler[1]; + for (n = 0; n < shader->source.base.n_samplers; n++) { + samplers[tu++] = shader->source.base.offset[n]; + samplers[tu++] = shader->source.base.map[2*n+0]; + samplers[tu++] = shader->source.base.map[2*n+1]; + } + } + if (shader->mask.base.bo != NULL) { + samplers[tu++] = shader->mask.base.bo->base.handle; + samplers[tu++] = shader->mask.base.sampler[0]; + samplers[tu++] = shader->mask.base.sampler[1]; + for (n = 0; n < shader->mask.base.n_samplers; n++) { + samplers[tu++] = shader->mask.base.offset[n]; + samplers[tu++] = shader->mask.base.map[2*n+0]; + samplers[tu++] = shader->mask.base.map[2*n+1]; + } + } + if (shader->clip.base.bo != NULL) { + samplers[tu++] = shader->clip.base.bo->base.handle; + samplers[tu++] = shader->clip.base.sampler[0]; + samplers[tu++] = shader->clip.base.sampler[1]; + for (n = 0; n < shader->clip.base.n_samplers; n++) { + samplers[tu++] = shader->clip.base.offset[n]; + samplers[tu++] = shader->clip.base.map[2*n+0]; + samplers[tu++] = shader->clip.base.map[2*n+1]; + } + } + if (shader->dst.base.bo != NULL) { + samplers[tu++] = shader->dst.base.bo->base.handle; + samplers[tu++] = shader->dst.base.sampler[0]; + samplers[tu++] = shader->dst.base.sampler[1]; + for (n = 0; n < shader->dst.base.n_samplers; n++) { + samplers[tu++] = shader->dst.base.offset[n]; + samplers[tu++] = shader->dst.base.map[2*n+0]; + samplers[tu++] = shader->dst.base.map[2*n+1]; + } + } + + if (tu == device->current_n_samplers && + memcmp (device->current_samplers, + samplers, + tu * sizeof (uint32_t)) == 0) + { + return; + } + device->current_n_samplers = tu; + 
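+    /* Cache the packed sampler vector so the memcmp above can skip
+     * redundant MAP/SAMPLER state emission on the next shader commit. */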
memcpy (device->current_samplers, samplers, tu * sizeof (uint32_t)); + + if (device->current_source != NULL) + *device->current_source = 0; + if (device->current_mask != NULL) + *device->current_mask = 0; + if (device->current_clip != NULL) + *device->current_clip = 0; + +#if 0 + if (shader->source.type.pattern == PATTERN_TEXTURE) { + switch ((int) shader->source.surface.surface->type) { + case CAIRO_SURFACE_TYPE_DRM: + { + i915_surface_t *surface = + (i915_surface_t *) shader->source.surface.surface; + device->current_source = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_SOURCE; + break; + } + + case I915_PACKED_PIXEL_SURFACE_TYPE: + { + i915_packed_pixel_surface_t *surface = + (i915_packed_pixel_surface_t *) shader->source.surface.surface; + device->current_source = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_SOURCE; + break; + } + + default: + device->current_source = NULL; + break; + } + } else + device->current_source = NULL; + + if (shader->mask.type.pattern == PATTERN_TEXTURE) { + switch ((int) shader->mask.surface.surface->type) { + case CAIRO_SURFACE_TYPE_DRM: + { + i915_surface_t *surface = + (i915_surface_t *) shader->mask.surface.surface; + device->current_mask = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_MASK; + break; + } + + case I915_PACKED_PIXEL_SURFACE_TYPE: + { + i915_packed_pixel_surface_t *surface = + (i915_packed_pixel_surface_t *) shader->mask.surface.surface; + device->current_mask = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_MASK; + break; + } + + default: + device->current_mask = NULL; + break; + } + } else + device->current_mask = NULL; +#endif + + OUT_DWORD (_3DSTATE_MAP_STATE | (3 * n_samplers)); + OUT_DWORD (mask); + if (shader->source.base.bo != NULL) { + for (n = 0; n < shader->source.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->source.base.bo, + shader->source.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->source.base.map[2*n+0]); + OUT_DWORD (shader->source.base.map[2*n+1]); + } + } + if (shader->mask.base.bo != NULL) { + for (n = 0; n < shader->mask.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->mask.base.bo, + shader->mask.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->mask.base.map[2*n+0]); + OUT_DWORD (shader->mask.base.map[2*n+1]); + } + } + if (shader->clip.base.bo != NULL) { + for (n = 0; n < shader->clip.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->clip.base.bo, + shader->clip.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->clip.base.map[2*n+0]); + OUT_DWORD (shader->clip.base.map[2*n+1]); + } + } + if (shader->dst.base.bo != NULL) { + for (n = 0; n < shader->dst.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->dst.base.bo, + shader->dst.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->dst.base.map[2*n+0]); + OUT_DWORD (shader->dst.base.map[2*n+1]); + } + } + + OUT_DWORD (_3DSTATE_SAMPLER_STATE | (3 * n_samplers)); + OUT_DWORD (mask); + tu = 0; + if (shader->source.base.bo != NULL) { + for (n = 0; n < shader->source.base.n_samplers; n++) { + OUT_DWORD (shader->source.base.sampler[0]); + OUT_DWORD (shader->source.base.sampler[1] | + (tu << SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } + if (shader->mask.base.bo != NULL) { + for (n = 0; n < shader->mask.base.n_samplers; n++) { + OUT_DWORD (shader->mask.base.sampler[0]); + OUT_DWORD (shader->mask.base.sampler[1] | + (tu << 
SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } + if (shader->clip.base.bo != NULL) { + for (n = 0; n < shader->clip.base.n_samplers; n++) { + OUT_DWORD (shader->clip.base.sampler[0]); + OUT_DWORD (shader->clip.base.sampler[1] | + (tu << SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } + if (shader->dst.base.bo != NULL) { + for (n = 0; n < shader->dst.base.n_samplers; n++) { + OUT_DWORD (shader->dst.base.sampler[0]); + OUT_DWORD (shader->dst.base.sampler[1] | + (tu << SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } +} + +static uint32_t +i915_shader_get_texcoords (const i915_shader_t *shader) +{ + uint32_t texcoords; + uint32_t tu; + + texcoords = S2_TEXCOORD_NONE; + tu = 0; + if (shader->source.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->source.base.texfmt); + tu++; + } + if (shader->mask.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->mask.base.texfmt); + tu++; + } + if (shader->clip.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->clip.base.texfmt); + tu++; + } + if (shader->dst.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->dst.base.texfmt); + tu++; + } + + return texcoords; +} + +static void +i915_set_shader_mode (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t texcoords; + uint32_t mask, cnt; + + texcoords = i915_shader_get_texcoords (shader); + + mask = cnt = 0; + + if (device->current_texcoords != texcoords) + mask |= I1_LOAD_S (2), cnt++; + + if (device->current_blend != shader->blend) + mask |= I1_LOAD_S (6), cnt++; + + if (cnt == 0) + return; + + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | mask | (cnt-1)); + + if (device->current_texcoords != texcoords) { + OUT_DWORD (texcoords); + device->current_texcoords = texcoords; + } + + if (device->current_blend != shader->blend) { + if (shader->blend) { + OUT_DWORD (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | + shader->blend); + } else { + OUT_DWORD (S6_COLOR_WRITE_ENABLE); + } + + device->current_blend = shader->blend; + } +} + +static void +i915_set_constants (i915_device_t *device, + const uint32_t *constants, + uint32_t n_constants) +{ + uint32_t n; + + OUT_DWORD (_3DSTATE_PIXEL_SHADER_CONSTANTS | n_constants); + OUT_DWORD ((1 << (n_constants >> 2)) - 1); + + for (n = 0; n < n_constants; n++) + OUT_DWORD (constants[n]); + + device->current_n_constants = n_constants; + memcpy (device->current_constants, constants, n_constants*4); +} + +static inline uint32_t +pack_float (float f) +{ + union { + float f; + uint32_t ui; + } t; + t.f = f; + return t.ui; +} + +static uint32_t +pack_constants (const union i915_shader_channel *channel, + uint32_t *constants) +{ + uint32_t count = 0, n; + + switch (channel->type.fragment) { + case FS_ZERO: + case FS_ONE: + case FS_PURE: + case FS_DIFFUSE: + break; + + case FS_CONSTANT: + constants[count++] = pack_float (channel->solid.color.red); + constants[count++] = pack_float (channel->solid.color.green); + constants[count++] = pack_float (channel->solid.color.blue); + constants[count++] = pack_float (channel->solid.color.alpha); + break; + + case FS_LINEAR: + constants[count++] = pack_float 
(channel->linear.color0.red); + constants[count++] = pack_float (channel->linear.color0.green); + constants[count++] = pack_float (channel->linear.color0.blue); + constants[count++] = pack_float (channel->linear.color0.alpha); + + constants[count++] = pack_float (channel->linear.color1.red); + constants[count++] = pack_float (channel->linear.color1.green); + constants[count++] = pack_float (channel->linear.color1.blue); + constants[count++] = pack_float (channel->linear.color1.alpha); + break; + + case FS_RADIAL: + for (n = 0; n < ARRAY_LENGTH (channel->radial.constants); n++) + constants[count++] = pack_float (channel->radial.constants[n]); + break; + + case FS_TEXTURE: + case FS_YUV: + case FS_SPANS: + break; + } + + return count; +} + +static void +i915_set_shader_constants (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t constants[4*4*3]; + unsigned n_constants; + + n_constants = 0; + if (shader->source.type.fragment == FS_DIFFUSE) { + uint32_t diffuse; + + diffuse = + ((shader->source.solid.color.alpha_short >> 8) << 24) | + ((shader->source.solid.color.red_short >> 8) << 16) | + ((shader->source.solid.color.green_short >> 8) << 8) | + ((shader->source.solid.color.blue_short >> 8) << 0); + + if (diffuse != device->current_diffuse) { + OUT_DWORD (_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_DWORD (diffuse); + device->current_diffuse = diffuse; + } + } else { + n_constants += pack_constants (&shader->source, constants + n_constants); + } + n_constants += pack_constants (&shader->mask, constants + n_constants); + + if (n_constants != 0 && + (device->current_n_constants != n_constants || + memcmp (device->current_constants, constants, n_constants*4))) + { + i915_set_constants (device, constants, n_constants); + } +} + +static cairo_bool_t +i915_shader_needs_update (const i915_shader_t *shader, + const i915_device_t *device) +{ + uint32_t count, n; + uint32_t buf[64]; + + if (device->current_target != shader->target) + return TRUE; + + count = + shader->source.base.n_samplers + + shader->mask.base.n_samplers + + shader->clip.base.n_samplers + + shader->dst.base.n_samplers; + if (count > 4) + return TRUE; + + if (count != 0) { + count *= 3; + if (shader->source.base.bo != NULL) + count += 3; + if (shader->mask.base.bo != NULL) + count += 3; + if (shader->clip.base.bo != NULL) + count += 3; + if (shader->dst.base.bo != NULL) + count += 3; + + if (count != device->current_n_samplers) + return TRUE; + + if (count != 0) { + count = 0; + if (shader->source.base.bo != NULL) { + buf[count++] = shader->source.base.bo->base.handle; + buf[count++] = shader->source.base.sampler[0]; + buf[count++] = shader->source.base.sampler[1]; + for (n = 0; n < shader->source.base.n_samplers; n++) { + buf[count++] = shader->source.base.offset[n]; + buf[count++] = shader->source.base.map[2*n+0]; + buf[count++] = shader->source.base.map[2*n+1]; + } + } + if (shader->mask.base.bo != NULL) { + buf[count++] = shader->mask.base.bo->base.handle; + buf[count++] = shader->mask.base.sampler[0]; + buf[count++] = shader->mask.base.sampler[1]; + for (n = 0; n < shader->mask.base.n_samplers; n++) { + buf[count++] = shader->mask.base.offset[n]; + buf[count++] = shader->mask.base.map[2*n+0]; + buf[count++] = shader->mask.base.map[2*n+1]; + } + } + if (shader->clip.base.bo != NULL) { + buf[count++] = shader->clip.base.bo->base.handle; + buf[count++] = shader->clip.base.sampler[0]; + buf[count++] = shader->clip.base.sampler[1]; + for (n = 0; n < shader->clip.base.n_samplers; n++) { + buf[count++] = 
shader->clip.base.offset[n]; + buf[count++] = shader->clip.base.map[2*n+0]; + buf[count++] = shader->clip.base.map[2*n+1]; + } + } + if (shader->dst.base.bo != NULL) { + buf[count++] = shader->dst.base.bo->base.handle; + buf[count++] = shader->dst.base.sampler[0]; + buf[count++] = shader->dst.base.sampler[1]; + for (n = 0; n < shader->dst.base.n_samplers; n++) { + buf[count++] = shader->dst.base.offset[n]; + buf[count++] = shader->dst.base.map[2*n+0]; + buf[count++] = shader->dst.base.map[2*n+1]; + } + } + + assert (count == device->current_n_samplers); + if (memcmp (device->current_samplers, buf, count * sizeof (uint32_t))) + return TRUE; + } + } + + if (i915_shader_get_texcoords (shader) != device->current_texcoords) + return TRUE; + if (device->current_blend != shader->blend) + return TRUE; + + count = 0; + if (shader->source.type.fragment == FS_DIFFUSE) { + uint32_t diffuse; + + diffuse = + ((shader->source.solid.color.alpha_short >> 8) << 24) | + ((shader->source.solid.color.red_short >> 8) << 16) | + ((shader->source.solid.color.green_short >> 8) << 8) | + ((shader->source.solid.color.blue_short >> 8) << 0); + + if (diffuse != device->current_diffuse) + return TRUE; + } else { + count += pack_constants (&shader->source, buf + count); + } + count += pack_constants (&shader->mask, buf + count); + + if (count && + (device->current_n_constants != count || + memcmp (device->current_constants, buf, count*4))) + { + return TRUE; + } + + n = (i915_shader_channel_key (&shader->source) << 0) | + (i915_shader_channel_key (&shader->mask) << 8) | + (i915_shader_channel_key (&shader->clip) << 16) | + (shader->op << 24) | + (((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31); + return n != device->current_program; +} + +static void +i915_set_shader_target (i915_device_t *device, + const i915_shader_t *shader) +{ + i915_surface_t *dst; + intel_bo_t *bo; + uint32_t size; + + dst = shader->target; + if (device->current_target == dst) + return; + + bo = to_intel_bo (dst->intel.drm.bo); + assert (bo != NULL); + + OUT_DWORD (_3DSTATE_BUF_INFO_CMD); + OUT_DWORD (BUF_3D_ID_COLOR_BACK | + BUF_tiling (bo->tiling) | + BUF_3D_PITCH (dst->intel.drm.stride)); + OUT_RELOC (dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + + if (dst->colorbuf != device->current_colorbuf) { + OUT_DWORD (_3DSTATE_DST_BUF_VARS_CMD); + OUT_DWORD (dst->colorbuf); + device->current_colorbuf = dst->colorbuf; + } + + size = DRAW_YMAX (dst->intel.drm.height) | DRAW_XMAX (dst->intel.drm.width); + if (size != device->current_size) { + OUT_DWORD (_3DSTATE_DRAW_RECT_CMD); + OUT_DWORD (0); /* dither */ + OUT_DWORD (0); /* top-left */ + OUT_DWORD (size); + OUT_DWORD (0); /* origin */ + device->current_size = size; + } + + device->current_target = dst; +} + +int +i915_shader_num_texcoords (const i915_shader_t *shader) +{ + int cnt = 0; + + switch (shader->source.base.texfmt) { + default: + ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + switch (shader->mask.base.texfmt) { + default: + ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + switch (shader->clip.base.texfmt) { + default: + 
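+        /* any texcoord format not listed below would throw the computed vertex size off */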
ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + switch (shader->dst.base.texfmt) { + default: + ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + return cnt; +} + +void +i915_shader_fini (i915_shader_t *shader) +{ + i915_device_t *device = i915_device (shader->target); + + switch (shader->source.type.pattern) { + case PATTERN_TEXTURE: + case PATTERN_BASE: + case PATTERN_LINEAR: + case PATTERN_RADIAL: + if (shader->source.base.bo != NULL) + cairo_drm_bo_destroy (&device->intel.base.base, &shader->source.base.bo->base); + break; + + default: + case PATTERN_CONSTANT: + break; + } + + switch (shader->mask.type.pattern) { + case PATTERN_TEXTURE: + case PATTERN_BASE: + case PATTERN_LINEAR: + case PATTERN_RADIAL: + if (shader->mask.base.bo != NULL) + cairo_drm_bo_destroy (&device->intel.base.base, &shader->mask.base.bo->base); + break; + + default: + case PATTERN_CONSTANT: + break; + } + + switch (shader->clip.type.pattern) { + case PATTERN_TEXTURE: + case PATTERN_BASE: + case PATTERN_LINEAR: + case PATTERN_RADIAL: + if (shader->clip.base.bo != NULL) + cairo_drm_bo_destroy (&device->intel.base.base, &shader->clip.base.bo->base); + break; + + default: + case PATTERN_CONSTANT: + break; + } +} + +void +i915_shader_set_clip (i915_shader_t *shader, + cairo_clip_t *clip) +{ + cairo_surface_t *clip_surface; + const cairo_rectangle_int_t *clip_extents; + union i915_shader_channel *channel; + i915_surface_t *s; + + clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base); + assert (clip_surface->status == CAIRO_STATUS_SUCCESS); + assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM); + + channel = &shader->clip; + channel->type.pattern = PATTERN_TEXTURE; + channel->type.vertex = VS_TEXTURE_16; + channel->base.texfmt = TEXCOORDFMT_2D_16; + channel->base.content = CAIRO_CONTENT_ALPHA; + + channel->type.fragment = FS_TEXTURE; + channel->surface.pixel = NONE; + + s = (i915_surface_t *) clip_surface; + channel->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo)); + channel->base.n_samplers = 1; + channel->base.offset[0] = s->offset; + channel->base.map[0] = s->map0; + channel->base.map[1] = s->map1; + + channel->base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_NEAREST); + channel->base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (CAIRO_EXTEND_NONE); + + cairo_matrix_init_scale (&shader->clip.base.matrix, + 1. / s->intel.drm.width, + 1. 
/ s->intel.drm.height);
+
+    clip_extents = _cairo_clip_get_extents (clip);
+    cairo_matrix_translate (&shader->clip.base.matrix,
+                            NEAREST_BIAS + clip_extents->x,
+                            NEAREST_BIAS + clip_extents->y);
+}
+
+static cairo_status_t
+i915_shader_check_aperture (i915_shader_t *shader,
+                            i915_device_t *device)
+{
+    cairo_status_t status;
+    intel_bo_t *bo_array[4];
+    uint32_t n = 0;
+
+    if (shader->target != device->current_target)
+        bo_array[n++] = to_intel_bo (shader->target->intel.drm.bo);
+
+    if (shader->source.base.bo != NULL)
+        bo_array[n++] = shader->source.base.bo;
+
+    if (shader->mask.base.bo != NULL)
+        bo_array[n++] = shader->mask.base.bo;
+
+    if (shader->clip.base.bo != NULL)
+        bo_array[n++] = shader->clip.base.bo;
+
+    if (n == 0 || i915_check_aperture (device, bo_array, n))
+        return CAIRO_STATUS_SUCCESS;
+
+    status = i915_batch_flush (device);
+    if (unlikely (status))
+        return status;
+
+    assert (i915_check_aperture (device, bo_array, n));
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static void
+i915_shader_combine_mask (i915_shader_t *shader)
+{
+    if (shader->mask.type.fragment == (i915_fragment_shader_t) -1 ||
+        shader->mask.type.fragment == FS_CONSTANT)
+    {
+        return;
+    }
+
+    if (shader->mask.type.fragment == FS_PURE) {
+        if (shader->mask.solid.pure & (1<<3)) {
+            shader->mask.type.fragment = FS_ONE;
+        } else {
+            shader->mask.type.fragment = FS_ZERO;
+        }
+    }
+
+    if (shader->mask.type.fragment == FS_ONE ||
+        (shader->mask.base.content & CAIRO_CONTENT_ALPHA) == 0)
+    {
+        shader->mask.type.vertex = (i915_vertex_shader_t) -1;
+        shader->mask.type.fragment = (i915_fragment_shader_t) -1;
+        shader->mask.base.texfmt = TEXCOORDFMT_NOT_PRESENT;
+        shader->mask.base.mode = 0;
+    }
+
+    if (shader->mask.type.fragment == FS_ZERO) {
+        shader->source.type.fragment = FS_ZERO;
+        shader->source.type.vertex = VS_CONSTANT;
+        shader->source.base.texfmt = TEXCOORDFMT_NOT_PRESENT;
+        shader->source.base.mode = 0;
+    }
+
+    if (shader->source.type.fragment == FS_ZERO) {
+        shader->mask.type.vertex = (i915_vertex_shader_t) -1;
+        shader->mask.type.fragment = (i915_fragment_shader_t) -1;
+        shader->mask.base.texfmt = TEXCOORDFMT_NOT_PRESENT;
+        shader->mask.base.mode = 0;
+    }
+}
+
+static void
+i915_shader_setup_dst (i915_shader_t *shader)
+{
+    union i915_shader_channel *channel;
+    i915_surface_t *s;
+
+    /* We need to blend manually if we have a clip surface and an unbounded op,
+     * or an extended blend mode.
+     */
+    if (shader->need_combine ||
+        (shader->op < CAIRO_OPERATOR_SATURATE &&
+         (shader->clip.type.fragment == (i915_fragment_shader_t) -1 ||
+          _cairo_operator_bounded_by_mask (shader->op))))
+    {
+        return;
+    }
+
+    shader->need_combine = TRUE;
+
+    channel = &shader->dst;
+    channel->type.pattern = PATTERN_TEXTURE;
+    channel->type.vertex = VS_TEXTURE_16;
+    channel->base.texfmt = TEXCOORDFMT_2D_16;
+    channel->base.content = shader->content;
+
+    channel->type.fragment = FS_TEXTURE;
+    channel->surface.pixel = NONE;
+
+    s = shader->target;
+    channel->base.bo = to_intel_bo (s->intel.drm.bo);
+    channel->base.n_samplers = 1;
+    channel->base.offset[0] = s->offset;
+    channel->base.map[0] = s->map0;
+    channel->base.map[1] = s->map1;
+
+    channel->base.sampler[0] =
+        (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
+        i915_texture_filter (CAIRO_FILTER_NEAREST);
+    channel->base.sampler[1] =
+        SS3_NORMALIZED_COORDS |
+        i915_texture_extend (CAIRO_EXTEND_NONE);
+
+    cairo_matrix_init_scale (&shader->dst.base.matrix,
+                             1. / s->intel.drm.width,
+                             1.
/ s->intel.drm.height); + + cairo_matrix_translate (&shader->dst.base.matrix, + NEAREST_BIAS, + NEAREST_BIAS); +} + +static void +i915_shader_combine_source (i915_shader_t *shader, + i915_device_t *device) +{ + if (device->last_source_fragment == shader->source.type.fragment) + return; + + if (device->last_source_fragment == FS_DIFFUSE) { + switch (shader->source.type.fragment) { + case FS_ONE: + case FS_PURE: + case FS_CONSTANT: + case FS_DIFFUSE: + shader->source.type.fragment = FS_DIFFUSE; + shader->source.base.mode = 0; + break; + case FS_ZERO: + case FS_LINEAR: + case FS_RADIAL: + case FS_TEXTURE: + case FS_YUV: + case FS_SPANS: + default: + break; + } + } + + device->last_source_fragment = shader->source.type.fragment; +} + +static inline float * +i915_composite_vertex (float *v, + const i915_shader_t *shader, + double x, double y) +{ + double s, t; + + /* Each vertex is: + * 2 vertex coordinates + * [0-2] source texture coordinates + * [0-2] mask texture coordinates + */ + + *v++ = x; *v++ = y; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, x, y); + break; + case VS_RADIAL: + case VS_TEXTURE: + s = x, t = y; + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + s = x, t = y; + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + switch (shader->mask.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->mask.linear, x, y); + break; + case VS_RADIAL: + s = x, t = y; + cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE: + s = x, t = y; + cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + s = x, t = y; + cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + + return v; +} + +static inline void +i915_shader_add_rectangle_general (const i915_shader_t *shader, + int x, int y, + int w, int h) +{ + float *vertices; + + vertices = i915_add_rectangle (shader->device); + vertices = i915_composite_vertex (vertices, shader, x + w, y + h); + vertices = i915_composite_vertex (vertices, shader, x, y + h); + vertices = i915_composite_vertex (vertices, shader, x, y); + /* XXX overflow! 
*/ +} + +cairo_status_t +i915_shader_commit (i915_shader_t *shader, + i915_device_t *device) +{ + unsigned floats_per_vertex; + cairo_status_t status; + + i915_shader_combine_source (shader, device); + i915_shader_combine_mask (shader); + i915_shader_setup_dst (shader); + + if (i915_shader_needs_update (shader, device)) { + if (device->vertex_count) { + status = i915_vbo_flush (device); + if (unlikely (status)) + return status; + } + + status = i915_shader_check_aperture (shader, device); + if (unlikely (status)) + return status; + + i915_set_shader_target (device, shader); + i915_set_shader_mode (device, shader); + i915_set_shader_samplers (device, shader); + i915_set_shader_constants (device, shader); + i915_set_shader_program (device, shader); + } + + device->current_shader = shader; + shader->add_rectangle = i915_shader_add_rectangle_general; + + floats_per_vertex = 2 + i915_shader_num_texcoords (shader); + if (device->floats_per_vertex == floats_per_vertex) + return CAIRO_STATUS_SUCCESS; + + if (device->vertex_count) { + status = i915_vbo_flush (device); + if (unlikely (status)) + return status; + } + + if (device->vbo) { + device->batch_base[device->vbo_max_index] |= device->vertex_index; + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S (1) | 0); + device->vbo_max_index = device->batch.used; + OUT_DWORD ((floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + } + + device->floats_per_vertex = floats_per_vertex; + device->rectangle_size = floats_per_vertex * 3 * sizeof (float); + device->vertex_index = + (device->vbo_used + 4*floats_per_vertex - 1) / (4 * floats_per_vertex); + device->vbo_offset = 4 * device->vertex_index * floats_per_vertex; + + return CAIRO_STATUS_SUCCESS; +} diff --git a/src/drm/cairo-drm-i915-spans.c b/src/drm/cairo-drm-i915-spans.c new file mode 100644 index 00000000..0f1617d7 --- /dev/null +++ b/src/drm/cairo-drm-i915-spans.c @@ -0,0 +1,708 @@ +/* cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Red Hat, Inc. 
+ * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-composite-rectangles-private.h" +#include "cairo-boxes-private.h" +#include "cairo-error-private.h" +#include "cairo-drm-i915-private.h" + +/* Operates in either immediate or retained mode. + * When given a clip region we record the sequence of vbo and then + * replay them for each clip rectangle, otherwise we simply emit + * the vbo straight into the command stream. + */ + +typedef struct _i915_spans i915_spans_t; + +typedef float * +(*i915_get_rectangle_func_t) (i915_spans_t *spans); + +typedef void +(*i915_span_func_t) (i915_spans_t *spans, + int x0, int x1, int y0, int y1, + int alpha); + +struct _i915_spans { + cairo_span_renderer_t renderer; + + i915_device_t *device; + + int xmin, xmax; + cairo_bool_t is_bounded; + const cairo_rectangle_int_t *extents; + + i915_get_rectangle_func_t get_rectangle; + i915_span_func_t span; + i915_shader_t shader; + + cairo_region_t *clip_region; + cairo_bool_t need_clip_surface; + + struct vbo { + struct vbo *next; + intel_bo_t *bo; + unsigned int count; + } head, *tail; + + int rectangle_size; + + unsigned int vbo_offset; + float *vbo_base; +}; + +static float * +i915_emit_rectangle (i915_spans_t *spans) +{ + return i915_add_rectangle (spans->device); +} + +static float * +i915_accumulate_rectangle (i915_spans_t *spans) +{ + float *vertices; + uint32_t size; + + size = spans->rectangle_size; + if (unlikely (spans->vbo_offset + size > I915_VBO_SIZE)) { + struct vbo *vbo; + + intel_bo_unmap (spans->tail->bo); + + vbo = malloc (sizeof (struct vbo)); + if (unlikely (vbo == NULL)) { + /* throw error! */ + } + + spans->tail->next = vbo; + spans->tail = vbo; + + vbo->next = NULL; + vbo->bo = intel_bo_create (&spans->device->intel, I915_VBO_SIZE, FALSE); + vbo->count = 0; + + spans->vbo_offset = 0; + spans->vbo_base = intel_bo_map (&spans->device->intel, vbo->bo); + } + + vertices = spans->vbo_base + spans->vbo_offset; + spans->vbo_offset += size; + spans->tail->count += 3; + + return vertices; +} + +static void +i915_span_constant (i915_spans_t *spans, + int x0, int x1, int y0, int y1, + int alpha) +{ + float *vertices; + float a = alpha / 255.; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + *vertices++ = a; +} + +static void +i915_span_linear (i915_spans_t *spans, + int x0, int x1, int y0, int y1, + int alpha) +{ + float *vertices; + float a = alpha / 255.; + double s, t; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + s = x0, t = y0; + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + s = x1, t = y0; + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + s = x1, t = y1; + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + *vertices++ = a; +} + +static void +i915_span_radial (i915_spans_t *spans, + int x0, int x1, int y0, int y1, + int alpha) +{ + float *vertices; + float a = alpha / 255.; + double s, t; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + s = x0, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ 
= a; + + *vertices++ = x0; + *vertices++ = y1; + s = x1, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; +} + +static void +i915_span_texture (i915_spans_t *spans, + int x0, int x1, int y0, int y1, int alpha) +{ + float *vertices; + float a = alpha / 255.; + double s, t; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + s = x0, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + s = x1, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; +} + +static void +i915_span_texture16 (i915_spans_t *spans, + int x0, int x1, int y0, int y1, int alpha) +{ + float *vertices; + float a = alpha / 255.; + double s, t; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + s = x0, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + s = x1, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + *vertices++ = a; +} + +static void +i915_span_generic (i915_spans_t *spans, + int x0, int x1, int y0, int y1, int alpha) +{ + double s, t; + float *vertices; + + /* Each vertex is: + * 2 vertex coordinates + * [0-2] source texture coordinates + * 1 alpha value. 
+ * [0,2] clip mask coordinates + */ + + vertices = spans->get_rectangle (spans); + + /* bottom right */ + *vertices++ = x1; *vertices++ = y1; + s = x1, t = y1; + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + break; + case VS_RADIAL: + case VS_TEXTURE: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + break; + } + *vertices++ = alpha; + if (spans->need_clip_surface) { + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + } + + /* bottom left */ + *vertices++ = x0; *vertices++ = y1; + s = x0, t = y1; + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + break; + case VS_RADIAL: + case VS_TEXTURE: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + break; + } + *vertices++ = alpha; + if (spans->need_clip_surface) { + s = x0, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + } + + /* top left */ + *vertices++ = x0; *vertices++ = y0; + s = x0, t = y0; + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + break; + case VS_RADIAL: + case VS_TEXTURE: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + break; + } + *vertices++ = alpha; + if (spans->need_clip_surface) { + s = x0, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + } +} + +static cairo_status_t +i915_bounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage >= 128) { + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + 255); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_bounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage) { + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_unbounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + spans->span (spans, + spans->xmin, spans->xmax, + y, y + 
height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + spans->span (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + spans->span (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_unbounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + spans->span (spans, + spans->xmin, spans->xmax, + y, y + height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + spans->span (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + int alpha = 0; + if (half[0].coverage >= 128) + alpha = 255; + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + alpha); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + spans->span (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_spans_init (i915_spans_t *spans, + i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + cairo_status_t status; + + spans->device = (i915_device_t *) dst->intel.drm.base.device; + + spans->is_bounded = extents->is_bounded; + if (extents->is_bounded) { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i915_bounded_spans_mono; + else + spans->renderer.render_rows = i915_bounded_spans; + + spans->extents = &extents->bounded; + } else { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i915_unbounded_spans_mono; + else + spans->renderer.render_rows = i915_unbounded_spans; + + spans->extents = &extents->unbounded; + } + spans->xmin = spans->extents->x; + spans->xmax = spans->extents->x + spans->extents->width; + + spans->clip_region = NULL; + spans->need_clip_surface = FALSE; + if (clip != NULL) { + cairo_region_t *clip_region = NULL; + + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + spans->clip_region = clip_region; + spans->need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + } + + spans->head.next = NULL; + spans->head.bo = NULL; + spans->head.count = 0; + spans->tail = &spans->head; + + if (spans->clip_region == NULL) { + spans->get_rectangle = i915_emit_rectangle; + } else { + assert (! extents->is_bounded); + spans->get_rectangle = i915_accumulate_rectangle; + spans->head.bo = intel_bo_create (&spans->device->intel, + I915_VBO_SIZE, FALSE); + if (unlikely (spans->head.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + spans->vbo_base = intel_bo_map (&spans->device->intel, spans->head.bo); + } + spans->vbo_offset = 0; + + i915_shader_init (&spans->shader, dst, op); + if (spans->need_clip_surface) + i915_shader_set_clip (&spans->shader, clip); + + status = i915_shader_acquire_pattern (&spans->shader, &spans->shader.source, + pattern, &extents->bounded); + if (unlikely (status)) + return status; + + if (! 
spans->need_clip_surface) { + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + spans->span = i915_span_constant; + break; + case VS_LINEAR: + spans->span = i915_span_linear; + break; + case VS_RADIAL: + spans->span = i915_span_radial; + break; + case VS_TEXTURE: + spans->span = i915_span_texture; + break; + case VS_TEXTURE_16: + spans->span = i915_span_texture16; + break; + default: + spans->span = i915_span_generic; + break; + } + } else { + spans->span = i915_span_generic; + } + + spans->rectangle_size = 3 * (2 + i915_shader_num_texcoords (&spans->shader)); + return CAIRO_STATUS_SUCCESS; +} + +static void +i915_spans_fini (i915_spans_t *spans) +{ + i915_shader_fini (&spans->shader); + + if (spans->head.bo != NULL) { + struct vbo *vbo, *next; + + intel_bo_destroy (&spans->device->intel, spans->head.bo); + for (vbo = spans->head.next; vbo != NULL; vbo = next) { + next = vbo->next; + intel_bo_destroy (&spans->device->intel, vbo->bo); + free (vbo); + } + } +} + +cairo_status_t +i915_clip_and_composite_spans (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + i915_spans_func_t draw_func, + void *draw_closure, + const cairo_composite_rectangles_t*extents, + cairo_clip_t *clip) +{ + i915_spans_t spans; + i915_device_t *device; + cairo_status_t status; + struct vbo *vbo; + + if (op == CAIRO_OPERATOR_CLEAR) { + pattern = &_cairo_pattern_white.base; + op = CAIRO_OPERATOR_DEST_OUT; + } + + status = i915_spans_init (&spans, dst, op, pattern, antialias, clip, extents); + if (unlikely (status)) + return status; + + spans.shader.mask.base.texfmt = TEXCOORDFMT_1D; + spans.shader.mask.base.content = CAIRO_CONTENT_ALPHA; + spans.shader.mask.type.fragment = FS_SPANS; + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SPANS; + + device = i915_device (dst); + status = i915_shader_commit (&spans.shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + status = draw_func (draw_closure, &spans.renderer, spans.extents); + if (spans.clip_region != NULL && status == CAIRO_STATUS_SUCCESS) { + intel_bo_unmap (spans.tail->bo); + + i915_vbo_finish (device); + + OUT_DWORD (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT); + for (vbo = &spans.head; vbo != NULL; vbo = vbo->next) { + int i, num_rectangles; + + /* XXX require_space & batch_flush */ + + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S (0) | I1_LOAD_S (1) | 1); + i915_batch_emit_reloc (device, vbo->bo, 0, + I915_GEM_DOMAIN_VERTEX, 0); + OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT) | + vbo->count); + + num_rectangles = cairo_region_num_rectangles (spans.clip_region); + for (i = 0; i < num_rectangles; i++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (spans.clip_region, i, &rect); + + OUT_DWORD (_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_DWORD (SCISSOR_RECT_0_XMIN (rect.x) | + SCISSOR_RECT_0_YMIN (rect.y)); + OUT_DWORD (SCISSOR_RECT_0_XMAX (rect.x + rect.width) | + SCISSOR_RECT_0_YMAX (rect.y + rect.height)); + + OUT_DWORD (PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | vbo->count); + OUT_DWORD (0); + } + } + OUT_DWORD (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + } + +CLEANUP_DEVICE: + cairo_device_release (dst->intel.drm.base.device); +CLEANUP_SPANS: + i915_spans_fini (&spans); + + return status; +} diff --git a/src/drm/cairo-drm-i915-surface.c b/src/drm/cairo-drm-i915-surface.c new file mode 100644 index 
00000000..2079ac96 --- /dev/null +++ b/src/drm/cairo-drm-i915-surface.c @@ -0,0 +1,1996 @@ +/* Cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Chris Wilson + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * ************************************************************************** + * This work was initially based upon xf86-video-intel/src/i915_render.c: + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * + * ************************************************************************** + * and also upon libdrm/intel/intel_bufmgr_gem.c: + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + * Dave Airlie <airlied@linux.ie> + */ + +/* XXX + * + * - Per thread context? Would it actually avoid many locks? + * + */ + +#include "cairoint.h" + +#include "cairo-drm-private.h" +#include "cairo-drm-ioctl-private.h" +#include "cairo-drm-intel-private.h" +#include "cairo-drm-intel-command-private.h" +#include "cairo-drm-intel-ioctl-private.h" +#include "cairo-drm-i915-private.h" + +#include "cairo-boxes-private.h" +#include "cairo-cache-private.h" +#include "cairo-composite-rectangles-private.h" +#include "cairo-error-private.h" +#include "cairo-freelist-private.h" +#include "cairo-list-private.h" +#include "cairo-path-fixed-private.h" +#include "cairo-region-private.h" +#include "cairo-surface-offset-private.h" + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <errno.h> + +static cairo_int_status_t +i915_surface_fill (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t*source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip); + +static const uint32_t i915_batch_setup[] = { + /* Disable line anti-aliasing */ + _3DSTATE_AA_CMD, + + /* Disable independent alpha blend */ + _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | + IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT) | + IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT), + + /* Disable texture crossbar */ + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB (0, 0) | + CSB_TCB (1, 1) | + CSB_TCB (2, 2) | + CSB_TCB (3, 3) | + CSB_TCB (4, 4) | + CSB_TCB (5, 5) | + CSB_TCB (6, 6) | + CSB_TCB (7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX (1) | + ENABLE_TRI_FAN_PROVOKE_VRTX | TRI_FAN_PROVOKE_VRTX (2) | + ENABLE_TEXKILL_3D_4D | TEXKILL_4D, + + _3DSTATE_MODES_4_CMD | ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC (LOGICOP_COPY), + + _3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S (2) | + I1_LOAD_S (3) | + I1_LOAD_S (4) | + I1_LOAD_S (5) | + I1_LOAD_S (6) | + 4, + S2_TEXCOORD_NONE, + 0, /* Disable texture coordinate wrap-shortest */ + (1 << S4_POINT_WIDTH_SHIFT) | + 
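+	/* default point and line widths, flat shading, no culling, XY-only vertices: */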
S4_LINE_WIDTH_ONE | + S4_FLATSHADE_ALPHA | + S4_FLATSHADE_FOG | + S4_FLATSHADE_SPECULAR | + S4_FLATSHADE_COLOR | + S4_CULLMODE_NONE | + S4_VFMT_XY, + 0, /* Disable stencil buffer */ + S6_COLOR_WRITE_ENABLE, + + _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT, + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state */ + _3DSTATE_LOAD_INDIRECT, + 0, + + _3DSTATE_STIPPLE, + 0, + + _3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE, +}; + +static const cairo_surface_backend_t i915_surface_backend; + +#define NEAREST_BIAS (-.375) + +static cairo_surface_t * +i915_surface_create_from_cacheable_image (cairo_drm_device_t *base_dev, + cairo_surface_t *source); + +static cairo_status_t +i915_bo_exec (i915_device_t *device, intel_bo_t *bo, uint32_t offset) +{ + struct drm_i915_gem_execbuffer2 execbuf; + int ret, cnt, i; + + /* Add the batch buffer to the validation list. */ + cnt = device->batch.exec_count; + if (cnt > 0 && bo->base.handle == device->batch.exec[cnt-1].handle) + i = cnt - 1; + else + i = device->batch.exec_count++; + device->batch.exec[i].handle = bo->base.handle; + device->batch.exec[i].relocation_count = device->batch.reloc_count; + device->batch.exec[i].relocs_ptr = (uintptr_t) device->batch.reloc; + device->batch.exec[i].alignment = 0; + device->batch.exec[i].offset = 0; + device->batch.exec[i].flags = 0; + device->batch.exec[i].rsvd1 = 0; + device->batch.exec[i].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) device->batch.exec; + execbuf.buffer_count = device->batch.exec_count; + execbuf.batch_start_offset = offset; + execbuf.batch_len = (device->batch.used << 2) + sizeof (device->batch_header); + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.num_cliprects = 0; + execbuf.cliprects_ptr = 0; + execbuf.flags = 0; + execbuf.rsvd1 = 0; + execbuf.rsvd2 = 0; + + if (I915_VERBOSE && device->debug & I915_DEBUG_EXEC) { + int n; + fprintf (stderr, + "Executing batch: %d+%d bytes, %d buffers, %d relocations\n", + execbuf.batch_start_offset, + execbuf.batch_len, + device->batch.exec_count, + device->batch.reloc_count); + for (n = 0; n < device->batch.exec_count; n++) { + fprintf (stderr, " exec[%d] = %d\n", n, + device->batch.exec[n].handle); + } + for (n = 0; n < device->batch.reloc_count; n++) { + fprintf (stderr, " reloc[%d] = %d @ %qx\n", n, + device->batch.reloc[n].target_handle, + (unsigned long long) device->batch.reloc[n].offset); + } + } + + do { + ret = ioctl (device->intel.base.fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); + } while (ret != 0 && errno == EINTR); + + if (I915_VERBOSE && ret) { + int n; + + fprintf (stderr, "Batch submission failed: %d\n", errno); + fprintf (stderr, " relocation entries: %d/%d\n", + device->batch.reloc_count, I915_MAX_RELOCS); + fprintf (stderr, " gtt size: %zd/%zd\n", + device->batch.gtt_size, device->intel.gtt_avail_size); + + fprintf (stderr, " buffers:\n"); + for (n = 0; n < cnt; n++) { + fprintf (stderr, " exec[%d] = %d\n", + n, device->batch.target_bo[n]->base.size); + } + + intel_dump_batchbuffer (device->batch_header, + device->batch.used, + device->intel.base.chip_id); + } + + VG (VALGRIND_MAKE_MEM_DEFINED (device->batch.exec, sizeof (device->batch.exec[0]) * i)); + + bo->offset = device->batch.exec[i].offset; + while (cnt--) { + device->batch.target_bo[cnt]->offset = device->batch.exec[cnt].offset; + device->batch.target_bo[cnt]->exec = NULL; + device->batch.target_bo[cnt]->batch_read_domains = 0; + device->batch.target_bo[cnt]->batch_write_domain = 0; + intel_bo_destroy (&device->intel, device->batch.target_bo[cnt]); 
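+	/* (this drops the reference taken in i915_batch_add_reloc; the kernel now holds the buffers for the duration of the request) */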
+ } + + device->batch.exec_count = 0; + device->batch.reloc_count = 0; + + device->batch.gtt_size = I915_BATCH_SIZE; + + return ret == 0 ? CAIRO_STATUS_SUCCESS : _cairo_error (CAIRO_STATUS_NO_MEMORY); +} + +void +i915_batch_add_reloc (i915_device_t *device, + uint32_t pos, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain) +{ + int index; + + if (bo->exec == NULL) { + device->batch.gtt_size += bo->base.size; + + index = device->batch.exec_count++; + device->batch.exec[index].handle = bo->base.handle; + device->batch.exec[index].relocation_count = 0; + device->batch.exec[index].relocs_ptr = 0; + device->batch.exec[index].alignment = 0; + device->batch.exec[index].offset = 0; + device->batch.exec[index].flags = 0; + device->batch.exec[index].rsvd1 = 0; + device->batch.exec[index].rsvd2 = 0; + + device->batch.target_bo[index] = intel_bo_reference (bo); + + bo->exec = &device->batch.exec[index]; + } + + index = device->batch.reloc_count++; + device->batch.reloc[index].offset = (pos << 2) + sizeof (device->batch_header); + device->batch.reloc[index].delta = offset; + device->batch.reloc[index].target_handle = bo->base.handle; + device->batch.reloc[index].read_domains = read_domains; + device->batch.reloc[index].write_domain = write_domain; + device->batch.reloc[index].presumed_offset = bo->offset; + + assert (write_domain == 0 || bo->batch_write_domain == 0 || bo->batch_write_domain == write_domain); + bo->batch_read_domains |= read_domains; + bo->batch_write_domain |= write_domain; +} + +void +i915_vbo_finish (i915_device_t *device) +{ + if (device->vbo_used == 0) + return; + + if (device->vbo || i915_batch_space (device) < (int32_t) device->vbo_used) { + intel_bo_t *vbo; + + if (device->vertex_count) { + if (device->vbo == 0) { + /* XXX unchecked, must fit! */ + /* XXX batch_flush and i915_shader_commit (device, device->shader)); */ + assert (i915_check_aperture_size (device, 1, + (device->vbo_used + 4095) & -4096)); + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S (0) | + I1_LOAD_S (1) | + 1); + device->vbo = device->batch.used++; + device->vbo_max_index = device->batch.used; + OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + } + + OUT_DWORD (PRIM3D_RECTLIST | + PRIM3D_INDIRECT_SEQUENTIAL | + device->vertex_count); + OUT_DWORD (device->vertex_index); + } + + if (I915_VERBOSE && device->debug & I915_DEBUG_BUFFER) { + fprintf (stderr, "Creating vertex buffer: %d bytes\n", + device->vbo_used); + } + + if (device->last_vbo != NULL) + intel_bo_destroy (&device->intel, device->last_vbo); + + device->batch_base[device->vbo_max_index] |= device->vertex_index + device->vertex_count; + + /* will include a few bytes of inter-array padding */ + vbo = intel_bo_create (&device->intel, device->vbo_used, FALSE); + i915_batch_fill_reloc (device, device->vbo, + vbo, 0, + I915_GEM_DOMAIN_VERTEX, 0); + intel_bo_write (&device->intel, vbo, 0, device->vbo_used, device->vbo_base); + device->last_vbo = vbo; + device->last_vbo_offset = (device->vbo_used+7)&-8; + device->last_vbo_space = vbo->base.size - device->last_vbo_offset; + + device->vbo = 0; + } + else + { + /* Only a single rectlist in this batch, and no active vertex buffer. 
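+	 * Copy the vertices inline into the batch stream instead of
+	 * pointing the vertex-buffer state at a separate bo.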
*/ + OUT_DWORD (PRIM3D_RECTLIST | (device->vbo_used / 4 - 1)); + + memcpy (BATCH_PTR (device), device->vbo_base, device->vbo_used); + device->batch.used += device->vbo_used >> 2; + } + + device->vbo_used = device->vbo_offset = 0; + device->vertex_index = device->vertex_count = 0; +} + +/* XXX improve state tracker/difference and flush state on vertex emission */ +static void +i915_device_reset (i915_device_t *device) +{ + if (device->current_source != NULL) + *device->current_source = 0; + if (device->current_mask != NULL) + *device->current_mask = 0; + if (device->current_clip != NULL) + *device->current_clip = 0; + + device->current_target = NULL; + device->current_size = 0; + device->current_source = NULL; + device->current_mask = NULL; + device->current_clip = NULL; + device->current_shader = 0; + device->current_texcoords = ~0; + device->current_blend = 0; + device->current_n_constants = 0; + device->current_n_samplers = 0; + device->current_colorbuf = 0; + device->current_diffuse = 0; + device->current_program = ~0; + + device->last_source_fragment = ~0; + + device->floats_per_vertex = 0; +} + +static void +i915_batch_cleanup (i915_device_t *device) +{ + int i; + + for (i = 0; i < device->batch.exec_count; i++) + intel_bo_destroy (&device->intel, device->batch.target_bo[i]); + + device->batch.exec_count = 0; + device->batch.reloc_count = 0; +} + +cairo_status_t +i915_batch_flush (i915_device_t *device) +{ + intel_bo_t *batch; + cairo_status_t status; + uint32_t length, offset; + int n; + + i915_vbo_finish (device); + + if (device->batch.used == 0) + return CAIRO_STATUS_SUCCESS; + + i915_batch_emit_dword (device, MI_BATCH_BUFFER_END); + if ((device->batch.used & 1) != (sizeof (device->batch_header) & 4)) + i915_batch_emit_dword (device, MI_NOOP); + + length = (device->batch.used << 2) + sizeof (device->batch_header); + + if (I915_VERBOSE && device->debug & I915_DEBUG_BATCH) + intel_dump_batchbuffer (device->batch_header, length, device->intel.base.chip_id); + + intel_glyph_cache_unmap(&device->intel); + + /* NB: it is faster to copy the data then map/unmap the batch, + * presumably because we frequently only use a small part of the buffer. 
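+     * (intel_bo_write below copies only the bytes that were actually used.)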
+ */ + batch = NULL; + if (device->last_vbo) { + if (length <= device->last_vbo_space) { + if (I915_VERBOSE && device->debug & I915_DEBUG_BATCH) { + fprintf (stderr, "Packing batch buffer into last vbo: %d+%d bytes\n", length, device->last_vbo_offset); + } + batch = device->last_vbo; + offset = device->last_vbo_offset; + + /* fixup the relocations */ + for (n = 0; n < device->batch.reloc_count; n++) + device->batch.reloc[n].offset += offset; + } else + intel_bo_destroy (&device->intel, device->last_vbo); + device->last_vbo = NULL; + } + if (batch == NULL) { + if (I915_VERBOSE && device->debug & I915_DEBUG_BUFFER) { + fprintf (stderr, "Creating batch buffer: %d bytes\n", length); + } + batch = intel_bo_create (&device->intel, length, FALSE); + if (unlikely (batch == NULL)) { + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + i915_batch_cleanup (device); + goto BAIL; + } + + offset = 0; + } + intel_bo_write (&device->intel, batch, offset, length, device->batch_header); + status = i915_bo_exec (device, batch, offset); + + if (device->debug & I915_DEBUG_SYNC && status == CAIRO_STATUS_SUCCESS) + intel_bo_wait (&device->intel, batch); + + intel_bo_destroy (&device->intel, batch); + +BAIL: + device->batch.used = 0; + + intel_glyph_cache_unpin (&device->intel); + intel_snapshot_cache_thaw (&device->intel); + + i915_device_reset (device); + + return status; +} + +cairo_status_t +i915_vbo_flush (i915_device_t *device) +{ + assert (device->vertex_count); + + if (device->vbo == 0) { + assert (device->floats_per_vertex); + + if (i915_batch_space (device) < 9 || + ! i915_check_aperture_size (device, 1, I915_VBO_SIZE)) + { + return i915_batch_flush (device); + } + + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S (0) | + I1_LOAD_S (1) | + 1); + device->vbo = device->batch.used++; + device->vbo_max_index = device->batch.used; + OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + } + + OUT_DWORD (PRIM3D_RECTLIST | + PRIM3D_INDIRECT_SEQUENTIAL | + device->vertex_count); + OUT_DWORD (device->vertex_index); + + device->vertex_index += device->vertex_count; + device->vertex_count = 0; + + return CAIRO_STATUS_SUCCESS; +} + +#if 0 +static float * +i915_add_rectangles (i915_device_t *device, int num_rects, int *count) +{ + float *vertices; + uint32_t size; + int cnt; + + assert (device->floats_per_vertex); + + size = device->rectangle_size; + if (unlikely (device->vbo_offset + size > I915_VBO_SIZE)) + i915_vbo_finish (device); + + vertices = (float *) (device->vbo_base + device->vbo_offset); + cnt = (I915_VBO_SIZE - device->vbo_offset) / size; + if (cnt > num_rects) + cnt = num_rects; + device->vbo_used = device->vbo_offset += size * cnt; + device->vertex_count += 3 * cnt; + *count = cnt; + return vertices; +} +#endif + +static cairo_status_t +i915_surface_finish (void *abstract_surface) +{ + i915_surface_t *surface = abstract_surface; + i915_device_t *device = i915_device (surface); + + if (surface->stencil != NULL) + intel_bo_destroy (&device->intel, surface->stencil); + + if (surface->is_current_texture) { + if (surface->is_current_texture & CURRENT_SOURCE) + device->current_source = NULL; + if (surface->is_current_texture & CURRENT_MASK) + device->current_mask = NULL; + if (surface->is_current_texture & CURRENT_CLIP) + device->current_clip = NULL; + device->current_n_samplers = 0; + } + + if (surface == device->current_target) + device->current_target = NULL; + + if (surface->cache != NULL) { + i915_image_private_t *node = 
surface->cache;
+	intel_buffer_cache_t *cache = node->container;
+
+	if (--cache->ref_count == 0) {
+	    intel_bo_destroy (&device->intel, cache->buffer.bo);
+	    _cairo_rtree_fini (&cache->rtree);
+	    cairo_list_del (&cache->link);
+	    free (cache);
+	} else {
+	    node->node.state = CAIRO_RTREE_NODE_AVAILABLE;
+	    cairo_list_move (&node->node.link, &cache->rtree.available);
+	    _cairo_rtree_node_collapse (&cache->rtree, node->node.parent);
+	}
+    }
+
+    return _cairo_drm_surface_finish (&surface->intel.drm);
+}
+
+static cairo_status_t
+i915_surface_batch_flush (i915_surface_t *surface)
+{
+    cairo_status_t status;
+    intel_bo_t *bo;
+
+    assert (surface->intel.drm.fallback == NULL);
+
+    bo = to_intel_bo (surface->intel.drm.bo);
+    if (bo == NULL || bo->batch_write_domain == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    status = cairo_device_acquire (surface->intel.drm.base.device);
+    if (unlikely (status))
+	return status;
+
+    status = i915_batch_flush (i915_device (surface));
+    cairo_device_release (surface->intel.drm.base.device);
+
+    return status;
+}
+
+static cairo_status_t
+i915_surface_flush (void *abstract_surface)
+{
+    i915_surface_t *surface = abstract_surface;
+
+    if (surface->intel.drm.fallback == NULL) {
+	if (surface->intel.drm.base.finished) {
+	    /* Forgo flushing on finish as the user cannot access the surface directly. */
+	    return CAIRO_STATUS_SUCCESS;
+	}
+
+	return i915_surface_batch_flush (surface);
+    }
+
+    return intel_surface_flush (abstract_surface);
+}
+
+/* rasterisation */
+
+static cairo_status_t
+_composite_boxes_spans (void *closure,
+			cairo_span_renderer_t *renderer,
+			const cairo_rectangle_int_t *extents)
+{
+    cairo_boxes_t *boxes = closure;
+    cairo_rectangular_scan_converter_t converter;
+    struct _cairo_boxes_chunk *chunk;
+    cairo_status_t status;
+    int i;
+
+    _cairo_rectangular_scan_converter_init (&converter, extents);
+    for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) {
+	cairo_box_t *box = chunk->base;
+	for (i = 0; i < chunk->count; i++) {
+	    status = _cairo_rectangular_scan_converter_add_box (&converter, &box[i], 1);
+	    if (unlikely (status))
+		goto CLEANUP;
+	}
+    }
+
+    status = converter.base.generate (&converter.base, renderer);
+
+  CLEANUP:
+    converter.base.destroy (&converter.base);
+    return status;
+}
+
+cairo_status_t
+i915_fixup_unbounded (i915_surface_t *dst,
+		      const cairo_composite_rectangles_t *extents,
+		      cairo_clip_t *clip)
+{
+    i915_shader_t shader;
+    cairo_status_t status;
+
+    i915_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR);
+
+    if (clip != NULL) {
+	cairo_region_t *clip_region = NULL;
+
+	status = _cairo_clip_get_region (clip, &clip_region);
+	assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED);
+	assert (clip_region == NULL);
+
+	if (status == CAIRO_INT_STATUS_UNSUPPORTED)
+	    i915_shader_set_clip (&shader, clip);
+    } else {
+	if (extents->bounded.width == extents->unbounded.width &&
+	    extents->bounded.height == extents->unbounded.height)
+	{
+	    return CAIRO_STATUS_SUCCESS;
+	}
+    }
+
+    status = i915_shader_commit (&shader,
+				 (i915_device_t *) dst->intel.drm.base.device);
+    if (unlikely (status))
+	return status;
+
+    /* top */
+    if (extents->bounded.y != extents->unbounded.y) {
+	shader.add_rectangle (&shader,
+			      extents->unbounded.x,
+			      extents->unbounded.y,
+			      extents->unbounded.width,
+			      extents->bounded.y - extents->unbounded.y);
+    }
+
+    /* left */
+    if (extents->bounded.x != extents->unbounded.x) {
+	shader.add_rectangle (&shader,
+			      extents->unbounded.x,
+			      extents->bounded.y,
+			      extents->bounded.x - extents->unbounded.x,
+			      extents->bounded.height);
+    }
+
+    /* right */
+    if (extents->bounded.x + extents->bounded.width != extents->unbounded.x + extents->unbounded.width) {
+	shader.add_rectangle (&shader,
+			      extents->bounded.x + extents->bounded.width,
+			      extents->bounded.y,
+			      extents->unbounded.x + extents->unbounded.width - (extents->bounded.x + extents->bounded.width),
+			      extents->bounded.height);
+    }
+
+    /* bottom */
+    if (extents->bounded.y + extents->bounded.height != extents->unbounded.y + extents->unbounded.height) {
+	shader.add_rectangle (&shader,
+			      extents->unbounded.x,
+			      extents->bounded.y + extents->bounded.height,
+			      extents->unbounded.width,
+			      extents->unbounded.y + extents->unbounded.height - (extents->bounded.y + extents->bounded.height));
+    }
+
+    i915_shader_fini (&shader);
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+i915_fixup_unbounded_boxes (i915_surface_t *dst,
+			    const cairo_composite_rectangles_t *extents,
+			    cairo_clip_t *clip,
+			    cairo_boxes_t *boxes)
+{
+    cairo_boxes_t clear;
+    cairo_box_t box;
+    cairo_region_t *clip_region = NULL;
+    cairo_status_t status;
+    struct _cairo_boxes_chunk *chunk;
+    i915_shader_t shader;
+    int i;
+
+    if (boxes->num_boxes <= 1)
+	return i915_fixup_unbounded (dst, extents, clip);
+
+    i915_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR);
+    _cairo_boxes_init (&clear);
+
+    box.p1.x = _cairo_fixed_from_int (extents->unbounded.x + extents->unbounded.width);
+    box.p1.y = _cairo_fixed_from_int (extents->unbounded.y);
+    box.p2.x = _cairo_fixed_from_int (extents->unbounded.x);
+    box.p2.y = _cairo_fixed_from_int (extents->unbounded.y + extents->unbounded.height);
+
+    if (clip != NULL) {
+	status = _cairo_clip_get_region (clip, &clip_region);
+	assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED);
+	if (status == CAIRO_INT_STATUS_UNSUPPORTED)
+	    i915_shader_set_clip (&shader, clip);
+    }
+
+    if (clip_region == NULL) {
+	cairo_boxes_t tmp;
+
+	_cairo_boxes_init (&tmp);
+
+	status = _cairo_boxes_add (&tmp, &box);
+	assert (status == CAIRO_STATUS_SUCCESS);
+
+	tmp.chunks.next = &boxes->chunks;
+	tmp.num_boxes += boxes->num_boxes;
+
+	status = _cairo_bentley_ottmann_tessellate_boxes (&tmp,
+							  CAIRO_FILL_RULE_WINDING,
+							  &clear);
+
+	tmp.chunks.next = NULL;
+    } else {
+	pixman_box32_t *pbox;
+
+	pbox = pixman_region32_rectangles (&clip_region->rgn, &i);
+	_cairo_boxes_limit (&clear, (cairo_box_t *) pbox, i);
+
+	status = _cairo_boxes_add (&clear, &box);
+	assert (status == CAIRO_STATUS_SUCCESS);
+
+	for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) {
+	    for (i = 0; i < chunk->count; i++) {
+		status = _cairo_boxes_add (&clear, &chunk->base[i]);
+		if (unlikely (status)) {
+		    _cairo_boxes_fini (&clear);
+		    return status;
+		}
+	    }
+	}
+
+	status = _cairo_bentley_ottmann_tessellate_boxes (&clear,
+							  CAIRO_FILL_RULE_WINDING,
+							  &clear);
+    }
+
+    if (likely (status == CAIRO_STATUS_SUCCESS && clear.num_boxes)) {
+	status = i915_shader_commit (&shader,
+				     (i915_device_t *) dst->intel.drm.base.device);
+	if (likely (status == CAIRO_STATUS_SUCCESS)) {
+	    for (chunk = &clear.chunks; chunk != NULL; chunk = chunk->next) {
+		for (i = 0; i < chunk->count; i++) {
+		    int x1 = _cairo_fixed_integer_part (chunk->base[i].p1.x);
+		    int y1 = _cairo_fixed_integer_part (chunk->base[i].p1.y);
+		    int x2 = _cairo_fixed_integer_part (chunk->base[i].p2.x);
+		    int y2 = _cairo_fixed_integer_part (chunk->base[i].p2.y);
+
+		    shader.add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1);
+		}
+	    }
+	}
+	i915_shader_fini (&shader);
+    }
+
+    _cairo_boxes_fini (&clear);
+
+    return status;
+}
+
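For reference, i915_fixup_unbounded above partitions (unbounded minus bounded) into at most four non-overlapping strips: top and bottom span the full unbounded width, while left and right are clamped to the bounded rows. A minimal standalone sketch of just that arithmetic, with an invented rect_t type and emit callback standing in for shader.add_rectangle:

    #include <stdio.h>

    typedef struct { int x, y, width, height; } rect_t;

    /* Emit up to four rectangles that tile (unbounded minus bounded),
     * mirroring the top/left/right/bottom cases above. */
    static void
    fixup_unbounded (const rect_t *bounded, const rect_t *unbounded,
                     void (*emit) (const rect_t *))
    {
        rect_t r;

        if (bounded->y != unbounded->y) {                 /* top */
            r.x = unbounded->x; r.y = unbounded->y;
            r.width = unbounded->width;
            r.height = bounded->y - unbounded->y;
            emit (&r);
        }
        if (bounded->x != unbounded->x) {                 /* left */
            r.x = unbounded->x; r.y = bounded->y;
            r.width = bounded->x - unbounded->x;
            r.height = bounded->height;
            emit (&r);
        }
        if (bounded->x + bounded->width !=
            unbounded->x + unbounded->width) {            /* right */
            r.x = bounded->x + bounded->width; r.y = bounded->y;
            r.width = (unbounded->x + unbounded->width) - r.x;
            r.height = bounded->height;
            emit (&r);
        }
        if (bounded->y + bounded->height !=
            unbounded->y + unbounded->height) {           /* bottom */
            r.x = unbounded->x; r.y = bounded->y + bounded->height;
            r.width = unbounded->width;
            r.height = (unbounded->y + unbounded->height) - r.y;
            emit (&r);
        }
    }

    static void
    print_rect (const rect_t *r)
    {
        printf ("%dx%d at (%d,%d)\n", r->width, r->height, r->x, r->y);
    }

    int
    main (void)
    {
        const rect_t bounded = { 10, 10, 20, 20 };
        const rect_t unbounded = { 0, 0, 100, 100 };
        fixup_unbounded (&bounded, &unbounded, print_rect);
        return 0;
    }

Because the left and right strips are clamped to the bounded rows, the four rectangles are disjoint, so each pixel outside the bounded extents is cleared exactly once.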
+static cairo_status_t +_composite_boxes (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_boxes_t *boxes, + cairo_antialias_t antialias, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + cairo_bool_t need_clip_surface = FALSE; + cairo_region_t *clip_region = NULL; + const struct _cairo_boxes_chunk *chunk; + cairo_status_t status; + i915_shader_t shader; + int i; + + /* If the boxes are not pixel-aligned, we will need to compute a real mask */ + if (antialias != CAIRO_ANTIALIAS_NONE) { + if (! boxes->is_pixel_aligned) + return CAIRO_INT_STATUS_UNSUPPORTED; + } + + i915_shader_init (&shader, dst, op); + + status = i915_shader_acquire_pattern (&shader, + &shader.source, + pattern, + &extents->bounded); + if (unlikely (status)) + return status; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + if (need_clip_surface) + i915_shader_set_clip (&shader, clip); + } + + status = i915_shader_commit (&shader, + (i915_device_t *) dst->intel.drm.base.device); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) { + cairo_box_t *box = chunk->base; + for (i = 0; i < chunk->count; i++) { + int x1 = _cairo_fixed_integer_round (box[i].p1.x); + int y1 = _cairo_fixed_integer_round (box[i].p1.y); + int x2 = _cairo_fixed_integer_round (box[i].p2.x); + int y2 = _cairo_fixed_integer_round (box[i].p2.y); + + if (x2 > x1 && y2 > y1) + shader.add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1); + } + } + + if (! extents->is_bounded) + status = i915_fixup_unbounded_boxes (dst, extents, clip, boxes); + } + i915_shader_fini (&shader); + + return status; +} + +static cairo_status_t +_clip_and_composite_boxes (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *src, + cairo_boxes_t *boxes, + cairo_antialias_t antialias, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip) +{ + cairo_status_t status; + + if (boxes->num_boxes == 0) { + if (extents->is_bounded) + return CAIRO_STATUS_SUCCESS; + + return i915_fixup_unbounded (dst, extents, clip); + } + + /* Use a fast path if the boxes are pixel aligned */ + status = _composite_boxes (dst, op, src, boxes, antialias, clip, extents); + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + return status; + + /* Otherwise render the boxes via an implicit mask and composite in the usual + * fashion. + */ + return i915_clip_and_composite_spans (dst, op, src, antialias, + _composite_boxes_spans, boxes, + extents, clip); +} + +static cairo_bool_t +box_is_aligned (const cairo_box_t *box) +{ + return + _cairo_fixed_is_integer (box->p1.x) && + _cairo_fixed_is_integer (box->p1.y) && + _cairo_fixed_is_integer (box->p2.x) && + _cairo_fixed_is_integer (box->p2.y); +} + +static inline cairo_status_t +_clip_to_boxes (cairo_clip_t **clip, + const cairo_composite_rectangles_t *extents, + cairo_box_t **boxes, + int *num_boxes) +{ + cairo_status_t status; + const cairo_rectangle_int_t *rect; + + rect = extents->is_bounded ? 
&extents->bounded: &extents->unbounded; + + if (*clip == NULL) + goto EXTENTS; + + status = _cairo_clip_rectangle (*clip, rect); + if (unlikely (status)) + return status; + + status = _cairo_clip_get_boxes (*clip, boxes, num_boxes); + if (status != CAIRO_INT_STATUS_UNSUPPORTED) { + if (extents->is_bounded || (*num_boxes == 1 && box_is_aligned (*boxes))) + *clip = NULL; + return status; + } + + EXTENTS: + _cairo_box_from_rectangle (&(*boxes)[0], rect); + *num_boxes = 1; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_clip_path_t * +_clip_get_solitary_path (cairo_clip_t *clip) +{ + cairo_clip_path_t *iter = clip->path; + cairo_clip_path_t *path = NULL; + + do { + if ((iter->flags & CAIRO_CLIP_PATH_IS_BOX) == 0) { + if (path != NULL) + return FALSE; + + path = iter; + } + iter = iter->prev; + } while (iter != NULL); + + return path; +} + +static cairo_int_status_t +i915_surface_paint (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_clip_t *clip) +{ + i915_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_clip_path_t *clip_path; + cairo_boxes_t boxes; + int num_boxes = ARRAY_LENGTH (boxes.boxes_embedded); + cairo_box_t *clip_boxes = boxes.boxes_embedded; + cairo_status_t status; + + /* XXX unsupported operators? use pixel shader blending, eventually */ + + status = _cairo_composite_rectangles_init_for_paint (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, + clip); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + /* If the clip cannot be reduced to a set of boxes, we will need to + * use a clipmask. Paint is special as it is the only operation that + * does not implicitly use a mask, so we may be able to reduce this + * operation to a fill... 
+ */ + if (clip != NULL && + extents.is_bounded && + (clip_path = _clip_get_solitary_path (clip)) != NULL) + { + status = i915_surface_fill (dst, op, source, + &clip_path->path, + clip_path->fill_rule, + clip_path->tolerance, + clip_path->antialias, + NULL); + } + else + { + _cairo_boxes_init_for_array (&boxes, clip_boxes, num_boxes); + status = _clip_and_composite_boxes (dst, op, source, + &boxes, CAIRO_ANTIALIAS_DEFAULT, + &extents, clip); + } + if (clip_boxes != boxes.boxes_embedded) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static cairo_int_status_t +i915_surface_mask (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + const cairo_pattern_t *mask, + cairo_clip_t *clip) +{ + i915_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + i915_shader_t shader; + cairo_clip_t local_clip; + cairo_region_t *clip_region = NULL; + cairo_bool_t need_clip_surface = FALSE; + cairo_bool_t have_clip = FALSE; + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_mask (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, mask, clip); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL && extents.is_bounded) { + clip = _cairo_clip_init_copy (&local_clip, clip); + status = _cairo_clip_rectangle (clip, &extents.bounded); + if (unlikely (status)) { + _cairo_clip_fini (&local_clip); + return status; + } + + have_clip = TRUE; + } + + i915_shader_init (&shader, dst, op); + + status = i915_shader_acquire_pattern (&shader, + &shader.source, + source, + &extents.bounded); + if (unlikely (status)) + goto BAIL; + + status = i915_shader_acquire_pattern (&shader, + &shader.mask, + mask, + &extents.bounded); + if (unlikely (status)) + goto BAIL; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + if (need_clip_surface) + i915_shader_set_clip (&shader, clip); + } + + status = i915_shader_commit (&shader, + (i915_device_t *) dst->intel.drm.base.device); + if (unlikely (status)) + goto BAIL; + + if (clip_region != NULL) { + unsigned int n, num_rectangles; + + num_rectangles = cairo_region_num_rectangles (clip_region); + for (n = 0; n < num_rectangles; n++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, n, &rect); + + shader.add_rectangle (&shader, + rect.x, rect.y, + rect.x + rect.width, rect.y + rect.height); + } + } else { + shader.add_rectangle (&shader, + extents.bounded.x, extents.bounded.y, + extents.bounded.x + extents.bounded.width, + extents.bounded.y + extents.bounded.height); + } + + if (! 
extents.is_bounded) + status = i915_fixup_unbounded (dst, &extents, clip); + + BAIL: + i915_shader_fini (&shader); + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +typedef struct { + cairo_polygon_t polygon; + cairo_fill_rule_t fill_rule; + cairo_antialias_t antialias; +} composite_polygon_info_t; + +static cairo_status_t +_composite_polygon_spans (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents) +{ + composite_polygon_info_t *info = closure; + cairo_botor_scan_converter_t converter; + cairo_status_t status; + cairo_box_t box; + + box.p1.x = _cairo_fixed_from_int (extents->x); + box.p1.y = _cairo_fixed_from_int (extents->y); + box.p2.x = _cairo_fixed_from_int (extents->x + extents->width); + box.p2.y = _cairo_fixed_from_int (extents->y + extents->height); + + _cairo_botor_scan_converter_init (&converter, &box, info->fill_rule); + + status = converter.base.add_polygon (&converter.base, &info->polygon); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = converter.base.generate (&converter.base, renderer); + + converter.base.destroy (&converter.base); + + return status; +} + +static cairo_int_status_t +i915_surface_stroke (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i915_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_stroke (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, + path, stroke_style, ctm, + clip); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + if (path->is_rectilinear) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_stroke_rectilinear_to_boxes (path, + stroke_style, + ctm, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_stroke_to_polygon (path, + stroke_style, + ctm, ctm_inverse, + tolerance, + &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! 
extents.is_bounded) + status = i915_fixup_unbounded (dst, &extents, clip); + + goto CLEANUP_POLYGON; + } + + info.fill_rule = CAIRO_FILL_RULE_WINDING; + info.antialias = antialias; + status = i915_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static cairo_int_status_t +i915_surface_fill (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t*source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i915_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_fill (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, path, + clip); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + assert (! path->is_empty_fill); + + if (_cairo_path_fixed_is_rectilinear_fill (path)) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_fill_rectilinear_to_boxes (path, + fill_rule, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_fill_to_polygon (path, tolerance, &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! 
extents.is_bounded) + status = i915_fixup_unbounded (dst, &extents, clip); + + goto CLEANUP_POLYGON; + } + + info.fill_rule = fill_rule; + info.antialias = antialias; + status = i915_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static const cairo_surface_backend_t i915_surface_backend = { + CAIRO_SURFACE_TYPE_DRM, + + _cairo_drm_surface_create_similar, + i915_surface_finish, + intel_surface_acquire_source_image, + intel_surface_release_source_image, + + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + + NULL, /* copy_page */ + NULL, /* show_page */ + _cairo_drm_surface_get_extents, + NULL, /* old-glyphs */ + _cairo_drm_surface_get_font_options, + + i915_surface_flush, + NULL, /* mark_dirty */ + intel_scaled_font_fini, + intel_scaled_glyph_fini, + + i915_surface_paint, + i915_surface_mask, + i915_surface_stroke, + i915_surface_fill, + i915_surface_glyphs, +}; + +static void +i915_surface_init (i915_surface_t *surface, + cairo_content_t content, + cairo_drm_device_t *device) +{ + intel_surface_init (&surface->intel, &i915_surface_backend, device, content); + + switch (content) { + default: + ASSERT_NOT_REACHED; + case CAIRO_CONTENT_COLOR_ALPHA: + surface->map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + surface->colorbuf = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER; + break; + case CAIRO_CONTENT_COLOR: + surface->map0 = MAPSURF_32BIT | MT_32BIT_XRGB8888; + surface->colorbuf = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER; + break; + case CAIRO_CONTENT_ALPHA: + surface->map0 = MAPSURF_8BIT | MT_8BIT_A8; + surface->colorbuf = COLR_BUF_8BIT | DEPTH_FRMT_24_FIXED_8_OTHER; + break; + } + surface->colorbuf |= DSTORG_HORT_BIAS (0x8) | DSTORG_VERT_BIAS (0x8); + + surface->map1 = 0; + + surface->is_current_texture = 0; + + surface->offset = 0; + + surface->stencil = NULL; + surface->cache = NULL; +} + +cairo_surface_t * +i915_surface_create_internal (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height, + uint32_t tiling, + cairo_bool_t gpu_target) +{ + i915_surface_t *surface; + cairo_status_t status_ignored; + + surface = malloc (sizeof (i915_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i915_surface_init (surface, content, base_dev); + + if (width && height) { + uint32_t size; + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + + width = (width + 3) & -4; + surface->intel.drm.stride = cairo_format_stride_for_width (surface->intel.drm.format, + width); + /* check for tiny surfaces for which tiling is irrelevant */ + if (height * surface->intel.drm.stride < 4096) + tiling = I915_TILING_NONE; + + surface->intel.drm.stride = i915_tiling_stride (tiling, + surface->intel.drm.stride); + assert (surface->intel.drm.stride <= 8192); + assert (surface->intel.drm.stride >= cairo_format_stride_for_width (surface->intel.drm.format, width)); + height = i915_tiling_height (tiling, height); + assert (height <= 2048); + + size = i915_tiling_size (tiling, surface->intel.drm.stride * height); + + surface->intel.drm.bo = &intel_bo_create 
(to_intel_device (&base_dev->base), + size, gpu_target)->base; + if (surface->intel.drm.bo == NULL) { + status_ignored = _cairo_drm_surface_finish (&surface->intel.drm); + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + intel_bo_set_tiling (to_intel_device (&base_dev->base), + to_intel_bo (surface->intel.drm.bo), + tiling, surface->intel.drm.stride); + + assert (surface->intel.drm.bo->size >= (size_t) surface->intel.drm.stride*height); + + surface->map0 |= MS3_tiling (to_intel_bo (surface->intel.drm.bo)->tiling); + surface->map1 = (surface->intel.drm.stride/4 - 1) << MS4_PITCH_SHIFT; + } + + return &surface->intel.drm.base; +} + +static cairo_surface_t * +i915_surface_create (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height) +{ + return i915_surface_create_internal (base_dev, content, width, height, + I915_TILING_DEFAULT, TRUE); +} + +static cairo_surface_t * +i915_surface_create_for_name (cairo_drm_device_t *base_dev, + unsigned int name, + cairo_format_t format, + int width, int height, int stride) +{ + i915_surface_t *surface; + cairo_content_t content; + + /* Vol I, p134: size restrictions for textures */ + /* Vol I, p129: destination surface stride must be a multiple of 32 bytes */ + if (stride < cairo_format_stride_for_width (format, (width + 3) & -4) || + stride & 31) + { + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_STRIDE)); + } + + switch (format) { + default: + case CAIRO_FORMAT_A1: + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + case CAIRO_FORMAT_ARGB32: + content = CAIRO_CONTENT_COLOR_ALPHA; + break; + case CAIRO_FORMAT_RGB24: + content = CAIRO_CONTENT_COLOR; + break; + case CAIRO_FORMAT_A8: + content = CAIRO_CONTENT_ALPHA; + break; + } + + surface = malloc (sizeof (i915_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i915_surface_init (surface, content, base_dev); + + if (width && height) { + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->intel.drm.stride = stride; + + surface->map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + surface->map1 = (surface->intel.drm.stride/4 - 1) << MS4_PITCH_SHIFT; + + surface->intel.drm.bo = + &intel_bo_create_for_name (to_intel_device (&base_dev->base), + name)->base; + if (unlikely (surface->intel.drm.bo == NULL)) { + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + to_intel_bo (surface->intel.drm.bo)->stride = stride; + + surface->map0 |= MS3_tiling (to_intel_bo (surface->intel.drm.bo)->tiling); + } + + return &surface->intel.drm.base; +} + +static cairo_status_t +i915_buffer_cache_init (intel_buffer_cache_t *cache, + i915_device_t *device, + cairo_format_t format, + int width, int height) +{ + const uint32_t tiling = I915_TILING_Y; + + assert ((width & 3) == 0); + assert ((height & 1) == 0); + cache->buffer.width = width; + cache->buffer.height = height; + + switch (format) { + case CAIRO_FORMAT_A1: + case CAIRO_FORMAT_RGB24: + ASSERT_NOT_REACHED; + case CAIRO_FORMAT_ARGB32: + cache->buffer.map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + cache->buffer.stride = width * 4; + break; + case CAIRO_FORMAT_A8: + cache->buffer.map0 = MAPSURF_8BIT | MT_8BIT_I8; + cache->buffer.stride = width; + break; + } + cache->buffer.map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << 
MS3_WIDTH_SHIFT); + cache->buffer.map1 = ((cache->buffer.stride / 4) - 1) << MS4_PITCH_SHIFT; + + assert ((cache->buffer.stride & 7) == 0); + assert (i915_tiling_stride (tiling, cache->buffer.stride) == cache->buffer.stride); + assert (i915_tiling_height (tiling, height) == height); + + cache->buffer.bo = intel_bo_create (&device->intel, + height * cache->buffer.stride, + FALSE); + if (unlikely (cache->buffer.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + intel_bo_set_tiling (&device->intel, cache->buffer.bo, tiling, cache->buffer.stride); + cache->buffer.map0 |= MS3_tiling (cache->buffer.bo->tiling); + + cache->ref_count = 0; + cairo_list_init (&cache->link); + + return CAIRO_STATUS_SUCCESS; +} + +i915_surface_t * +i915_surface_create_from_cacheable_image_internal (i915_device_t *device, + cairo_image_surface_t *image) +{ + i915_surface_t *surface; + cairo_status_t status; + cairo_list_t *caches; + intel_buffer_cache_t *cache; + cairo_rtree_node_t *node; + cairo_format_t format; + int width, height, bpp; + + width = image->width; + height = image->height; + if (width > IMAGE_CACHE_WIDTH/2 || height > IMAGE_CACHE_HEIGHT/2) { + surface = (i915_surface_t *) + i915_surface_create_internal (&device->intel.base, + image->base.content, + width, height, + I915_TILING_NONE, FALSE); + if (unlikely (surface->intel.drm.base.status)) + return surface; + + status = intel_bo_put_image (&device->intel, + to_intel_bo (surface->intel.drm.bo), + surface->intel.drm.stride, + image, + 0, 0, + width, height, + 0, 0); + + if (unlikely (status)) { + cairo_surface_destroy (&surface->intel.drm.base); + return (i915_surface_t *) _cairo_surface_create_in_error (status); + } + + return surface; + } + + status = cairo_device_acquire (&device->intel.base.base); + if (unlikely (status)) + return (i915_surface_t *) _cairo_surface_create_in_error (status); + + switch (image->format) { + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + caches = &device->image_caches[0]; + format = CAIRO_FORMAT_ARGB32; + bpp = 4; + break; + case CAIRO_FORMAT_A8: + case CAIRO_FORMAT_A1: + caches = &device->image_caches[1]; + format = CAIRO_FORMAT_A8; + bpp = 1; + break; + default: + ASSERT_NOT_REACHED; + status = _cairo_error (CAIRO_STATUS_INVALID_FORMAT); + goto CLEANUP_DEVICE; + } + + node = NULL; + cairo_list_foreach_entry (cache, intel_buffer_cache_t, caches, link) { + if (! 
intel_bo_is_inactive (&device->intel, cache->buffer.bo)) + continue; + + status = _cairo_rtree_insert (&cache->rtree, width, height, &node); + if (unlikely (_cairo_status_is_error (status))) + goto CLEANUP_DEVICE; + if (status == CAIRO_STATUS_SUCCESS) + break; + } + if (node == NULL) { + cache = malloc (sizeof (intel_buffer_cache_t)); + if (unlikely (cache == NULL)) { + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + goto CLEANUP_DEVICE; + } + + status = i915_buffer_cache_init (cache, device, format, + IMAGE_CACHE_WIDTH, + IMAGE_CACHE_HEIGHT); + if (unlikely (status)) { + free (cache); + goto CLEANUP_DEVICE; + } + + _cairo_rtree_init (&cache->rtree, + IMAGE_CACHE_WIDTH, + IMAGE_CACHE_HEIGHT, + 4, + sizeof (i915_image_private_t), + NULL); + + status = _cairo_rtree_insert (&cache->rtree, width, height, &node); + assert (status == CAIRO_STATUS_SUCCESS); + + cairo_list_init (&cache->link); + } + cairo_list_move (&cache->link, caches); + ((i915_image_private_t *) node)->container = cache; + + status = intel_bo_put_image (&device->intel, + cache->buffer.bo, cache->buffer.stride, + image, + 0, 0, + width, height, + node->x, node->y); + if (unlikely (status)) + goto CLEANUP_CACHE; + + surface = malloc (sizeof (i915_surface_t)); + if (unlikely (surface == NULL)) { + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + goto CLEANUP_CACHE; + } + + i915_surface_init (surface, image->base.content, &device->intel.base); + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->intel.drm.stride = cache->buffer.stride; + + surface->map0 |= MS3_tiling (cache->buffer.bo->tiling) | + ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + surface->map1 = (surface->intel.drm.stride/4 - 1) << MS4_PITCH_SHIFT; + + surface->intel.drm.bo = &intel_bo_reference (cache->buffer.bo)->base; + surface->offset = node->y * cache->buffer.stride + bpp * node->x; + + surface->cache = (i915_image_private_t *) node; + cache->ref_count++; + + cairo_device_release (&device->intel.base.base); + + return surface; + +CLEANUP_CACHE: + _cairo_rtree_node_destroy (&cache->rtree, node); + if (cache->ref_count == 0) { + intel_bo_destroy (&device->intel, cache->buffer.bo); + _cairo_rtree_fini (&cache->rtree); + cairo_list_del (&cache->link); + free (cache); + } +CLEANUP_DEVICE: + cairo_device_release (&device->intel.base.base); + return (i915_surface_t *) _cairo_surface_create_in_error (status); +} + +static cairo_surface_t * +i915_surface_create_from_cacheable_image (cairo_drm_device_t *device, + cairo_surface_t *source) +{ + i915_surface_t *surface; + cairo_image_surface_t *image; + void *image_extra; + cairo_status_t status; + + status = _cairo_surface_acquire_source_image (source, &image, &image_extra); + if (unlikely (status)) + return _cairo_surface_create_in_error (status); + + surface = i915_surface_create_from_cacheable_image_internal ((i915_device_t *) device, image); + + _cairo_surface_release_source_image (source, image, image_extra); + + return &surface->intel.drm.base; +} + +static cairo_status_t +i915_surface_enable_scan_out (void *abstract_surface) +{ + i915_surface_t *surface = abstract_surface; + intel_bo_t *bo; + cairo_status_t status; + + if (unlikely (surface->intel.drm.bo == NULL)) + return _cairo_error (CAIRO_STATUS_INVALID_SIZE); + + bo = to_intel_bo (surface->intel.drm.bo); + if (bo->tiling == I915_TILING_Y) { + status = i915_surface_batch_flush (surface); + if (unlikely (status)) + return status; + + intel_bo_set_tiling (to_intel_device 
(surface->intel.drm.base.device), + bo, I915_TILING_X, surface->intel.drm.stride); + if (bo->tiling == I915_TILING_X) { + surface->map0 &= ~MS3_tiling (I915_TILING_Y); + surface->map0 |= MS3_tiling (I915_TILING_X); + } + } + + if (unlikely (bo->tiling == I915_TILING_Y)) + return _cairo_error (CAIRO_STATUS_INVALID_FORMAT); /* XXX */ + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_int_status_t +i915_device_flush (cairo_drm_device_t *device) +{ + cairo_status_t status; + + if (unlikely (device->base.finished)) + return CAIRO_STATUS_SUCCESS; + + status = cairo_device_acquire (&device->base); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = i915_batch_flush ((i915_device_t *) device); + cairo_device_release (&device->base); + } + + return status; +} + +static cairo_int_status_t +i915_device_throttle (cairo_drm_device_t *device) +{ + cairo_status_t status; + + status = cairo_device_acquire (&device->base); + if (unlikely (status)) + return status; + + status = i915_batch_flush ((i915_device_t *) device); + intel_throttle ((intel_device_t *) device); + + cairo_device_release (&device->base); + + return status; +} + +static void +i915_device_destroy (void *data) +{ + i915_device_t *device = data; + + if (device->last_vbo) + intel_bo_destroy (&device->intel, device->last_vbo); + + i915_batch_cleanup (device); + + intel_device_fini (&device->intel); + free (device); +} + +COMPILE_TIME_ASSERT (sizeof (i915_batch_setup) == sizeof (((i915_device_t *)0)->batch_header)); +COMPILE_TIME_ASSERT (offsetof (i915_device_t, batch_base) == offsetof (i915_device_t, batch_header) + sizeof (i915_batch_setup)); + +cairo_drm_device_t * +_cairo_drm_i915_device_create (int fd, dev_t dev_id, int vendor_id, int chip_id) +{ + i915_device_t *device; + cairo_status_t status; + uint64_t gtt_size; + int n; + + if (! 
intel_info (fd, &gtt_size)) + return NULL; + + device = malloc (sizeof (i915_device_t)); + if (device == NULL) + return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); + + status = intel_device_init (&device->intel, fd); + if (unlikely (status)) { + free (device); + return (cairo_drm_device_t *) _cairo_device_create_in_error (status); + } + + device->debug = 0; + if (getenv ("CAIRO_DEBUG_DRM") != NULL) + device->debug = I915_DEBUG_BATCH; + + device->batch.gtt_size = I915_BATCH_SIZE; + device->batch.exec_count = 0; + device->batch.reloc_count = 0; + device->batch.used = 0; + + memcpy (device->batch_header, i915_batch_setup, sizeof (i915_batch_setup)); + device->vbo = 0; + device->vbo_offset = 0; + device->vbo_used = 0; + device->vertex_index = 0; + device->vertex_count = 0; + device->last_vbo = NULL; + + device->current_n_samplers = 0; + device->current_target = NULL; + device->current_source = NULL; + device->current_mask = NULL; + device->current_clip = NULL; + device->current_colorbuf = 0; + + for (n = 0; n < ARRAY_LENGTH (device->image_caches); n++) + cairo_list_init (&device->image_caches[n]); + + device->intel.base.surface.create = i915_surface_create; + device->intel.base.surface.create_for_name = i915_surface_create_for_name; + device->intel.base.surface.create_from_cacheable_image = i915_surface_create_from_cacheable_image; + + device->intel.base.surface.flink = _cairo_drm_surface_flink; + device->intel.base.surface.enable_scan_out = i915_surface_enable_scan_out; + device->intel.base.surface.map_to_image = intel_surface_map_to_image; + + device->intel.base.device.flush = i915_device_flush; + device->intel.base.device.throttle = i915_device_throttle; + device->intel.base.device.destroy = i915_device_destroy; + + i915_device_reset (device); + + return _cairo_drm_device_init (&device->intel.base, + fd, dev_id, vendor_id, chip_id, + 2048); +} diff --git a/src/drm/cairo-drm-i965-glyphs.c b/src/drm/cairo-drm-i965-glyphs.c new file mode 100644 index 00000000..fa86e341 --- /dev/null +++ b/src/drm/cairo-drm-i965-glyphs.c @@ -0,0 +1,500 @@ +/* cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. 
+ * + * The Initial Developer of the Original Code is Red Hat, Inc. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-composite-rectangles-private.h" +#include "cairo-drm-i965-private.h" +#include "cairo-error-private.h" +#include "cairo-rtree-private.h" + +typedef struct _i965_glyphs i965_glyphs_t; + +typedef float * +(*i965_get_rectangle_func_t) (i965_glyphs_t *glyphs); + +struct _i965_glyphs { + i965_get_rectangle_func_t get_rectangle; + i965_shader_t shader; + + struct i965_vbo head, *tail; + + unsigned int vbo_offset; + float *vbo_base; +}; + +static float * +i965_glyphs_emit_rectangle (i965_glyphs_t *glyphs) +{ + return i965_add_rectangle (glyphs->shader.device); +} + +static float * +i965_glyphs_accumulate_rectangle (i965_glyphs_t *glyphs) +{ + float *vertices; + uint32_t size; + + size = glyphs->shader.device->rectangle_size; + if (unlikely (glyphs->vbo_offset + size > I965_VERTEX_SIZE)) { + struct i965_vbo *vbo; + + intel_bo_unmap (glyphs->tail->bo); + + vbo = malloc (sizeof (struct i965_vbo)); + if (unlikely (vbo == NULL)) { + /* throw error! */ + } + + glyphs->tail->next = vbo; + glyphs->tail = vbo; + + vbo->next = NULL; + vbo->bo = intel_bo_create (&glyphs->shader.device->intel, + I965_VERTEX_SIZE, FALSE); + vbo->count = 0; + + glyphs->vbo_offset = 0; + glyphs->vbo_base = intel_bo_map (&glyphs->shader.device->intel, vbo->bo); + } + + vertices = glyphs->vbo_base + glyphs->vbo_offset; + glyphs->vbo_offset += size; + glyphs->tail->count += 3; + + return vertices; +} + +static void +i965_add_glyph_rectangle (i965_glyphs_t *glyphs, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph) +{ + float *v; + + /* Each vertex is: + * 2 vertex coordinates + * 1 glyph texture coordinate + */ + + v = glyphs->get_rectangle (glyphs); + + /* bottom right */ + *v++ = x2; *v++ = y2; + *v++ = glyph->texcoord[0]; + + /* bottom left */ + *v++ = x1; *v++ = y2; + *v++ = glyph->texcoord[1]; + + /* top left */ + *v++ = x1; *v++ = y1; + *v++ = glyph->texcoord[2]; +} + +static cairo_status_t +i965_surface_mask_internal (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *source, + i965_surface_t *mask, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + i965_device_t *device; + i965_shader_t shader; + cairo_region_t *clip_region = NULL; + cairo_status_t status; + + i965_shader_init (&shader, dst, op); + + status = i965_shader_acquire_pattern (&shader, &shader.source, + source, &extents->bounded); + if (unlikely (status)) + return status; + + shader.mask.type.vertex = VS_NONE; + shader.mask.type.fragment = FS_SURFACE; + shader.mask.base.content = mask->intel.drm.base.content; + shader.mask.base.filter = i965_filter (CAIRO_FILTER_NEAREST); + shader.mask.base.extend = i965_extend (CAIRO_EXTEND_NONE); + + cairo_matrix_init_translate (&shader.mask.base.matrix, + -extents->bounded.x + NEAREST_BIAS, + -extents->bounded.y + NEAREST_BIAS); + cairo_matrix_scale (&shader.mask.base.matrix, + 1. / mask->intel.drm.width, + 1. 
/ mask->intel.drm.height); + + shader.mask.base.bo = to_intel_bo (mask->intel.drm.bo); + shader.mask.base.format = mask->intel.drm.format; + shader.mask.base.width = mask->intel.drm.width; + shader.mask.base.height = mask->intel.drm.height; + shader.mask.base.stride = mask->intel.drm.stride; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&shader, clip); + } + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SHADER; + + device = i965_device (dst); + + status = i965_shader_commit (&shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + if (clip_region != NULL) { + unsigned int n, num_rectangles; + + num_rectangles = cairo_region_num_rectangles (clip_region); + for (n = 0; n < num_rectangles; n++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, n, &rect); + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + } else { + i965_shader_add_rectangle (&shader, + extents->bounded.x, + extents->bounded.y, + extents->bounded.width, + extents->bounded.height); + } + + if (! extents->is_bounded) + status = i965_fixup_unbounded (dst, extents, clip); + + CLEANUP_DEVICE: + cairo_device_release (&device->intel.base.base); + CLEANUP_SHADER: + i965_shader_fini (&shader); + return status; +} + +cairo_int_status_t +i965_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *g, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + i965_surface_t *surface = abstract_surface; + i965_surface_t *mask = NULL; + i965_device_t *device; + i965_glyphs_t glyphs; + cairo_composite_rectangles_t extents; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_bool_t overlap; + cairo_region_t *clip_region = NULL; + intel_bo_t *last_bo = NULL; + cairo_scaled_glyph_t *glyph_cache[64]; + cairo_status_t status; + int mask_x = 0, mask_y = 0; + int i = 0; + + *num_remaining = 0; + status = _cairo_composite_rectangles_init_for_glyphs (&extents, + surface->intel.drm.width, + surface->intel.drm.height, + op, source, + scaled_font, + g, num_glyphs, + clip, + &overlap); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL && extents.is_bounded) { + clip = _cairo_clip_init_copy (&local_clip, clip); + status = _cairo_clip_rectangle (clip, &extents.bounded); + if (unlikely (status)) + return status; + + have_clip = TRUE; + } + + if (overlap || ! 
extents.is_bounded) { + cairo_content_t content; + + content = CAIRO_CONTENT_ALPHA; + if (scaled_font->options.antialias == CAIRO_ANTIALIAS_SUBPIXEL) + content |= CAIRO_CONTENT_COLOR; + + mask = (i965_surface_t *) + i965_surface_create_internal (&i965_device (surface)->intel.base, + content, + extents.bounded.width, + extents.bounded.height, + I965_TILING_DEFAULT, + TRUE); + if (unlikely (mask->intel.drm.base.status)) + return mask->intel.drm.base.status; + + status = _cairo_surface_paint (&mask->intel.drm.base, + CAIRO_OPERATOR_CLEAR, + &_cairo_pattern_clear.base, + NULL); + if (unlikely (status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + i965_shader_init (&glyphs.shader, mask, CAIRO_OPERATOR_ADD); + + status = i965_shader_acquire_pattern (&glyphs.shader, &glyphs.shader.source, + &_cairo_pattern_white.base, + &extents.bounded); + if (unlikely (status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + mask_x = -extents.bounded.x; + mask_y = -extents.bounded.y; + } else { + i965_shader_init (&glyphs.shader, surface, op); + + status = i965_shader_acquire_pattern (&glyphs.shader, &glyphs.shader.source, + source, &extents.bounded); + if (unlikely (status)) + return status; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&glyphs.shader, clip); + } + } + + glyphs.head.next = NULL; + glyphs.head.bo = NULL; + glyphs.head.count = 0; + glyphs.tail = &glyphs.head; + + device = i965_device (surface); + if (mask != NULL || clip_region == NULL) { + glyphs.get_rectangle = i965_glyphs_emit_rectangle; + } else { + glyphs.get_rectangle = i965_glyphs_accumulate_rectangle; + glyphs.head.bo = intel_bo_create (&device->intel, + I965_VERTEX_SIZE, FALSE); + if (unlikely (glyphs.head.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + glyphs.vbo_base = intel_bo_map (&device->intel, glyphs.head.bo); + } + glyphs.vbo_offset = 0; + + status = cairo_device_acquire (&device->intel.base.base); + if (unlikely (status)) + goto CLEANUP_GLYPHS; + + _cairo_scaled_font_freeze_cache (scaled_font); + //private = _cairo_scaled_font_get_device (scaled_font, device); + if (scaled_font->surface_private == NULL) { + /* XXX couple into list to remove on context destruction */ + scaled_font->surface_private = device; + scaled_font->surface_backend = surface->intel.drm.base.backend; + } + + memset (glyph_cache, 0, sizeof (glyph_cache)); + + for (i = 0; i < num_glyphs; i++) { + cairo_scaled_glyph_t *scaled_glyph; + int x, y, x1, x2, y1, y2; + int cache_index = g[i].index % ARRAY_LENGTH (glyph_cache); + intel_glyph_t *glyph; + + scaled_glyph = glyph_cache[cache_index]; + if (scaled_glyph == NULL || + _cairo_scaled_glyph_index (scaled_glyph) != g[i].index) + { + status = _cairo_scaled_glyph_lookup (scaled_font, + g[i].index, + CAIRO_SCALED_GLYPH_INFO_METRICS, + &scaled_glyph); + if (unlikely (status)) + goto FINISH; + + glyph_cache[cache_index] = scaled_glyph; + } + + if (unlikely (scaled_glyph->metrics.width == 0 || + scaled_glyph->metrics.height == 0)) + { + continue; + } + + /* XXX glyph images are snapped to pixel locations */ + x = _cairo_lround (g[i].x); + y = _cairo_lround (g[i].y); + + x1 = x + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.x); + y1 = y + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.y); + x2 = x + _cairo_fixed_integer_ceil 
(scaled_glyph->bbox.p2.x); + y2 = y + _cairo_fixed_integer_ceil (scaled_glyph->bbox.p2.y); + + if (x2 < extents.bounded.x || + y2 < extents.bounded.y || + x1 > extents.bounded.x + extents.bounded.width || + y1 > extents.bounded.y + extents.bounded.height) + { + continue; + } + + if (scaled_glyph->surface_private == NULL) { + status = intel_get_glyph (&device->intel, scaled_font, scaled_glyph); + if (unlikely (status == CAIRO_INT_STATUS_NOTHING_TO_DO)) { + status = CAIRO_STATUS_SUCCESS; + continue; + } + if (unlikely (status)) + goto FINISH; + } + glyph = intel_glyph_pin (scaled_glyph->surface_private); + + if (glyph->cache->buffer.bo != last_bo) { + intel_buffer_cache_t *cache = glyph->cache; + + glyphs.shader.mask.type.vertex = VS_GLYPHS; + glyphs.shader.mask.type.fragment = FS_GLYPHS; + glyphs.shader.mask.type.pattern = PATTERN_BASE; + + glyphs.shader.mask.base.bo = cache->buffer.bo; + glyphs.shader.mask.base.format = cache->buffer.format; + glyphs.shader.mask.base.width = cache->buffer.width; + glyphs.shader.mask.base.height = cache->buffer.height; + glyphs.shader.mask.base.stride = cache->buffer.stride; + glyphs.shader.mask.base.filter = i965_filter (CAIRO_FILTER_NEAREST); + glyphs.shader.mask.base.extend = i965_extend (CAIRO_EXTEND_NONE); + glyphs.shader.mask.base.content = CAIRO_CONTENT_ALPHA; /* XXX */ + + glyphs.shader.committed = FALSE; + status = i965_shader_commit (&glyphs.shader, device); + if (unlikely (status)) + goto FINISH; + + last_bo = cache->buffer.bo; + } + + x1 += mask_x; x2 += mask_x; + y1 += mask_y; y2 += mask_y; + + i965_add_glyph_rectangle (&glyphs, x1, y1, x2, y2, glyph); + } + + if (mask != NULL && clip_region != NULL) { + intel_bo_unmap (glyphs.tail->bo); + i965_clipped_vertices (device, &glyphs.head, clip_region); + } + + status = CAIRO_STATUS_SUCCESS; + FINISH: + _cairo_scaled_font_thaw_cache (scaled_font); + cairo_device_release (surface->intel.drm.base.device); + CLEANUP_GLYPHS: + i965_shader_fini (&glyphs.shader); + if (glyphs.head.bo != NULL) { + struct i965_vbo *vbo, *next; + + intel_bo_destroy (&device->intel, glyphs.head.bo); + for (vbo = glyphs.head.next; vbo != NULL; vbo = next) { + next = vbo->next; + intel_bo_destroy (&device->intel, vbo->bo); + free (vbo); + } + } + + if (unlikely (status == CAIRO_INT_STATUS_UNSUPPORTED)) { + cairo_path_fixed_t path; + + _cairo_path_fixed_init (&path); + status = _cairo_scaled_font_glyph_path (scaled_font, + g + i, num_glyphs - i, + &path); + if (mask_x | mask_y) { + _cairo_path_fixed_translate (&path, + _cairo_fixed_from_int (mask_x), + _cairo_fixed_from_int (mask_y)); + } + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = surface->intel.drm.base.backend->fill (glyphs.shader.target, + glyphs.shader.op, + mask != NULL ? 
&_cairo_pattern_white.base : source, + &path, + CAIRO_FILL_RULE_WINDING, + 0, + scaled_font->options.antialias, + clip); + } + _cairo_path_fixed_fini (&path); + } + + if (mask != NULL) { + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = i965_surface_mask_internal (surface, op, source, mask, + clip, &extents); + } + cairo_surface_finish (&mask->intel.drm.base); + cairo_surface_destroy (&mask->intel.drm.base); + } + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} diff --git a/src/drm/cairo-drm-i965-private.h b/src/drm/cairo-drm-i965-private.h new file mode 100644 index 00000000..14fabe9d --- /dev/null +++ b/src/drm/cairo-drm-i965-private.h @@ -0,0 +1,742 @@ +#ifndef CAIRO_DRM_I965_PRIVATE_H +#define CAIRO_DRM_I965_PRIVATE_H + +#include "cairo-drm-intel-private.h" + +#include "cairo-hash-private.h" +#include "cairo-freelist-private.h" + +#include "cairo-drm-intel-brw-defines.h" + +#include <setjmp.h> + +#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3) + +/* + * New regs for broadwater -- we need to split this file up sensibly somehow. + */ +#define BRW_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define BRW_URB_FENCE BRW_3D(0, 0, 0) +#define BRW_CS_URB_STATE BRW_3D(0, 0, 1) +#define BRW_CONSTANT_BUFFER BRW_3D(0, 0, 2) +#define BRW_STATE_PREFETCH BRW_3D(0, 0, 3) + +#define BRW_STATE_BASE_ADDRESS BRW_3D(0, 1, 1) +#define BRW_STATE_SIP BRW_3D(0, 1, 2) +#define BRW_PIPELINE_SELECT BRW_3D(0, 1, 4) + +#define NEW_PIPELINE_SELECT BRW_3D(1, 1, 4) + +#define BRW_MEDIA_STATE_POINTERS BRW_3D(2, 0, 0) +#define BRW_MEDIA_OBJECT BRW_3D(2, 1, 0) + +#define BRW_3DSTATE_PIPELINED_POINTERS BRW_3D(3, 0, 0) +#define BRW_3DSTATE_BINDING_TABLE_POINTERS BRW_3D(3, 0, 1) +#define BRW_3DSTATE_VERTEX_BUFFERS BRW_3D(3, 0, 8) +#define BRW_3DSTATE_VERTEX_ELEMENTS BRW_3D(3, 0, 9) +#define BRW_3DSTATE_INDEX_BUFFER BRW_3D(3, 0, 0xa) +#define BRW_3DSTATE_VF_STATISTICS BRW_3D(3, 0, 0xb) + +#define BRW_3DSTATE_DRAWING_RECTANGLE BRW_3D(3, 1, 0) +#define BRW_3DSTATE_CONSTANT_COLOR BRW_3D(3, 1, 1) +#define BRW_3DSTATE_SAMPLER_PALETTE_LOAD BRW_3D(3, 1, 2) +#define BRW_3DSTATE_CHROMA_KEY BRW_3D(3, 1, 4) +#define BRW_3DSTATE_DEPTH_BUFFER BRW_3D(3, 1, 5) +#define BRW_3DSTATE_POLY_STIPPLE_OFFSET BRW_3D(3, 1, 6) +#define BRW_3DSTATE_POLY_STIPPLE_PATTERN BRW_3D(3, 1, 7) +#define BRW_3DSTATE_LINE_STIPPLE BRW_3D(3, 1, 8) +#define BRW_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP BRW_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define BRW_3DSTATE_AA_LINE_PARAMS BRW_3D(3, 1, 0xa) +#define BRW_3DSTATE_GS_SVB_INDEX BRW_3D(3, 1, 0xb) + +#define BRW_PIPE_CONTROL BRW_3D(3, 2, 0) + +#define BRW_3DPRIMITIVE BRW_3D(3, 3, 0) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +#define UF0_CS_REALLOC (1 << 13) +#define UF0_VFE_REALLOC (1 << 12) +#define UF0_SF_REALLOC (1 << 11) +#define UF0_CLIP_REALLOC (1 << 10) +#define UF0_GS_REALLOC (1 << 9) +#define UF0_VS_REALLOC (1 << 8) +#define UF1_CLIP_FENCE_SHIFT 20 +#define UF1_GS_FENCE_SHIFT 10 +#define UF1_VS_FENCE_SHIFT 0 +#define UF2_CS_FENCE_SHIFT 20 +#define UF2_VFE_FENCE_SHIFT 10 +#define UF2_SF_FENCE_SHIFT 0 + +/* for BRW_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* for BRW_3DSTATE_PIPELINED_POINTERS */ +#define BRW_GS_DISABLE 0 +#define BRW_GS_ENABLE 1 +#define BRW_CLIP_DISABLE 0 +#define BRW_CLIP_ENABLE 1 + +/* for BRW_PIPE_CONTROL */ +#define BRW_PIPE_CONTROL_NOWRITE (0 << 14) +#define BRW_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define 
BRW_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define BRW_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define BRW_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define BRW_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define BRW_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2) + +/* VERTEX_BUFFER_STATE Structure */ +#define VB0_BUFFER_INDEX_SHIFT 27 +#define VB0_VERTEXDATA (0 << 26) +#define VB0_INSTANCEDATA (1 << 26) +#define VB0_BUFFER_PITCH_SHIFT 0 + +/* VERTEX_ELEMENT_STATE Structure */ +#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27 +#define VE0_VALID (1 << 26) +#define VE0_FORMAT_SHIFT 16 +#define VE0_OFFSET_SHIFT 0 +#define VE1_VFCOMPONENT_0_SHIFT 28 +#define VE1_VFCOMPONENT_1_SHIFT 24 +#define VE1_VFCOMPONENT_2_SHIFT 20 +#define VE1_VFCOMPONENT_3_SHIFT 16 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 + +/* 3DPRIMITIVE bits */ +#define BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define BRW_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in brw_defines.h */ +#define BRW_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define BRW_SVG_CTL 0x7400 + +#define BRW_SVG_CTL_GS_BA (0 << 8) +#define BRW_SVG_CTL_SS_BA (1 << 8) +#define BRW_SVG_CTL_IO_BA (2 << 8) +#define BRW_SVG_CTL_GS_AUB (3 << 8) +#define BRW_SVG_CTL_IO_AUB (4 << 8) +#define BRW_SVG_CTL_SIP (5 << 8) + +#define BRW_SVG_RDATA 0x7404 +#define BRW_SVG_WORK_CTL 0x7408 + +#define BRW_VF_CTL 0x7500 + +#define BRW_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define BRW_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define BRW_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define BRW_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define BRW_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_VF_STRG_VAL 0x7504 +#define BRW_VF_STR_VL_OVR 0x7508 +#define BRW_VF_VC_OVR 0x750c +#define BRW_VF_STR_PSKIP 0x7510 +#define BRW_VF_MAX_PRIM 0x7514 +#define BRW_VF_RDATA 0x7518 + +#define BRW_VS_CTL 0x7600 +#define BRW_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define BRW_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_VS_STRG_VAL 0x7604 +#define BRW_VS_RDATA 0x7608 + +#define BRW_SF_CTL 0x7b00 +#define BRW_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define BRW_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define BRW_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define BRW_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_SF_STRG_VAL 0x7b04 +#define BRW_SF_RDATA 0x7b18 + +#define BRW_WIZ_CTL 0x7c00 
+#define BRW_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define BRW_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define BRW_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define BRW_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define BRW_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define BRW_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define BRW_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_WIZ_STRG_VAL 0x7c04 +#define BRW_WIZ_RDATA 0x7c18 + +#define BRW_TS_CTL 0x7e00 +#define BRW_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define BRW_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define BRW_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_TS_STRG_VAL 0x7e04 +#define BRW_TS_RDATA 0x7e08 + +#define BRW_TD_CTL 0x8000 +#define BRW_TD_CTL_MUX_SHIFT 8 +#define BRW_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define BRW_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define BRW_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define BRW_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define BRW_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define BRW_TD_CTL2 0x8004 +#define BRW_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define BRW_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define BRW_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define BRW_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define BRW_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define BRW_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define BRW_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define BRW_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define BRW_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define BRW_TD_VF_VS_EMSK 0x8008 +#define BRW_TD_GS_EMSK 0x800c +#define BRW_TD_CLIP_EMSK 0x8010 +#define BRW_TD_SF_EMSK 0x8014 +#define BRW_TD_WIZ_EMSK 0x8018 +#define BRW_TD_0_6_EHTRG_VAL 0x801c +#define BRW_TD_0_7_EHTRG_VAL 0x8020 +#define BRW_TD_0_6_EHTRG_MSK 0x8024 +#define BRW_TD_0_7_EHTRG_MSK 0x8028 +#define BRW_TD_RDATA 0x802c +#define BRW_TD_TS_EMSK 0x8030 + +#define BRW_EU_CTL 0x8800 +#define BRW_EU_CTL_SELECT_SHIFT 16 +#define BRW_EU_CTL_DATA_MUX_SHIFT 8 +#define BRW_EU_ATT_0 0x8810 +#define BRW_EU_ATT_1 0x8814 +#define BRW_EU_ATT_DATA_0 0x8820 +#define BRW_EU_ATT_DATA_1 0x8824 +#define BRW_EU_ATT_CLR_0 0x8830 +#define BRW_EU_ATT_CLR_1 0x8834 +#define BRW_EU_RDATA 0x8840 + +typedef struct i965_device i965_device_t; +typedef struct i965_surface i965_surface_t; +typedef struct i965_shader i965_shader_t; +typedef struct i965_stream i965_stream_t; + +struct i965_sf_state { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +cairo_private cairo_bool_t +i965_sf_state_equal (const void *, const void *); + +struct i965_cc_state { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +cairo_private cairo_bool_t +i965_cc_state_equal (const void *, const void *); + +struct i965_wm_kernel { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +struct i965_wm_state { + cairo_hash_entry_t entry; + uint32_t kernel; + uint32_t sampler; + uint32_t offset; +}; + +cairo_private cairo_bool_t 
+i965_wm_state_equal (const void *, const void *); + +struct i965_wm_binding { + cairo_hash_entry_t entry; + uint32_t table[4]; + int size; + uint32_t offset; +}; + +cairo_private cairo_bool_t +i965_wm_binding_equal (const void *, const void *); + +struct i965_sampler { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +struct i965_vbo { + struct i965_vbo *next; + intel_bo_t *bo; + unsigned int count; +}; + +struct i965_surface { + intel_surface_t intel; + + uint32_t stream; + uint32_t offset; +}; + +struct i965_pending_relocation { + uint32_t offset; + uint32_t read_domains; + uint32_t write_domain; + uint32_t delta; +}; + +struct i965_stream { + uint32_t used; + uint32_t committed; + uint32_t size; + uint8_t *data; + uint32_t serial; + + int num_pending_relocations; + int max_pending_relocations; + struct i965_pending_relocation *pending_relocations; + + int num_relocations; + int max_relocations; + struct drm_i915_gem_relocation_entry *relocations; +}; + +#define I965_BATCH_SIZE (16 * 4096) +#define I965_SURFACE_SIZE (16 * 4096) +#define I965_GENERAL_SIZE (16 * 4096) +#define I965_CONSTANT_SIZE (16 * 4096) +#define I965_VERTEX_SIZE (128 * 4096) + +#define I965_TILING_DEFAULT I915_TILING_Y + + +struct i965_device { + intel_device_t intel; + + cairo_bool_t is_g4x; + + i965_shader_t *shader; /* note: only valid during geometry emission */ + + /* track state changes */ + struct i965_sf_state sf_state; + struct i965_cc_state cc_state; + struct i965_wm_state wm_state; + struct i965_wm_binding wm_binding; + + i965_surface_t *target; + uint32_t target_offset; + + intel_bo_t *source; + uint32_t source_offset; + + intel_bo_t *mask; + uint32_t mask_offset; + + intel_bo_t *clip; + uint32_t clip_offset; + + uint32_t draw_rectangle; + + uint32_t vs_offset; + uint32_t border_color_offset; + cairo_hash_table_t *sf_states; + cairo_hash_table_t *cc_states; + cairo_hash_table_t *wm_kernels; + cairo_hash_table_t *wm_states; + cairo_hash_table_t *wm_bindings; + cairo_hash_table_t *samplers; + intel_bo_t *general_state; + + cairo_freelist_t sf_freelist; + cairo_freelist_t cc_freelist; + cairo_freelist_t wm_kernel_freelist; + cairo_freelist_t wm_state_freelist; + cairo_freelist_t wm_binding_freelist; + cairo_freelist_t sampler_freelist; + + uint32_t vertex_type; + uint32_t vertex_size; + uint32_t rectangle_size; + uint32_t last_vertex_size; + + float *constants; /* 4 x matrix + 2 x source */ + unsigned constants_size; + cairo_bool_t have_urb_fences; + + i965_stream_t batch; + uint8_t batch_base[I965_BATCH_SIZE]; + struct drm_i915_gem_relocation_entry batch_relocations[1024]; + + i965_stream_t surface; + uint8_t surface_base[I965_SURFACE_SIZE]; + struct i965_pending_relocation surface_pending_relocations[1]; + struct drm_i915_gem_relocation_entry surface_relocations[512]; + + i965_stream_t general; + uint8_t general_base[I965_GENERAL_SIZE]; + struct i965_pending_relocation general_pending_relocations[1]; + + i965_stream_t vertex; + uint8_t vertex_base[I965_VERTEX_SIZE]; + struct i965_pending_relocation vertex_pending_relocations[512]; + + i965_stream_t constant; + uint8_t constant_base[I965_CONSTANT_SIZE]; + struct i965_pending_relocation constant_pending_relocations[512]; + + struct { + size_t gtt_size; + + intel_bo_t *bo[1024]; + int count; + + struct drm_i915_gem_exec_object2 exec[1024]; + } exec; + cairo_list_t flush; +}; + +typedef enum { + VS_NONE = 0, + VS_GLYPHS, + VS_SPANS, +} i965_vertex_shader_t; + +typedef enum { + FS_NONE = 0, + FS_CONSTANT, + FS_LINEAR, + FS_RADIAL, + FS_SURFACE, + FS_GLYPHS, 
+ FS_SPANS, +} i965_fragment_shader_t; + +typedef enum { + PATTERN_BASE, + PATTERN_SOLID, + PATTERN_LINEAR, + PATTERN_RADIAL, + PATTERN_SURFACE, +} i965_shader_channel_t; +#define PATTERN_NONE (i965_shader_channel_t)-1 + +struct i965_shader { + i965_device_t *device; + i965_surface_t *target; + + cairo_operator_t op; + + cairo_bool_t committed; + cairo_bool_t need_combine; + + float constants[4*8 + 2*8]; /* 4 x matrix + 2 x source */ + unsigned constants_size; + + union i965_shader_channel { + struct { + i965_vertex_shader_t vertex; + i965_fragment_shader_t fragment; + i965_shader_channel_t pattern; + } type; + struct i965_shader_base { + i965_vertex_shader_t vertex; + i965_fragment_shader_t fragment; + i965_shader_channel_t pattern; + + uint32_t mode; + + float constants[8]; + unsigned constants_size; + + intel_bo_t *bo; + cairo_format_t format; + cairo_content_t content; + int width, height, stride; + int filter, extend; + cairo_matrix_t matrix; + cairo_bool_t has_component_alpha; + } base; + struct i965_shader_solid { + struct i965_shader_base base; + } solid; + struct i965_shader_linear { + struct i965_shader_base base; + } linear; + struct i965_shader_radial { + struct i965_shader_base base; + } radial; + struct i965_shader_surface { + struct i965_shader_base base; + cairo_surface_t *surface; + } surface; + } source, mask, clip, dst; + + jmp_buf unwind; +}; + +enum i965_shader_linear_mode { + /* XXX REFLECT */ + LINEAR_TEXTURE, + LINEAR_NONE, + LINEAR_REPEAT, + LINEAR_PAD, +}; + +enum i965_shader_radial_mode { + RADIAL_ONE, + RADIAL_TWO +}; + +typedef cairo_status_t +(*i965_spans_func_t) (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents); + +static inline i965_device_t * +i965_device (i965_surface_t *surface) +{ + return (i965_device_t *) surface->intel.drm.base.device; +} + +cairo_private void +i965_emit_relocation (i965_device_t *device, + i965_stream_t *stream, + intel_bo_t *target, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); + +static always_inline uint32_t +i965_stream_emit (i965_stream_t *stream, const void *data, size_t size) +{ + uint32_t offset; + + offset = stream->used; + assert (offset + size <= stream->size); + memcpy (stream->data + offset, data, size); + stream->used += size; + + return offset; +} + +static always_inline void +i965_stream_align (i965_stream_t *stream, uint32_t size) +{ + stream->used = (stream->used + size - 1) & -size; +} + +static always_inline void * +i965_stream_alloc (i965_stream_t *stream, uint32_t align, uint32_t size) +{ + void *ptr; + + if (align) + i965_stream_align (stream, align); + + assert (stream->used + size <= stream->size); + ptr = stream->data + stream->used; + stream->used += size; + + return ptr; +} + +static always_inline uint32_t +i965_stream_offsetof (i965_stream_t *stream, const void *ptr) +{ + return (char *) ptr - (char *) stream->data; +} + +cairo_private void +i965_stream_commit (i965_device_t *device, + i965_stream_t *stream); + +cairo_private void +i965_general_state_reset (i965_device_t *device); + +static inline void +i965_batch_emit_dword (i965_device_t *device, uint32_t dword) +{ + *(uint32_t *) (device->batch.data + device->batch.used) = dword; + device->batch.used += 4; +} + +#define OUT_BATCH(dword) i965_batch_emit_dword(device, dword) + +cairo_private void +i965_clipped_vertices (i965_device_t *device, + struct i965_vbo *vbo, + cairo_region_t *clip_region); + +cairo_private void +i965_flush_vertices (i965_device_t *device); 
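/* Editorial sketch, not part of this patch: the stream helpers above compose as an align-then-pack pair. i965_stream_align() rounds stream->used up with (used + size - 1) & -size, so the alignment passed in must be a power of two, and i965_stream_emit() then copies the data and returns the byte offset at which it landed. A minimal, hypothetical usage, mirroring the pattern the shader compiler in cairo-drm-i965-shader.c uses to pack kernel programs into the general stream:
 *
 *	static inline uint32_t
 *	pack_state_blob (i965_stream_t *stream, const void *blob, size_t size)
 *	{
 *	    i965_stream_align (stream, 64);	    // boundary must be a power of two
 *	    return i965_stream_emit (stream, blob, size);   // offset of the copy
 *	}
 */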
+ +cairo_private void +i965_finish_vertices (i965_device_t *device); + +static inline float * +i965_add_rectangle (i965_device_t *device) +{ + float *vertices; + uint32_t size; + + size = device->rectangle_size; + if (unlikely (device->vertex.used + size > device->vertex.size)) + i965_finish_vertices (device); + + vertices = (float *) (device->vertex.data + device->vertex.used); + device->vertex.used += size; + + return vertices; +} + +static inline void +i965_shader_add_rectangle (const i965_shader_t *shader, + int x, int y, + int w, int h) +{ + float *v; + + v= i965_add_rectangle (shader->device); + + /* bottom-right */ + *v++ = x + w; + *v++ = y + h; + + /* bottom-left */ + *v++ = x; + *v++ = y + h; + + /* top-left */ + *v++ = x; + *v++ = y; +} + +cairo_private cairo_surface_t * +i965_surface_create_internal (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height, + uint32_t tiling, + cairo_bool_t gpu_target); + +cairo_private cairo_status_t +i965_clip_and_composite_spans (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + i965_spans_func_t draw_func, + void *draw_closure, + const cairo_composite_rectangles_t*extents, + cairo_clip_t *clip); + +cairo_private cairo_int_status_t +i965_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining); + +cairo_private void +i965_shader_init (i965_shader_t *shader, + i965_surface_t *dst, + cairo_operator_t op); + +cairo_private cairo_status_t +i965_shader_acquire_pattern (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_pattern_t *pattern, + const cairo_rectangle_int_t *extents); + +cairo_private void +i965_shader_set_clip (i965_shader_t *shader, + cairo_clip_t *clip); + +cairo_private cairo_status_t +i965_shader_commit (i965_shader_t *shader, + i965_device_t *device); + +cairo_private void +i965_shader_fini (i965_shader_t *shader); + +cairo_private cairo_status_t +i965_device_flush (i965_device_t *device); + +cairo_private cairo_status_t +i965_fixup_unbounded (i965_surface_t *dst, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip); + +static inline int +i965_filter (cairo_filter_t filter) +{ + switch (filter) { + default: + case CAIRO_FILTER_FAST: + case CAIRO_FILTER_NEAREST: + return BRW_MAPFILTER_NEAREST; + + case CAIRO_FILTER_GOOD: + case CAIRO_FILTER_BEST: + case CAIRO_FILTER_BILINEAR: + case CAIRO_FILTER_GAUSSIAN: + return BRW_MAPFILTER_LINEAR; + } +} + +static inline int +i965_extend (cairo_extend_t extend) +{ + switch (extend) { + default: + case CAIRO_EXTEND_NONE: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + case CAIRO_EXTEND_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + case CAIRO_EXTEND_PAD: + return BRW_TEXCOORDMODE_CLAMP; + case CAIRO_EXTEND_REFLECT: + return BRW_TEXCOORDMODE_MIRROR; + } +} + +#endif /* CAIRO_DRM_I965_PRIVATE_H */ diff --git a/src/drm/cairo-drm-i965-shader.c b/src/drm/cairo-drm-i965-shader.c new file mode 100644 index 00000000..fc9ae570 --- /dev/null +++ b/src/drm/cairo-drm-i965-shader.c @@ -0,0 +1,2852 @@ +/* cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Kristian Høgsberg + * Copyright © 2009 Chris Wilson + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 
2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + * Kristian Høgsberg <krh@bitplanet.net> + */ + +#include "cairoint.h" + +#include "cairo-error-private.h" +#include "cairo-drm-i965-private.h" +#include "cairo-surface-subsurface-private.h" +#include "cairo-surface-snapshot-private.h" + +#include "cairo-drm-intel-brw-eu.h" + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS +/* for DRI2/DRM interoperability */ +#include "cairo-xcb-private.h" +#endif + +/* Theory of shaders: + * + * 3 types of rectangular inputs: + * (a) standard composite: x,y, use source, mask matrices to compute texcoords + * (b) spans: x,y, alpha, use source matrix + * (c) glyphs: x,y, s,t, use source matrix + * + * 5 types of pixel shaders: + * (a) Solid colour + * (b) Linear gradient (via 1D texture, with precomputed tex) + * (c) Radial gradient (per-pixel s computation, 1D texture) + * (d) Spans (mask only): apply opacity + * (e) Texture (includes glyphs). + * + * Clip masks are limited to 2D textures only. + */ + +/* XXX dual source blending for LERP + ComponentAlpha!!! 
*/ + +#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) + +#define SF_KERNEL_NUM_GRF 1 +#define SF_MAX_THREADS 24 + +#define PS_MAX_THREADS_CTG 50 +#define PS_MAX_THREADS_BRW 32 + +#define URB_CS_ENTRY_SIZE 3 /* We need 4 matrices + 2 sources */ +#define URB_CS_ENTRIES 4 /* 4x sets of CONSTANT_BUFFER */ + +#define URB_VS_ENTRY_SIZE 1 +#define URB_VS_ENTRIES 8 + +#define URB_GS_ENTRY_SIZE 0 +#define URB_GS_ENTRIES 0 + +#define URB_CLIP_ENTRY_SIZE 0 +#define URB_CLIP_ENTRIES 0 + +#define URB_SF_ENTRY_SIZE 1 +#define URB_SF_ENTRIES (SF_MAX_THREADS + 1) + +static void +i965_pipelined_flush (i965_device_t *device) +{ + intel_bo_t *bo, *next; + + if (device->batch.used == 0) + return; + + OUT_BATCH (BRW_PIPE_CONTROL | + BRW_PIPE_CONTROL_NOWRITE | + BRW_PIPE_CONTROL_WC_FLUSH | + 2); + OUT_BATCH(0); /* Destination address */ + OUT_BATCH(0); /* Immediate data low DW */ + OUT_BATCH(0); /* Immediate data high DW */ + + cairo_list_foreach_entry_safe (bo, next, intel_bo_t, &device->flush, link) { + bo->batch_write_domain = 0; + cairo_list_init (&bo->link); + } + cairo_list_init (&device->flush); +} + +static cairo_status_t +i965_shader_acquire_solid (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_solid_pattern_t *solid, + const cairo_rectangle_int_t *extents) +{ + src->type.fragment = FS_CONSTANT; + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_SOLID; + + src->base.content = solid->content; + if (CAIRO_COLOR_IS_OPAQUE(&solid->color)) + src->base.content &= ~CAIRO_CONTENT_ALPHA; + + src->base.constants[0] = solid->color.red * solid->color.alpha; + src->base.constants[1] = solid->color.green * solid->color.alpha; + src->base.constants[2] = solid->color.blue * solid->color.alpha; + src->base.constants[3] = solid->color.alpha; + src->base.constants_size = 4; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_linear (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_linear_pattern_t *linear, + const cairo_rectangle_int_t *extents) +{ + intel_buffer_t buffer; + cairo_status_t status; + double x0, y0, sf; + double dx, dy, offset; + + status = intel_gradient_render (&i965_device (shader->target)->intel, + &linear->base, &buffer); + if (unlikely (status)) + return status; + + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_LINEAR; + src->type.fragment = FS_LINEAR; + src->base.bo = buffer.bo; + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.format = buffer.format; + src->base.width = buffer.width; + src->base.height = buffer.height; + src->base.stride = buffer.stride; + src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR); + src->base.extend = i965_extend (linear->base.base.extend); + + dx = _cairo_fixed_to_double (linear->p2.x - linear->p1.x); + dy = _cairo_fixed_to_double (linear->p2.y - linear->p1.y); + sf = 1. 
/ (dx * dx + dy * dy); + dx *= sf; + dy *= sf; + + x0 = _cairo_fixed_to_double (linear->p1.x); + y0 = _cairo_fixed_to_double (linear->p1.y); + offset = dx*x0 + dy*y0; + + if (_cairo_matrix_is_identity (&linear->base.base.matrix)) { + src->base.matrix.xx = dx; + src->base.matrix.xy = dy; + src->base.matrix.x0 = -offset; + } else { + cairo_matrix_t m; + + cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0); + cairo_matrix_multiply (&src->base.matrix, &linear->base.base.matrix, &m); + } + src->base.matrix.yx = 0.; + src->base.matrix.yy = 1.; + src->base.matrix.y0 = 0.; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_radial (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_radial_pattern_t *radial, + const cairo_rectangle_int_t *extents) +{ + intel_buffer_t buffer; + cairo_status_t status; + double dx, dy, dr, r1; + + status = intel_gradient_render (&i965_device (shader->target)->intel, + &radial->base, &buffer); + if (unlikely (status)) + return status; + + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_RADIAL; + src->type.fragment = FS_RADIAL; + src->base.bo = buffer.bo; + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.format = buffer.format; + src->base.width = buffer.width; + src->base.height = buffer.height; + src->base.stride = buffer.stride; + src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR); + src->base.extend = i965_extend (radial->base.base.extend); + + dx = _cairo_fixed_to_double (radial->c2.x - radial->c1.x); + dy = _cairo_fixed_to_double (radial->c2.y - radial->c1.y); + dr = _cairo_fixed_to_double (radial->r2 - radial->r1); + + r1 = _cairo_fixed_to_double (radial->r1); + + if (FALSE && radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) { + /* XXX dr == 0, meaningless with anything other than PAD */ + src->base.constants[0] = _cairo_fixed_to_double (radial->c1.x) / dr; + src->base.constants[1] = _cairo_fixed_to_double (radial->c1.y) / dr; + src->base.constants[2] = 1. 
/ dr; + src->base.constants[3] = -r1 / dr; + + src->base.constants_size = 4; + src->base.mode = RADIAL_ONE; + } else { + src->base.constants[0] = -_cairo_fixed_to_double (radial->c1.x); + src->base.constants[1] = -_cairo_fixed_to_double (radial->c1.y); + src->base.constants[2] = r1; + src->base.constants[3] = -4 * (dx*dx + dy*dy - dr*dr); + + src->base.constants[4] = -2 * dx; + src->base.constants[5] = -2 * dy; + src->base.constants[6] = -2 * r1 * dr; + src->base.constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr)); + + src->base.constants_size = 8; + src->base.mode = RADIAL_TWO; + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_surface_clone (i965_device_t *device, + cairo_image_surface_t *image, + i965_surface_t **clone_out) +{ + i965_surface_t *clone; + cairo_status_t status; + + clone = (i965_surface_t *) + i965_surface_create_internal (&device->intel.base, + image->base.content, + image->width, + image->height, + I965_TILING_DEFAULT, + FALSE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = intel_bo_put_image (&device->intel, + to_intel_bo (clone->intel.drm.bo), + clone->intel.drm.stride, + image, + 0, 0, + image->width, image->height, + 0, 0); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + status = _cairo_surface_attach_snapshot (&image->base, + &clone->intel.drm.base, + intel_surface_detach_snapshot); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = intel_snapshot_cache_insert (&device->intel, &clone->intel); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_surface_clone_subimage (i965_device_t *device, + cairo_image_surface_t *image, + const cairo_rectangle_int_t *extents, + i965_surface_t **clone_out) +{ + i965_surface_t *clone; + cairo_status_t status; + + clone = (i965_surface_t *) + i965_surface_create_internal (&device->intel.base, + image->base.content, + extents->width, + extents->height, + I965_TILING_DEFAULT, + FALSE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = intel_bo_put_image (to_intel_device (clone->intel.drm.base.device), + to_intel_bo (clone->intel.drm.bo), + clone->intel.drm.stride, + image, + extents->x, extents->y, + extents->width, extents->height, + 0, 0); + if (unlikely (status)) + return status; + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_solid_surface (i965_shader_t *shader, + union i965_shader_channel *src, + cairo_surface_t *surface, + const cairo_rectangle_int_t *extents) +{ + cairo_image_surface_t *image; + void *image_extra; + cairo_status_t status; + uint32_t argb; + + status = _cairo_surface_acquire_source_image (surface, &image, &image_extra); + if (unlikely (status)) + return status; + + if (image->format != CAIRO_FORMAT_ARGB32) { + cairo_surface_t *pixel; + cairo_surface_pattern_t pattern; + + /* extract the pixel as argb32 */ + pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1); + _cairo_pattern_init_for_surface (&pattern, &image->base); + cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y); + pattern.base.filter = CAIRO_FILTER_NEAREST; + status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL); + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + _cairo_surface_release_source_image 
(surface, image, image_extra); + cairo_surface_destroy (pixel); + return status; + } + + argb = *(uint32_t *) ((cairo_image_surface_t *) pixel)->data; + cairo_surface_destroy (pixel); + } else { + argb = ((uint32_t *) (image->data + extents->y * image->stride))[extents->x]; + } + + _cairo_surface_release_source_image (surface, image, image_extra); + + if (argb >> 24 == 0) + argb = 0; + + src->base.constants[0] = ((argb >> 16) & 0xff) / 255.; + src->base.constants[1] = ((argb >> 8) & 0xff) / 255.; + src->base.constants[2] = ((argb >> 0) & 0xff) / 255.; + src->base.constants[3] = ((argb >> 24) & 0xff) / 255.; + src->base.constants_size = 4; + + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + if (CAIRO_ALPHA_IS_OPAQUE(src->base.constants[3])) + src->base.content &= ~CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_CONSTANT; + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_SOLID; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_surface (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_surface_pattern_t *pattern, + const cairo_rectangle_int_t *extents) +{ + cairo_surface_t *surface, *drm; + cairo_matrix_t m; + cairo_status_t status; + int src_x = 0, src_y = 0; + + assert (src->type.fragment == FS_NONE); + drm = surface = pattern->surface; + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS + if (surface->type == CAIRO_SURFACE_TYPE_XCB) { + cairo_surface_t *xcb = surface; + + if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + xcb = ((cairo_surface_subsurface_t *) surface)->target; + } else if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + xcb = ((cairo_surface_snapshot_t *) surface)->target; + } + + /* XXX copy windows (IncludeInferiors) to a pixmap/drm surface + * xcb = _cairo_xcb_surface_to_drm (xcb) + */ + xcb = ((cairo_xcb_surface_t *) xcb)->drm; + if (xcb != NULL) + drm = xcb; + } +#endif + + if (surface->type == CAIRO_SURFACE_TYPE_DRM) { + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + drm = ((cairo_surface_subsurface_t *) surface)->target; + } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + drm = ((cairo_surface_snapshot_t *) surface)->target; + } + } + + src->type.pattern = PATTERN_SURFACE; + src->surface.surface = NULL; + if (drm->type == CAIRO_SURFACE_TYPE_DRM) { + i965_surface_t *s = (i965_surface_t *) drm; + + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + if (s->intel.drm.base.device == shader->target->intel.drm.base.device) { + cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface; + if (s != shader->target) { + int x; + + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) + i965_pipelined_flush (i965_device (s)); + + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (s->intel.drm.bo); + src->base.format = s->intel.drm.format; + src->base.content = s->intel.drm.base.content; + src->base.width = sub->extents.width; + src->base.height = sub->extents.height; + src->base.stride = s->intel.drm.stride; + + x = sub->extents.x; + if (s->intel.drm.format != CAIRO_FORMAT_A8) + x *= 4; + + /* XXX tiling restrictions upon offset? 
*/ + //src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x; + } else { + i965_surface_t *clone; + cairo_surface_pattern_t pattern; + + clone = (i965_surface_t *) + i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device, + s->intel.drm.base.content, + sub->extents.width, + sub->extents.height, + I965_TILING_DEFAULT, + TRUE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base); + pattern.base.filter = CAIRO_FILTER_NEAREST; + cairo_matrix_init_translate (&pattern.base.matrix, + sub->extents.x, sub->extents.y); + + status = _cairo_surface_paint (&clone->intel.drm.base, + CAIRO_OPERATOR_SOURCE, + &pattern.base, + NULL); + + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + i965_pipelined_flush (i965_device (s)); + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (clone->intel.drm.bo); + src->base.format = clone->intel.drm.format; + src->base.content = clone->intel.drm.base.content; + src->base.width = clone->intel.drm.width; + src->base.height = clone->intel.drm.height; + src->base.stride = clone->intel.drm.stride; + + src->surface.surface = &clone->intel.drm.base; + } + + src_x = sub->extents.x; + src_y = sub->extents.y; + } + } else { + if (s->intel.drm.base.device == shader->target->intel.drm.base.device) { + if (s != shader->target) { + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) + i965_pipelined_flush (i965_device (s)); + + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (s->intel.drm.bo); + src->base.format = s->intel.drm.format; + src->base.content = s->intel.drm.base.content; + src->base.width = s->intel.drm.width; + src->base.height = s->intel.drm.height; + src->base.stride = s->intel.drm.stride; + } else { + i965_surface_t *clone; + cairo_surface_pattern_t pattern; + + clone = (i965_surface_t *) + i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device, + s->intel.drm.base.content, + s->intel.drm.width, + s->intel.drm.height, + I965_TILING_DEFAULT, + TRUE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base); + pattern.base.filter = CAIRO_FILTER_NEAREST; + status = _cairo_surface_paint (&clone->intel.drm.base, + CAIRO_OPERATOR_SOURCE, + &pattern.base, + NULL); + + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + i965_pipelined_flush (i965_device (s)); + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (clone->intel.drm.bo); + src->base.format = clone->intel.drm.format; + src->base.content = clone->intel.drm.base.content; + src->base.width = clone->intel.drm.width; + src->base.height = clone->intel.drm.height; + src->base.stride = clone->intel.drm.stride; + + src->surface.surface = &clone->intel.drm.base; + } + } + } + } + + if (src->type.fragment == FS_NONE) { + i965_surface_t *s; + + if (extents->width == 1 && extents->height == 1) { + return i965_shader_acquire_solid_surface (shader, src, + surface, extents); + } + + s = (i965_surface_t *) + _cairo_surface_has_snapshot (surface, + shader->target->intel.drm.base.backend); + if (s != NULL) { + i965_device_t *device = i965_device (shader->target); + intel_bo_t *bo = to_intel_bo (s->intel.drm.bo); + + if (bo->purgeable && + ! 
intel_bo_madvise (&device->intel, bo, I915_MADV_WILLNEED)) + { + _cairo_surface_detach_snapshot (&s->intel.drm.base); + s = NULL; + } + + if (s != NULL) + cairo_surface_reference (&s->intel.drm.base); + } + + if (s == NULL) { + cairo_image_surface_t *image; + void *image_extra; + cairo_status_t status; + + status = _cairo_surface_acquire_source_image (surface, &image, &image_extra); + if (unlikely (status)) + return status; + + if (image->width < 8192 && image->height < 8192) { + status = i965_surface_clone (i965_device (shader->target), image, &s); + } else { + status = i965_surface_clone_subimage (i965_device (shader->target), + image, extents, &s); + src_x = -extents->x; + src_y = -extents->y; + } + + _cairo_surface_release_source_image (surface, image, image_extra); + + if (unlikely (status)) + return status; + + /* XXX? */ + //intel_bo_mark_purgeable (to_intel_bo (s->intel.drm.bo), TRUE); + } + + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (s->intel.drm.bo); + src->base.content = s->intel.drm.base.content; + src->base.format = s->intel.drm.format; + src->base.width = s->intel.drm.width; + src->base.height = s->intel.drm.height; + src->base.stride = s->intel.drm.stride; + + src->surface.surface = &s->intel.drm.base; + + drm = &s->intel.drm.base; + } + + /* XXX transform nx1 or 1xn surfaces to 1D? */ + + src->type.vertex = VS_NONE; + + src->base.extend = i965_extend (pattern->base.extend); + if (pattern->base.extend == CAIRO_EXTEND_NONE && + extents->x >= 0 && extents->y >= 0 && + extents->x + extents->width <= src->base.width && + extents->y + extents->height <= src->base.height) + { + /* Convert a wholly contained NONE to a REFLECT as the contiguous sampler + * cannot handle CLAMP_BORDER textures. + */ + src->base.extend = i965_extend (CAIRO_EXTEND_REFLECT); + /* XXX also need to check |u,v| < 3 */ + } + + src->base.filter = i965_filter (pattern->base.filter); + if (_cairo_matrix_is_pixel_exact (&pattern->base.matrix)) + src->base.filter = i965_filter (CAIRO_FILTER_NEAREST); + + /* tweak the src matrix to map from dst to texture coordinates */ + src->base.matrix = pattern->base.matrix; + if (src_x | src_y) + cairo_matrix_translate (&src->base.matrix, src_x, src_y); + if (src->base.filter == BRW_MAPFILTER_NEAREST) + cairo_matrix_translate (&src->base.matrix, NEAREST_BIAS, NEAREST_BIAS); + cairo_matrix_init_scale (&m, 1. / src->base.width, 1.
/ src->base.height); + cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m); + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +i965_shader_acquire_pattern (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_pattern_t *pattern, + const cairo_rectangle_int_t *extents) +{ + switch (pattern->type) { + case CAIRO_PATTERN_TYPE_SOLID: + return i965_shader_acquire_solid (shader, src, + (cairo_solid_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_LINEAR: + return i965_shader_acquire_linear (shader, src, + (cairo_linear_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_RADIAL: + return i965_shader_acquire_radial (shader, src, + (cairo_radial_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_SURFACE: + return i965_shader_acquire_surface (shader, src, + (cairo_surface_pattern_t *) pattern, + extents); + + default: + ASSERT_NOT_REACHED; + return CAIRO_STATUS_SUCCESS; + } +} + +static void +i965_shader_channel_init (union i965_shader_channel *channel) +{ + channel->type.vertex = VS_NONE; + channel->type.fragment = FS_NONE; + channel->type.pattern = PATTERN_NONE; + + channel->base.mode = 0; + channel->base.bo = NULL; + channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST); + channel->base.extend = i965_extend (CAIRO_EXTEND_NONE); + channel->base.has_component_alpha = 0; + channel->base.constants_size = 0; +} + +void +i965_shader_init (i965_shader_t *shader, + i965_surface_t *dst, + cairo_operator_t op) +{ + shader->committed = FALSE; + shader->device = i965_device (dst); + shader->target = dst; + shader->op = op; + shader->constants_size = 0; + + shader->need_combine = FALSE; + + i965_shader_channel_init (&shader->source); + i965_shader_channel_init (&shader->mask); + i965_shader_channel_init (&shader->clip); + i965_shader_channel_init (&shader->dst); +} + +void +i965_shader_fini (i965_shader_t *shader) +{ + if (shader->source.type.pattern == PATTERN_SURFACE) + cairo_surface_destroy (shader->source.surface.surface); + if (shader->mask.type.pattern == PATTERN_SURFACE) + cairo_surface_destroy (shader->mask.surface.surface); + if (shader->clip.type.pattern == PATTERN_SURFACE) + cairo_surface_destroy (shader->clip.surface.surface); + if (shader->dst.type.pattern == PATTERN_SURFACE) + cairo_surface_destroy (shader->dst.surface.surface); +} + +void +i965_shader_set_clip (i965_shader_t *shader, + cairo_clip_t *clip) +{ + cairo_surface_t *clip_surface; + union i965_shader_channel *channel; + i965_surface_t *s; + + clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base); + assert (clip_surface->status == CAIRO_STATUS_SUCCESS); + assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM); + s = (i965_surface_t *) clip_surface; + + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) + i965_pipelined_flush (i965_device (s)); + + channel = &shader->clip; + channel->type.pattern = PATTERN_BASE; + channel->type.vertex = VS_NONE; + channel->type.fragment = FS_SURFACE; + + channel->base.bo = to_intel_bo (s->intel.drm.bo); + channel->base.content = CAIRO_CONTENT_ALPHA; + channel->base.format = CAIRO_FORMAT_A8; + channel->base.width = s->intel.drm.width; + channel->base.height = s->intel.drm.height; + channel->base.stride = s->intel.drm.stride; + + channel->base.extend = i965_extend (CAIRO_EXTEND_NONE); + channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST); + + cairo_matrix_init_scale (&shader->clip.base.matrix, + 1. / s->intel.drm.width, + 1.
/ s->intel.drm.height); + + cairo_matrix_translate (&shader->clip.base.matrix, + NEAREST_BIAS + clip->path->extents.x, + NEAREST_BIAS + clip->path->extents.y); +} + +static cairo_bool_t +i965_shader_check_aperture (i965_shader_t *shader, + i965_device_t *device) +{ + uint32_t size = device->exec.gtt_size; + + if (shader->target != device->target) { + const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo); + if (bo->exec == NULL) + size += bo->base.size; + } + + if (shader->source.base.bo != NULL && shader->source.base.bo != device->source) { + const intel_bo_t *bo = shader->source.base.bo; + if (bo->exec == NULL) + size += bo->base.size; + } + + if (shader->mask.base.bo != NULL && shader->mask.base.bo != device->mask) { + const intel_bo_t *bo = shader->mask.base.bo; + if (bo->exec == NULL) + size += bo->base.size; + } + + if (shader->clip.base.bo != NULL && shader->clip.base.bo != device->clip) { + const intel_bo_t *bo = shader->clip.base.bo; + if (bo->exec == NULL) + size += bo->base.size; + } + + return size <= device->intel.gtt_avail_size; +} + +static cairo_status_t +i965_shader_setup_dst (i965_shader_t *shader) +{ + union i965_shader_channel *channel; + i965_surface_t *s, *clone; + + /* We need to do manual blending if we have a clip surface and an unbounded op, + * or an extended blend mode. + */ + if (shader->need_combine || + (shader->op < CAIRO_OPERATOR_SATURATE && + (shader->clip.type.fragment == FS_NONE || + _cairo_operator_bounded_by_mask (shader->op)))) + { + return CAIRO_STATUS_SUCCESS; + } + + shader->need_combine = TRUE; + + s = shader->target; + + /* we need to allocate a new render target and use the original as a source */ + clone = (i965_surface_t *) + i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device, + s->intel.drm.base.content, + s->intel.drm.width, + s->intel.drm.height, + I965_TILING_DEFAULT, + TRUE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) + i965_pipelined_flush (i965_device (s)); + + channel = &shader->dst; + + channel->type.vertex = VS_NONE; + channel->type.fragment = FS_SURFACE; + channel->type.pattern = PATTERN_SURFACE; + + /* swap buffer objects */ + channel->base.bo = to_intel_bo (s->intel.drm.bo); + s->intel.drm.bo = ((cairo_drm_surface_t *) clone)->bo; + ((cairo_drm_surface_t *) clone)->bo = &channel->base.bo->base; + + channel->base.content = s->intel.drm.base.content; + channel->base.format = s->intel.drm.format; + channel->base.width = s->intel.drm.width; + channel->base.height = s->intel.drm.height; + channel->base.stride = s->intel.drm.stride; + + channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST); + channel->base.extend = i965_extend (CAIRO_EXTEND_NONE); + + cairo_matrix_init_scale (&channel->base.matrix, + 1. / s->intel.drm.width, + 1.
/ s->intel.drm.height); + cairo_matrix_translate (&channel->base.matrix, + NEAREST_BIAS, + NEAREST_BIAS); + + channel->surface.surface = &clone->intel.drm.base; + + s->intel.drm.base.content = clone->intel.drm.base.content; + s->intel.drm.format = clone->intel.drm.format; + assert (s->intel.drm.width == clone->intel.drm.width); + assert (s->intel.drm.height == clone->intel.drm.height); + s->intel.drm.stride = clone->intel.drm.stride; + + return CAIRO_STATUS_SUCCESS; +} + +static inline void +constant_add_float (i965_shader_t *shader, float v) +{ + shader->constants[shader->constants_size++] = v; +} + +static inline void +i965_shader_copy_channel_constants (i965_shader_t *shader, + const union i965_shader_channel *channel) +{ + if (channel->base.constants_size) { + assert (shader->constants_size + channel->base.constants_size < ARRAY_LENGTH (shader->constants)); + + memcpy (shader->constants + shader->constants_size, + channel->base.constants, + sizeof (float) * channel->base.constants_size); + shader->constants_size += channel->base.constants_size; + } +} + +static void +i965_shader_setup_channel_constants (i965_shader_t *shader, + const union i965_shader_channel *channel) +{ + switch (channel->type.fragment) { + case FS_NONE: + case FS_CONSTANT: + /* no plane equations */ + break; + + case FS_LINEAR: + constant_add_float (shader, channel->base.matrix.xx); + constant_add_float (shader, channel->base.matrix.xy); + constant_add_float (shader, 0); + constant_add_float (shader, channel->base.matrix.x0); + break; + + case FS_RADIAL: + case FS_SURFACE: + constant_add_float (shader, channel->base.matrix.xx); + constant_add_float (shader, channel->base.matrix.xy); + constant_add_float (shader, 0); + constant_add_float (shader, channel->base.matrix.x0); + + constant_add_float (shader, channel->base.matrix.yx); + constant_add_float (shader, channel->base.matrix.yy); + constant_add_float (shader, 0); + constant_add_float (shader, channel->base.matrix.y0); + break; + + case FS_SPANS: + case FS_GLYPHS: + /* use pue from SF */ + break; + } + + i965_shader_copy_channel_constants (shader, channel); +} + +static void +i965_shader_setup_constants (i965_shader_t *shader) +{ + i965_shader_setup_channel_constants (shader, &shader->source); + i965_shader_setup_channel_constants (shader, &shader->mask); + i965_shader_setup_channel_constants (shader, &shader->clip); + i965_shader_setup_channel_constants (shader, &shader->dst); + assert (shader->constants_size < ARRAY_LENGTH (shader->constants)); +} + +/** + * Highest-valued BLENDFACTOR used in i965_blend_op. 
+ * + * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, + * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) + +static void +i965_shader_get_blend_cntl (const i965_shader_t *shader, + uint32_t *sblend, uint32_t *dblend) +{ + static const struct blendinfo { + cairo_bool_t dst_alpha; + cairo_bool_t src_alpha; + uint32_t src_blend; + uint32_t dst_blend; + } i965_blend_op[] = { + /* CAIRO_OPERATOR_CLEAR treat as SOURCE with transparent */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_SOURCE */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_OVER */ + {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* CAIRO_OPERATOR_IN */ + {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_OUT */ + {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_ATOP */ + {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + + /* CAIRO_OPERATOR_DEST */ + {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, + /* CAIRO_OPERATOR_DEST_OVER */ + {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, + /* CAIRO_OPERATOR_DEST_IN */ + {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, + /* CAIRO_OPERATOR_DEST_OUT */ + {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* CAIRO_OPERATOR_DEST_ATOP */ + {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, + /* CAIRO_OPERATOR_XOR */ + {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* CAIRO_OPERATOR_ADD */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, + }; + const struct blendinfo *op = &i965_blend_op[shader->op]; + + *sblend = op->src_blend; + *dblend = op->dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (shader->target->intel.drm.base.content == CAIRO_CONTENT_COLOR && + op->dst_alpha) + { + if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) + *sblend = BRW_BLENDFACTOR_ONE; + else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) + *sblend = BRW_BLENDFACTOR_ZERO; + } +} + +static void +emit_wm_subpans_to_pixels (struct brw_compile *compile, + int tmp) +{ + /* Inputs: + * R1.5 x/y of upper-left pixel of subspan 3 + * R1.4 x/y of upper-left pixel of subspan 2 + * R1.3 x/y of upper-left pixel of subspan 1 + * R1.2 x/y of upper-left pixel of subspan 0 + * + * Outputs: + * M1,2: u + * M3,4: v + * + * upper left, upper right, lower left, lower right. 
+ */ + + /* compute pixel locations for each subspan */ + brw_set_compression_control (compile, BRW_COMPRESSION_NONE); + brw_ADD (compile, + brw_vec8_grf (tmp), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 4, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE)); + brw_ADD (compile, + brw_vec8_grf (tmp+1), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 8, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE)); + brw_ADD (compile, + brw_vec8_grf (tmp+2), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 5, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE)); + brw_ADD (compile, + brw_vec8_grf (tmp+3), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 9, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE)); + brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED); +} + +static void +emit_wm_affine (struct brw_compile *compile, + int tmp, int reg, int msg) +{ + emit_wm_subpans_to_pixels (compile, tmp); + + brw_LINE (compile, + brw_null_reg (), + brw_vec1_grf (reg, 0), + brw_vec8_grf (tmp)); + brw_MAC (compile, + brw_message_reg (msg + 1), + brw_vec1_grf (reg, 1), + brw_vec8_grf (tmp+2)); + + brw_LINE (compile, + brw_null_reg (), + brw_vec1_grf (reg, 4), + brw_vec8_grf (tmp)); + brw_MAC (compile, + brw_message_reg (msg + 3), + brw_vec1_grf (reg, 5), + brw_vec8_grf (tmp+2)); +} + +static void +emit_wm_glyph (struct brw_compile *compile, + int tmp, int vue, int msg) +{ + emit_wm_subpans_to_pixels (compile, tmp); + + brw_MUL (compile, + brw_null_reg (), + brw_vec8_grf (tmp), + brw_imm_f (1./1024)); + brw_ADD (compile, + brw_message_reg (msg + 1), + brw_acc_reg (), + brw_vec1_grf (vue, 0)); + + brw_MUL (compile, + brw_null_reg (), + brw_vec8_grf (tmp + 2), + brw_imm_f (1./1024)); + brw_ADD (compile, + brw_message_reg (msg + 3), + brw_acc_reg (), + brw_vec1_grf (vue, 1)); +} + +static void +emit_wm_load_constant (struct brw_compile *compile, + int reg, + struct brw_reg *result) +{ + int n; + + for (n = 0; n < 4; n++) { + result[n] = result[n+4] = brw_reg (BRW_GENERAL_REGISTER_FILE, reg, n, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_XYZW); + } +} + +static void +emit_wm_load_opacity (struct brw_compile *compile, + int reg, + struct brw_reg *result) +{ + result[0] = result[1] = result[2] = result[3] = + result[4] = result[5] = result[6] = result[7] = + brw_reg (BRW_GENERAL_REGISTER_FILE, reg, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + WRITEMASK_XYZW); +} + +static void +emit_wm_load_linear (struct brw_compile *compile, + int tmp, int reg, int msg) +{ + emit_wm_subpans_to_pixels (compile, tmp); + + brw_LINE (compile, + brw_null_reg(), + brw_vec1_grf (reg, 0), + brw_vec8_grf (tmp)); + brw_MAC (compile, + brw_message_reg(msg + 1), + brw_vec1_grf (reg, 1), + brw_vec8_grf (tmp + 2)); +} + +static void +emit_wm_load_radial (struct brw_compile *compile, + int reg, int msg) + +{ + struct brw_reg c1x = brw_vec1_grf (reg, 0); + struct brw_reg c1y = brw_vec1_grf (reg, 1); + struct 
brw_reg minus_r_sq = brw_vec1_grf (reg, 3); + struct brw_reg cdx = brw_vec1_grf (reg, 4); + struct brw_reg cdy = brw_vec1_grf (reg, 5); + struct brw_reg neg_4a = brw_vec1_grf (reg + 1, 0); + struct brw_reg inv_2a = brw_vec1_grf (reg + 1, 1); + + struct brw_reg tmp_x = brw_uw16_grf (30, 0); + struct brw_reg tmp_y = brw_uw16_grf (28, 0); + struct brw_reg det = brw_vec8_grf (22); + struct brw_reg b = brw_vec8_grf (20); + struct brw_reg c = brw_vec8_grf (18); + struct brw_reg pdx = brw_vec8_grf (16); + struct brw_reg pdy = brw_vec8_grf (14); + struct brw_reg t = brw_message_reg (msg + 1); + + /* cdx = (c₂x - c₁x) + * cdy = (c₂y - c₁y) + * dr = r₂-r₁ + * pdx = px - c₁x + * pdy = py - c₁y + * + * A = cdx² + cdy² - dr² + * B = -2·(pdx·cdx + pdy·cdy + r₁·dr) + * C = pdx² + pdy² - r₁² + * + * t = (-B ± √(B² - 4·A·C)) / (2·A) + */ + + brw_ADD (compile, pdx, vec8 (tmp_x), negate (c1x)); + brw_ADD (compile, pdy, vec8 (tmp_y), negate (c1y)); + + brw_LINE (compile, brw_null_reg (), cdx, pdx); + brw_MAC (compile, b, cdy, pdy); + + brw_MUL (compile, brw_null_reg (), pdx, pdx); + brw_MAC (compile, c, pdy, pdy); + brw_ADD (compile, c, c, minus_r_sq); + + brw_MUL (compile, brw_null_reg (), b, b); + brw_MAC (compile, det, neg_4a, c); + + /* XXX use rsqrt like i915? it's faster and we need to mac anyway */ + brw_math (compile, + det, + BRW_MATH_FUNCTION_SQRT, + BRW_MATH_SATURATE_NONE, + 2, + det, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + /* XXX cmp, +- */ + + brw_ADD (compile, det, negate (det), negate (b)); + brw_ADD (compile, det, det, negate (b)); + brw_MUL (compile, t, det, inv_2a); +} + +static int +emit_wm_sample (struct brw_compile *compile, + union i965_shader_channel *channel, + int sampler, + int msg_base, int msg_len, + int dst, + struct brw_reg *result) +{ + int response_len, mask; + + if (channel->base.content == CAIRO_CONTENT_ALPHA) { + mask = 0x7000; + response_len = 2; + result[0] = result[1] = result[2] = result[3] = brw_vec8_grf (dst); + result[4] = result[5] = result[6] = result[7] = brw_vec8_grf (dst + 1); + } else { + mask = 0; + response_len = 8; + result[0] = brw_vec8_grf (dst + 0); + result[1] = brw_vec8_grf (dst + 2); + result[2] = brw_vec8_grf (dst + 4); + result[3] = brw_vec8_grf (dst + 6); + result[4] = brw_vec8_grf (dst + 1); + result[5] = brw_vec8_grf (dst + 3); + result[6] = brw_vec8_grf (dst + 5); + result[7] = brw_vec8_grf (dst + 7); + } + + brw_set_compression_control (compile, BRW_COMPRESSION_NONE); + + brw_set_mask_control (compile, BRW_MASK_DISABLE); + brw_MOV (compile, + get_element_ud (brw_vec8_grf (0), 2), + brw_imm_ud (mask)); + brw_set_mask_control (compile, BRW_MASK_ENABLE); + + brw_SAMPLE (compile, + brw_uw16_grf (dst, 0), + msg_base, + brw_uw8_grf (0, 0), + sampler + 1, /* binding table */ + sampler, + WRITEMASK_XYZW, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE, + response_len, + msg_len, + 0 /* eot */); + + brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED); + + return response_len; +} + +#define MAX_MSG_REGISTER 16 + +static void +emit_wm_load_channel (struct brw_compile *compile, + union i965_shader_channel *channel, + int *vue, + int *cue, + int *msg, + int *sampler, + int *grf, + struct brw_reg *result) +{ + switch (channel->type.fragment) { + case FS_NONE: + break; + + case FS_CONSTANT: + emit_wm_load_constant (compile, *cue, result); + *cue += 1; + break; + + case FS_RADIAL: + emit_wm_load_radial (compile, *cue, *msg); + *cue += 2; + + if (*msg + 3 > MAX_MSG_REGISTER) + *msg = 1; + + *grf += emit_wm_sample (compile, channel, *sampler,
*msg, 3, *grf, result); + *sampler += 1; + *msg += 3; + break; + + case FS_LINEAR: + emit_wm_load_linear (compile, *grf, *cue, *msg); + *cue += 1; + + if (*msg + 3 > MAX_MSG_REGISTER) + *msg = 1; + + *grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result); + *sampler += 1; + *msg += 3; + break; + + case FS_SURFACE: + emit_wm_affine (compile, *grf, *cue, *msg); + *cue += 2; + + if (*msg + 5 > MAX_MSG_REGISTER) + *msg = 1; + + *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result); + *sampler += 1; + *msg += 5; + break; + + case FS_SPANS: + emit_wm_load_opacity (compile, *vue, result); + *vue += 1; + break; + + case FS_GLYPHS: + emit_wm_glyph (compile, *grf, *vue, *msg); + *vue += 1; + + if (*msg + 5 > MAX_MSG_REGISTER) + *msg = 1; + + *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result); + *sampler += 1; + *msg += 5; + break; + } +} + +static unsigned long +i965_wm_kernel_hash (const i965_shader_t *shader) +{ + unsigned long hash; + + hash = + (shader->source.type.fragment & 0xff) | + (shader->mask.type.fragment & 0xff) << 8 | + (shader->clip.type.fragment & 0xff) << 16; + if (shader->need_combine) + hash |= (1 + shader->op) << 24; + + return hash; +} + +static void +i965_wm_kernel_init (struct i965_wm_kernel *key, + const i965_shader_t *shader) +{ + key->entry.hash = i965_wm_kernel_hash (shader); +} + +static uint32_t +i965_shader_const_urb_length (i965_shader_t *shader) +{ + const int lengths[] = { 0, 1, 1, 4, 2, 0, 0 }; + int count = 0; /* 128-bit/16-byte increments */ + + count += lengths[shader->source.type.fragment]; + count += lengths[shader->mask.type.fragment]; + count += lengths[shader->clip.type.fragment]; + count += lengths[shader->dst.type.fragment]; + + return (count + 1) / 2; /* 256-bit/32-byte increments */ +} + +static uint32_t +i965_shader_pue_length (i965_shader_t *shader) +{ + return 1 + (shader->mask.type.vertex != VS_NONE); +} + +static uint32_t +create_wm_kernel (i965_device_t *device, + i965_shader_t *shader, + int *num_reg) +{ + struct brw_compile compile; + struct brw_reg source[8], mask[8], clip[8], dst[8]; + const uint32_t *program; + uint32_t size; + int msg, cue, vue, grf, sampler; + int i; + + struct i965_wm_kernel key, *cache; + cairo_status_t status; + uint32_t offset; + + i965_wm_kernel_init (&key, shader); + cache = _cairo_hash_table_lookup (device->wm_kernels, &key.entry); + if (cache != NULL) + return cache->offset; + + brw_compile_init (&compile, device->is_g4x); + + if (key.entry.hash == FS_CONSTANT && + to_intel_bo (shader->target->intel.drm.bo)->tiling) + { + struct brw_instruction *insn; + + assert (i965_shader_const_urb_length (shader) == 1); + brw_MOV (&compile, brw_message4_reg (2), brw_vec4_grf (2, 0)); + grf = 3; + + brw_push_insn_state (&compile); + brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? 
*/ + brw_MOV (&compile, + retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD), + retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state (&compile); + + insn = brw_next_instruction (&compile, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = 0; + + brw_instruction_set_destination (insn, + retype (vec16 (brw_acc_reg ()), + BRW_REGISTER_TYPE_UW)); + + brw_instruction_set_source0 (insn, + retype (brw_vec8_grf (0), + BRW_REGISTER_TYPE_UW)); + + brw_instruction_set_dp_write_message (insn, + 0, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + 3, + 1, /* pixel scoreboard */ + 0, + TRUE); + } + else + { + msg = 1; + cue = 2; + vue = cue + i965_shader_const_urb_length (shader); + grf = vue + i965_shader_pue_length (shader); + sampler = 0; + + brw_set_compression_control (&compile, BRW_COMPRESSION_COMPRESSED); + emit_wm_load_channel (&compile, &shader->source, + &vue, &cue, &msg, &sampler, &grf, + source); + emit_wm_load_channel (&compile, &shader->mask, + &vue, &cue, &msg, &sampler, &grf, + mask); + emit_wm_load_channel (&compile, &shader->clip, + &vue, &cue, &msg, &sampler, &grf, + clip); + emit_wm_load_channel (&compile, &shader->dst, + &vue, &cue, &msg, &sampler, &grf, + dst); + brw_set_compression_control (&compile, BRW_COMPRESSION_NONE); + + if (shader->need_combine) { + if (shader->mask.type.fragment != FS_NONE && + shader->clip.type.fragment != FS_NONE) + { + for (i = 0; i < 8; i++) + brw_MUL (&compile, mask[i], mask[i], clip[i]); + } + + /* XXX LERP ! */ + for (i = 0; i < 8; i++) + brw_MOV (&compile, brw_message_reg (2 + i), source[i]); + } else { + if (shader->mask.type.fragment != FS_NONE) { + if (shader->clip.type.fragment != FS_NONE) { + for (i = 0; i < 8; i++) + brw_MUL (&compile, mask[i], mask[i], clip[i]); + } + + for (i = 0; i < 8; i++) + brw_MUL (&compile, brw_message_reg (2 + i), source[i], mask[i]); + } else { + if (shader->clip.type.fragment != FS_NONE) { + for (i = 0; i < 8; i++) + brw_MUL (&compile, brw_message_reg (2 + i), source[i], clip[i]); + } else { + for (i = 0; i < 8; i++) + brw_MOV (&compile, brw_message_reg (2 + i), source[i]); + } + } + } + + brw_push_insn_state (&compile); + brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? 
*/ + brw_MOV (&compile, + retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD), + retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state (&compile); + + brw_fb_WRITE (&compile, + retype (vec16 (brw_acc_reg ()), BRW_REGISTER_TYPE_UW), + 0, /* base reg */ + retype (brw_vec8_grf (0), BRW_REGISTER_TYPE_UW), + 0, /* binding table index */ + 2 + 8, /* msg length */ + 0, /* response length */ + TRUE); /* EOT */ + } + + program = brw_get_program (&compile, &size); + *num_reg = grf; + + i965_stream_align (&device->general, 64); + offset = i965_stream_emit (&device->general, program, size); + + cache = _cairo_freelist_alloc (&device->wm_kernel_freelist); + if (likely (cache != NULL)) { + i965_wm_kernel_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->wm_kernels, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->wm_kernel_freelist, cache); + } + + return offset; +} + +static uint32_t +create_sf_kernel (i965_device_t *device, + i965_shader_t *shader) +{ + struct brw_compile compile; + const uint32_t *program; + uint32_t size; + int msg_len; + + brw_compile_init (&compile, device->is_g4x); + + switch (shader->mask.type.vertex) { + default: + case VS_NONE: + /* use curb plane eq in WM */ + msg_len = 1; + break; + + case VS_SPANS: + /* just a constant opacity */ + brw_MOV (&compile, + brw_message4_reg (1), + brw_vec4_grf (3, 0)); + msg_len = 2; + break; + + case VS_GLYPHS: + /* an offset+sf into the glyph cache */ + brw_MOV (&compile, + brw_acc_reg (), + brw_vec2_grf (3, 0)); + brw_MAC (&compile, + brw_message4_reg (1), + negate (brw_vec2_grf (1, 4)), + brw_imm_f (1./1024)); + msg_len = 2; + break; + } + + brw_urb_WRITE (&compile, + brw_null_reg (), + 0, + brw_vec8_grf (0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + msg_len, + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* offset */ + BRW_URB_SWIZZLE_NONE); + + program = brw_get_program (&compile, &size); + + i965_stream_align (&device->general, 64); + return i965_stream_emit (&device->general, program, size); +} + +static uint32_t +i965_sf_kernel (const i965_shader_t *shader) +{ + return shader->mask.type.vertex; +} + +static void +i965_sf_state_init (struct i965_sf_state *key, + const i965_shader_t *shader) +{ + key->entry.hash = i965_sf_kernel (shader); +} + +cairo_bool_t +i965_sf_state_equal (const void *A, const void *B) +{ + const cairo_hash_entry_t *a = A, *b = B; + return a->hash == b->hash; +} + +/** + * Sets up the SF state pointing at an SF kernel. + * + * The SF kernel does coord interp: for each attribute, + * calculate dA/dx and dA/dy. Hand these interpolation coefficients + * back to SF which then hands pixels off to WM. 
+ */ +static uint32_t +gen4_create_sf_state (i965_device_t *device, + i965_shader_t *shader) +{ + struct brw_sf_unit_state *state; + struct i965_sf_state key, *cache; + cairo_status_t status; + uint32_t offset; + + i965_sf_state_init (&key, shader); + if (i965_sf_state_equal (&key, &device->sf_state)) + return device->sf_state.offset; + + cache = _cairo_hash_table_lookup (device->sf_states, &key.entry); + if (cache != NULL) { + offset = cache->offset; + goto DONE; + } + + offset = create_sf_kernel (device, shader); + + state = i965_stream_alloc (&device->general, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->thread0.grf_reg_count = BRW_GRF_BLOCKS (3); + assert ((offset & 63) == 0); + state->thread0.kernel_start_pointer = offset >> 6; + state->sf1.single_program_flow = 1; + state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + state->thread3.urb_entry_read_offset = 1; + state->thread3.dispatch_grf_start_reg = 3; + state->thread4.max_threads = SF_MAX_THREADS - 1; + state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + state->thread4.nr_urb_entries = URB_SF_ENTRIES; + state->sf6.dest_org_vbias = 0x8; + state->sf6.dest_org_hbias = 0x8; + + offset = i965_stream_offsetof (&device->general, state); + + cache = _cairo_freelist_alloc (&device->sf_freelist); + if (likely (cache != NULL)) { + i965_sf_state_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->sf_states, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->sf_freelist, cache); + } + + DONE: + i965_sf_state_init (&device->sf_state, shader); + device->sf_state.offset = offset; + + return offset; +} + +static unsigned long +i965_shader_sampler_hash (const i965_shader_t *shader) +{ + unsigned long hash = 0; + unsigned int offset = 0; + + if (shader->source.base.bo != NULL) { + hash |= (shader->source.base.filter << offset) | + (shader->source.base.extend << (offset + 4)); + offset += 8; + } + + if (shader->mask.base.bo != NULL) { + hash |= (shader->mask.base.filter << offset) | + (shader->mask.base.extend << (offset + 4)); + offset += 8; + } + + if (shader->clip.base.bo != NULL) { + hash |= (shader->clip.base.filter << offset) | + (shader->clip.base.extend << (offset + 4)); + offset += 8; + } + + if (shader->dst.base.bo != NULL) { + hash |= (shader->dst.base.filter << offset) | + (shader->dst.base.extend << (offset + 4)); + offset += 8; + } + + return hash; +} + +static void +i965_sampler_init (struct i965_sampler *key, + const i965_shader_t *shader) +{ + key->entry.hash = i965_shader_sampler_hash (shader); +} + +static void +emit_sampler_channel (i965_device_t *device, + const union i965_shader_channel *channel, + uint32_t border_color) +{ + struct brw_sampler_state *state; + + state = i965_stream_alloc (&device->general, 0, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->ss0.lod_preclamp = 1; /* GL mode */ + + state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; + + state->ss0.min_filter = channel->base.filter; + state->ss0.mag_filter = channel->base.filter; + + state->ss1.r_wrap_mode = channel->base.extend; + state->ss1.s_wrap_mode = channel->base.extend; + state->ss1.t_wrap_mode = channel->base.extend; + + assert ((border_color & 31) == 0); + state->ss2.border_color_pointer = border_color >> 5; +} + +static uint32_t +emit_sampler_state_table (i965_device_t *device, + i965_shader_t *shader) +{ + struct i965_sampler key, *cache; + cairo_status_t status; + uint32_t offset; + + if 
(device->border_color_offset == (uint32_t) -1) { + struct brw_sampler_legacy_border_color *border_color; + + border_color = i965_stream_alloc (&device->general, 32, + sizeof (*border_color)); + border_color->color[0] = 0; /* R */ + border_color->color[1] = 0; /* G */ + border_color->color[2] = 0; /* B */ + border_color->color[3] = 0; /* A */ + + device->border_color_offset = i965_stream_offsetof (&device->general, + border_color); + } else { + i965_sampler_init (&key, shader); + cache = _cairo_hash_table_lookup (device->samplers, &key.entry); + if (cache != NULL) + return cache->offset; + } + + i965_stream_align (&device->general, 32); + offset = device->general.used; + if (shader->source.base.bo != NULL) { + emit_sampler_channel (device, + &shader->source, + device->border_color_offset); + } + if (shader->mask.base.bo != NULL) { + emit_sampler_channel (device, + &shader->mask, + device->border_color_offset); + } + if (shader->clip.base.bo != NULL) { + emit_sampler_channel (device, + &shader->clip, + device->border_color_offset); + } + if (shader->dst.base.bo != NULL) { + emit_sampler_channel (device, + &shader->dst, + device->border_color_offset); + } + + cache = _cairo_freelist_alloc (&device->sampler_freelist); + if (likely (cache != NULL)) { + i965_sampler_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->samplers, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->sampler_freelist, cache); + } + + return offset; +} + +static void +i965_cc_state_init (struct i965_cc_state *key, + const i965_shader_t *shader) +{ + uint32_t src_blend, dst_blend; + + if (shader->need_combine) + src_blend = dst_blend = 0; + else + i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend); + + key->entry.hash = src_blend | ((dst_blend & 0xffff) << 16); +} + +cairo_bool_t +i965_cc_state_equal (const void *A, const void *B) +{ + const cairo_hash_entry_t *a = A, *b = B; + return a->hash == b->hash; +} + +static uint32_t +cc_state_emit (i965_device_t *device, i965_shader_t *shader) +{ + struct brw_cc_unit_state *state; + struct i965_cc_state key, *cache; + cairo_status_t status; + uint32_t src_blend, dst_blend; + uint32_t offset; + + i965_cc_state_init (&key, shader); + if (i965_cc_state_equal (&key, &device->cc_state)) + return device->cc_state.offset; + + cache = _cairo_hash_table_lookup (device->cc_states, &key.entry); + if (cache != NULL) { + offset = cache->offset; + goto DONE; + } + + if (shader->need_combine) + src_blend = dst_blend = 0; + else + i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend); + + state = i965_stream_alloc (&device->general, 64, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + /* XXX Note errata, need to flush render cache when blend_enable 0 -> 1 */ + /* XXX 2 source blend */ + state->cc3.blend_enable = ! 
shader->need_combine; + state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; + state->cc5.ia_src_blend_factor = src_blend; + state->cc5.ia_dest_blend_factor = dst_blend; + state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; + state->cc6.clamp_post_alpha_blend = 1; + state->cc6.clamp_pre_alpha_blend = 1; + state->cc6.src_blend_factor = src_blend; + state->cc6.dest_blend_factor = dst_blend; + + offset = i965_stream_offsetof (&device->general, state); + + cache = _cairo_freelist_alloc (&device->cc_freelist); + if (likely (cache != NULL)) { + i965_cc_state_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->cc_states, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->cc_freelist, cache); + } + + DONE: + i965_cc_state_init (&device->cc_state, shader); + device->cc_state.offset = offset; + + return offset; +} + +static void +i965_wm_state_init (struct i965_wm_state *key, + const i965_shader_t *shader) +{ + key->kernel = i965_wm_kernel_hash (shader); + key->sampler = i965_shader_sampler_hash (shader); + + key->entry.hash = key->kernel ^ ((key->sampler) << 16 | (key->sampler >> 16)); +} + +cairo_bool_t +i965_wm_state_equal (const void *A, const void *B) +{ + const struct i965_wm_state *a = A, *b = B; + + if (a->entry.hash != b->entry.hash) + return FALSE; + + return a->kernel == b->kernel && a->sampler == b->sampler; +} + +static int +i965_shader_binding_table_count (i965_shader_t *shader) +{ + int count; + + count = 1; + if (shader->source.type.fragment != FS_CONSTANT) + count++; + switch (shader->mask.type.fragment) { + case FS_NONE: + case FS_CONSTANT: + case FS_SPANS: + break; + case FS_LINEAR: + case FS_RADIAL: + case FS_SURFACE: + case FS_GLYPHS: + count++; + } + if (shader->clip.type.fragment == FS_SURFACE) + count++; + if (shader->dst.type.fragment == FS_SURFACE) + count++; + + return count; +} + +static uint32_t +gen4_create_wm_state (i965_device_t *device, + i965_shader_t *shader) +{ + struct brw_wm_unit_state *state; + uint32_t sampler; + uint32_t kernel; + + struct i965_wm_state key, *cache; + cairo_status_t status; + int num_reg; + + i965_wm_state_init (&key, shader); + if (i965_wm_state_equal (&key, &device->wm_state)) + return device->wm_state.offset; + + cache = _cairo_hash_table_lookup (device->wm_states, &key.entry); + if (cache != NULL) { + device->wm_state = *cache; + return cache->offset; + } + + kernel = create_wm_kernel (device, shader, &num_reg); + sampler = emit_sampler_state_table (device, shader); + + state = i965_stream_alloc (&device->general, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + state->thread0.grf_reg_count = BRW_GRF_BLOCKS (num_reg); + assert ((kernel & 63) == 0); + state->thread0.kernel_start_pointer = kernel >> 6; + + state->thread3.dispatch_grf_start_reg = 2; + + state->wm4.sampler_count = 1; /* 1-4 samplers used */ + assert ((sampler & 31) == 0); + state->wm4.sampler_state_pointer = sampler >> 5; + if (device->is_g4x) + state->wm5.max_threads = PS_MAX_THREADS_CTG - 1; + else + state->wm5.max_threads = PS_MAX_THREADS_BRW - 1; + state->wm5.thread_dispatch_enable = 1; + + if (device->is_g4x) { + /* XXX contiguous 32 pixel dispatch */ + } + state->wm5.enable_16_pix = 1; + /* 8 pixel dispatch and friends */ + //state->wm5.early_depth_test = 1; + + state->thread1.binding_table_entry_count = i965_shader_binding_table_count(shader); + state->thread3.urb_entry_read_length = i965_shader_pue_length (shader); + state->thread3.const_urb_entry_read_length = i965_shader_const_urb_length 
(shader); + + key.offset = i965_stream_offsetof (&device->general, state); + + cache = _cairo_freelist_alloc (&device->wm_state_freelist); + if (likely (cache != NULL)) { + *cache = key; + status = _cairo_hash_table_insert (device->wm_states, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->wm_state_freelist, cache); + } + + device->wm_state = key; + return key.offset; +} + +static uint32_t +vs_unit_state_emit (i965_device_t *device) +{ + if (device->vs_offset == (uint32_t) -1) { + struct brw_vs_unit_state *state; + + /* Set up the vertex shader to be disabled (passthrough) */ + state = i965_stream_alloc (&device->general, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->thread4.nr_urb_entries = URB_VS_ENTRIES; + state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + state->vs6.vert_cache_disable = 1; + + device->vs_offset = i965_stream_offsetof (&device->general, state); + } + + return device->vs_offset; +} + +static uint32_t +i965_get_card_format (cairo_format_t format) +{ + switch (format) { + case CAIRO_FORMAT_ARGB32: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + case CAIRO_FORMAT_RGB24: + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + case CAIRO_FORMAT_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + case CAIRO_FORMAT_A1: + default: + ASSERT_NOT_REACHED; + return 0; + } +} + +static uint32_t +i965_get_dest_format (cairo_format_t format) +{ + switch (format) { + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + case CAIRO_FORMAT_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + case CAIRO_FORMAT_A1: + default: + ASSERT_NOT_REACHED; + return 0; + } +} + +/* XXX silly inline due to compiler bug... */ +static inline void +i965_stream_add_pending_relocation (i965_stream_t *stream, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t delta) +{ + int n; + + n = stream->num_pending_relocations++; + assert (n < stream->max_pending_relocations); + + stream->pending_relocations[n].offset = target_offset; + stream->pending_relocations[n].read_domains = read_domains; + stream->pending_relocations[n].write_domain = write_domain;; + stream->pending_relocations[n].delta = delta; +} + +static uint32_t +emit_surface_state (i965_device_t *device, + cairo_bool_t is_target, + intel_bo_t *bo, + cairo_format_t format, + int width, int height, int stride, + int type) +{ + struct brw_surface_state *state; + uint32_t write_domain, read_domains; + uint32_t offset; + + state = i965_stream_alloc (&device->surface, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->ss0.surface_type = type; + if (is_target) + state->ss0.surface_format = i965_get_dest_format (format); + else + state->ss0.surface_format = i965_get_card_format (format); + + state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; + state->ss0.color_blend = 1; + if (is_target && device->is_g4x) + state->ss0.render_cache_read_mode = 1; + + state->ss1.base_addr = bo->offset; + + state->ss2.height = height - 1; + state->ss2.width = width - 1; + state->ss3.pitch = stride - 1; + state->ss3.tile_walk = bo->tiling == I915_TILING_Y; + state->ss3.tiled_surface = bo->tiling != I915_TILING_NONE; + + if (is_target) { + read_domains = I915_GEM_DOMAIN_RENDER; + write_domain = I915_GEM_DOMAIN_RENDER; + } else { + read_domains = I915_GEM_DOMAIN_SAMPLER; + write_domain = 0; + } + + offset = i965_stream_offsetof (&device->surface, state); + i965_emit_relocation (device, &device->surface, + bo, 0, + read_domains, 
write_domain, + offset + offsetof (struct brw_surface_state, ss1.base_addr)); + return offset; +} + +static uint32_t +emit_surface_state_for_shader (i965_device_t *device, + const union i965_shader_channel *channel) +{ + int type = BRW_SURFACE_2D; + + assert (channel->type.fragment != FS_NONE); + assert (channel->type.fragment != FS_CONSTANT); + + if (channel->type.fragment != FS_SURFACE) + type = BRW_SURFACE_1D; + + return emit_surface_state (device, FALSE, + channel->base.bo, + channel->base.format, + channel->base.width, + channel->base.height, + channel->base.stride, + type); +} + +cairo_bool_t +i965_wm_binding_equal (const void *A, + const void *B) +{ + const struct i965_wm_binding *a = A, *b = B; + + if (a->entry.hash != b->entry.hash) + return FALSE; + + if (a->size != b->size) + return FALSE; + + return memcmp (a->table, b->table, sizeof (uint32_t) * a->size) == 0; +} + +static void +i965_wm_binding_init (struct i965_wm_binding *state, + const uint32_t *table, + int size) +{ + int n; + + state->entry.hash = size; + state->size = size; + + for (n = 0; n < size; n++) { + state->table[n] = table[n]; + state->entry.hash ^= (table[n] << (8 * n)) | + (table[n] >> (32 - (8*n))); + } +} + +static uint32_t +emit_binding_table (i965_device_t *device, + i965_shader_t *shader) +{ + intel_bo_t *bo; + struct i965_wm_binding key, *cache; + uint32_t *table; + int n = 0; + + table = i965_stream_alloc (&device->surface, 32, 5 * sizeof (uint32_t)); + if (shader->target->stream != device->surface.serial) { + shader->target->stream = device->surface.serial; + shader->target->offset = emit_surface_state (device, + TRUE, + to_intel_bo (shader->target->intel.drm.bo), + shader->target->intel.drm.format, + shader->target->intel.drm.width, + shader->target->intel.drm.height, + shader->target->intel.drm.stride, + BRW_SURFACE_2D); + } + table[n++] = shader->target->offset; + + bo = shader->source.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->source); + } + table[n++] = bo->opaque1; + } + + bo = shader->mask.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->mask); + } + table[n++] = bo->opaque1; + } + + bo = shader->clip.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->clip); + } + table[n++] = bo->opaque1; + } + + bo = shader->dst.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->dst); + } + table[n++] = bo->opaque1; + } + + i965_wm_binding_init (&key, table, n); + key.offset = i965_stream_offsetof (&device->surface, table); + + if (i965_wm_binding_equal (&key, &device->wm_binding)) { + device->surface.used = key.offset; + return device->wm_binding.offset; + } + + cache = _cairo_hash_table_lookup (device->wm_bindings, &key.entry); + if (cache != NULL) { + device->surface.used = key.offset; + key.offset = cache->offset; + } + + device->wm_binding = key; + return key.offset; +} + +static void +i965_emit_invariants (i965_device_t *device) +{ + OUT_BATCH (BRW_CS_URB_STATE | 0); + OUT_BATCH (((URB_CS_ENTRY_SIZE-1) << 4) | (URB_CS_ENTRIES << 0)); +} + +static void +i965_emit_urb_fences (i965_device_t 
*device) +{ + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + + if (device->have_urb_fences) + return; + + /* URB fence */ + urb_vs_start = 0; + urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + urb_gs_start = urb_vs_start + urb_vs_size; + urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + urb_clip_start = urb_gs_start + urb_gs_size; + urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + urb_sf_start = urb_clip_start + urb_clip_size; + urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + urb_cs_start = urb_sf_start + urb_sf_size; + urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + + /* erratum: URB_FENCE must not cross a 64-byte cache-line */ + while ((device->batch.used & 63) > 64-12) + OUT_BATCH (MI_NOOP); + OUT_BATCH (BRW_URB_FENCE | + UF0_CS_REALLOC | + UF0_SF_REALLOC | + UF0_CLIP_REALLOC | + UF0_GS_REALLOC | + UF0_VS_REALLOC | + 1); + OUT_BATCH (((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + OUT_BATCH (((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + + device->have_urb_fences = TRUE; + device->constants_size = 0; +} + +static void +i965_emit_base (i965_device_t *device) +{ + OUT_BATCH (BRW_STATE_BASE_ADDRESS | 4); + if (likely (device->general.num_pending_relocations == 0)) { + i965_stream_add_pending_relocation (&device->general, + device->batch.used, + I915_GEM_DOMAIN_INSTRUCTION, 0, + BASE_ADDRESS_MODIFY); + } + OUT_BATCH (0); /* pending relocation */ + + if (likely (device->surface.num_pending_relocations == 0)) { + i965_stream_add_pending_relocation (&device->surface, + device->batch.used, + I915_GEM_DOMAIN_INSTRUCTION, 0, + BASE_ADDRESS_MODIFY); + } + OUT_BATCH (0); /* pending relocation */ + + OUT_BATCH (0 | BASE_ADDRESS_MODIFY); + /* general state max addr, disabled */ + OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY); + /* media object state max addr, disabled */ + OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY); +} + +static void +i965_emit_vertex_element (i965_device_t *device, + i965_shader_t *shader) +{ + uint32_t offset; + uint32_t type; + int nelem; + + type = 0; + nelem = 1; + if (shader->mask.type.vertex == VS_SPANS || + shader->mask.type.vertex == VS_GLYPHS) + { + type = shader->mask.type.vertex; + nelem++; + } + + if (type == device->vertex_type) + return; + device->vertex_type = type; + + offset = 0; + + OUT_BATCH (BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1)); + OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (offset << VE0_OFFSET_SHIFT)); + OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + offset += 8; + + assert (shader->source.type.vertex == VS_NONE); + switch (shader->mask.type.vertex) { + default: + case VS_NONE: + break; + + case VS_SPANS: + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT) | + (offset << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_1_SHIFT) | + 
(BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + + offset += 4; + break; + + case VS_GLYPHS: + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R16G16_FLOAT << VE0_FORMAT_SHIFT) | + (offset << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + + offset += 4; + break; + } + assert (shader->clip.type.vertex == VS_NONE); + assert (shader->dst.type.vertex == VS_NONE); + + device->vertex_size = offset; + i965_stream_align (&device->vertex, device->vertex_size); + device->vertex.committed = device->vertex.used; + + device->rectangle_size = 3 * offset; +} + +static cairo_bool_t +i965_shader_needs_surface_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + return device->target != shader->target || shader->target->stream == 0 || + (shader->source.base.bo != NULL && device->source != shader->source.base.bo) || + (shader->mask.base.bo != NULL && device->mask != shader->mask.base.bo) || + (shader->clip.base.bo != NULL && device->clip != shader->clip.base.bo); +} + +static cairo_bool_t +i965_shader_needs_constants_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + if (shader->constants_size == 0) + return FALSE; + + if (device->constants_size != shader->constants_size) + return TRUE; + + return memcmp (device->constants, + shader->constants, + sizeof (float) * shader->constants_size); +} + +static cairo_bool_t +i965_shader_needs_state_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + union { + struct i965_sf_state sf; + struct i965_wm_state wm; + struct i965_cc_state cc; + } state; + + i965_sf_state_init (&state.sf, shader); + if (! i965_sf_state_equal (&state.sf, &device->sf_state)) + return TRUE; + + i965_wm_state_init (&state.wm, shader); + if (! i965_wm_state_equal (&state.wm, &device->wm_state)) + return TRUE; + + i965_cc_state_init (&state.cc, shader); + if (! i965_cc_state_equal (&state.cc, &device->cc_state)) + return TRUE; + + return FALSE; +} + +static void +i965_emit_composite (i965_device_t *device, + i965_shader_t *shader) +{ + uint32_t draw_rectangle; + + if (i965_shader_needs_surface_update (shader, device)) { + /* Binding table pointers */ + OUT_BATCH (BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); + OUT_BATCH (0); /* vs */ + OUT_BATCH (0); /* gs */ + OUT_BATCH (0); /* clip */ + OUT_BATCH (0); /* sf */ + /* Only the PS uses the binding table */ + OUT_BATCH (emit_binding_table (device, shader)); + + device->target = shader->target; + device->source = shader->source.base.bo; + device->mask = shader->mask.base.bo; + device->clip = shader->clip.base.bo; + } + + /* The drawing rectangle clipping is always on. Set it to values that + * shouldn't do any clipping. 
+ */ + draw_rectangle = DRAW_YMAX (shader->target->intel.drm.height - 1) | + DRAW_XMAX (shader->target->intel.drm.width - 1); + if (draw_rectangle != device->draw_rectangle) { + OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH (0x00000000); /* ymin, xmin */ + OUT_BATCH (draw_rectangle); + OUT_BATCH (0x00000000); /* yorigin, xorigin */ + device->draw_rectangle = draw_rectangle; + } + + /* skip the depth buffer */ + /* skip the polygon stipple */ + /* skip the polygon stipple offset */ + /* skip the line stipple */ + + /* Set the pointers to the 3d pipeline state */ + if (i965_shader_needs_state_update (shader, device)) { + OUT_BATCH (BRW_3DSTATE_PIPELINED_POINTERS | 5); + OUT_BATCH (vs_unit_state_emit (device)); + OUT_BATCH (BRW_GS_DISABLE); + OUT_BATCH (BRW_CLIP_DISABLE); + OUT_BATCH (gen4_create_sf_state (device, shader)); + OUT_BATCH (gen4_create_wm_state (device, shader)); + OUT_BATCH (cc_state_emit (device, shader)); + + /* Once the units are initialized, we need to setup the fences */ + i965_emit_urb_fences (device); + } + + if (i965_shader_needs_constants_update (shader, device)) { + uint32_t size = (sizeof (float) * shader->constants_size + 63) & -64; + + /* XXX reuse clear/black/white + * ht! + */ + + /* XXX CONSTANT_BUFFER Address Offset Disable? INSTPM? */ + + assert (size <= 64 * URB_CS_ENTRY_SIZE); + assert (((sizeof (float) * shader->constants_size + 31) & -32) == 32 * i965_shader_const_urb_length (shader)); + + OUT_BATCH (BRW_CONSTANT_BUFFER | (1 << 8)); + assert ((device->constant.used & 63) == 0); + i965_stream_add_pending_relocation (&device->constant, + device->batch.used, + I915_GEM_DOMAIN_INSTRUCTION, 0, + device->constant.used + size / 64 - 1); + OUT_BATCH (0); /* pending relocation */ + + device->constants = i965_stream_alloc (&device->constant, 0, size); + memcpy (device->constants, shader->constants, size); + device->constants_size = shader->constants_size; + } + + i965_emit_vertex_element (device, shader); +} + +void +i965_flush_vertices (i965_device_t *device) +{ + int vertex_count, vertex_start; + + if (device->vertex.used == device->vertex.committed) + return; + + vertex_start = device->vertex.committed / device->vertex_size; + vertex_count = + (device->vertex.used - device->vertex.committed) / device->vertex_size; + + assert (vertex_count); + + if (device->vertex_size != device->last_vertex_size) { + i965_stream_add_pending_relocation (&device->vertex, + device->batch.used + 8, + I915_GEM_DOMAIN_VERTEX, 0, + 0); + + OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) | + VB0_VERTEXDATA | + (device->vertex_size << VB0_BUFFER_PITCH_SHIFT)); + OUT_BATCH (0); /* pending relocation */ + OUT_BATCH (0); + OUT_BATCH (0); + device->last_vertex_size = device->vertex_size; + } + + OUT_BATCH (BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH (vertex_count); /* vertex count per instance */ + OUT_BATCH (vertex_start); /* start vertex offset */ + OUT_BATCH (1); /* single instance */ + OUT_BATCH (0); + OUT_BATCH (0); + + device->vertex.committed = device->vertex.used; + +#if 1 + OUT_BATCH (MI_FLUSH); +#endif +} + +void +i965_finish_vertices (i965_device_t *device) +{ + cairo_status_t status; + + i965_flush_vertices (device); + + i965_stream_commit (device, &device->vertex); + + if (! 
i965_shader_check_aperture (device->shader, device)) { + status = i965_device_flush (device); + if (unlikely (status)) + longjmp (device->shader->unwind, status); + + status = i965_shader_commit (device->shader, device); + assert (status == CAIRO_STATUS_SUCCESS); + } + + device->last_vertex_size = 0; +} + +static cairo_bool_t +i965_shader_needs_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + if (i965_shader_needs_surface_update (shader, device)) + return TRUE; + + if (i965_shader_needs_constants_update (shader, device)) + return TRUE; + + return i965_shader_needs_state_update (shader, device); +} + +static void +i965_shader_reduce (i965_shader_t *shader, + const i965_device_t *device) +{ + if (shader->op == CAIRO_OPERATOR_OVER && + (i965_wm_kernel_hash (shader) & ~0xff) == 0 && + (shader->source.base.content & CAIRO_CONTENT_ALPHA) == 0) + { + shader->op = CAIRO_OPERATOR_SOURCE; + } +} + +cairo_status_t +i965_shader_commit (i965_shader_t *shader, + i965_device_t *device) +{ + cairo_status_t status; + + if (! shader->committed) { + device->shader = shader; + + status = i965_shader_setup_dst (shader); + if (unlikely (status)) + return status; + + i965_shader_setup_constants (shader); + i965_shader_reduce (shader, device); + + if ((status = setjmp (shader->unwind))) + return status; + + shader->committed = TRUE; + } + + if (! i965_shader_needs_update (shader, device)) + return CAIRO_STATUS_SUCCESS; + + /* XXX too many guestimates about likely maximum sizes */ +recheck: + if (device->batch.used + 128 > device->batch.size || + ! i965_shader_check_aperture (shader, device)) + { + status = i965_device_flush (device); + if (unlikely (status)) + longjmp (shader->unwind, status); + } + + i965_flush_vertices (device); + + if (unlikely (device->surface.used + 128 > device->surface.size || + device->surface.num_relocations + 4 > device->surface.max_relocations)) + { + i965_stream_commit (device, &device->surface); + goto recheck; + } + + if (unlikely (device->constant.used + sizeof (device->constants) > device->constant.size || + device->constant.num_pending_relocations == device->constant.max_pending_relocations)) + { + i965_stream_commit (device, &device->constant); + goto recheck; + } + + if (unlikely (device->general.used + 512 > device->general.size)) { + i965_stream_commit (device, &device->general); + i965_general_state_reset (device); + goto recheck; + } + + if (unlikely (device->batch.used == 0)) + i965_emit_invariants (device); + + if (unlikely (device->surface.num_pending_relocations == 0 || + device->general.num_pending_relocations == 0)) + { + i965_emit_base (device); + } + + i965_emit_composite (device, shader); + + return CAIRO_STATUS_SUCCESS; +} + +void +i965_clipped_vertices (i965_device_t *device, + struct i965_vbo *vbo, + cairo_region_t *clip_region) +{ + int i, num_rectangles, size; + cairo_status_t status; + + if (vbo->count == 0) + return; + + num_rectangles = cairo_region_num_rectangles (clip_region); + assert (num_rectangles); + + if (vbo->next || + vbo->count * device->vertex_size + device->vertex.used > device->vertex.size) + { + i965_finish_vertices (device); + + size = device->rectangle_size; + do { + for (i = 0; i < num_rectangles; i++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, i, &rect); + + if (unlikely (device->vertex.used + size > device->vertex.size || + device->batch.used + 64 > device->batch.size || + ! 
i965_shader_check_aperture (device->shader, device))) + { + status = i965_device_flush (device); + if (unlikely (status)) + longjmp (device->shader->unwind, status); + + status = i965_shader_commit (device->shader, device); + assert (status == CAIRO_STATUS_SUCCESS); + } + + i965_emit_relocation (device, &device->batch, + vbo->bo, 0, + I915_GEM_DOMAIN_VERTEX, 0, + device->batch.used + 8); + + OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) | + VB0_VERTEXDATA | + (device->vertex_size << VB0_BUFFER_PITCH_SHIFT)); + OUT_BATCH (vbo->bo->offset); + OUT_BATCH (0); + OUT_BATCH (0); + + /* XXX scissor? */ + OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x)); + OUT_BATCH (DRAW_YMAX (rect.y + rect.height - 1) | + DRAW_XMIN (rect.x + rect.width - 1)); + OUT_BATCH (0x00000000); /* yorigin, xorigin */ + + OUT_BATCH (BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH (vbo->count); /* vertex count per instance */ + OUT_BATCH (0); /* start vertex offset */ + OUT_BATCH (1); /* single instance */ + OUT_BATCH (0); + OUT_BATCH (0); + } + } while ((vbo = vbo->next) != NULL); + assert (device->last_vertex_size == 0); + } else { + int vertex_start, vertex_count; + void *ptr; + + vertex_start = device->vertex.committed / device->vertex_size; + vertex_count = vbo->count; + + size = vertex_count * device->vertex_size; + ptr = intel_bo_map (&device->intel, vbo->bo); + memcpy (device->vertex.data + device->vertex.used, ptr, size); + intel_bo_unmap (vbo->bo); + device->vertex.committed = device->vertex.used += size; + + for (i = 0; i < num_rectangles; i++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, i, &rect); + + /* XXX scissor? */ + OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x)); + OUT_BATCH (DRAW_YMAX (rect.y + rect.height - 1) | + DRAW_XMIN (rect.x + rect.width - 1)); + OUT_BATCH (0x00000000); /* yorigin, xorigin */ + + OUT_BATCH (BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH (vertex_count); /* vertex count per instance */ + OUT_BATCH (vertex_start); /* start vertex offset */ + OUT_BATCH (1); /* single instance */ + OUT_BATCH (0); + OUT_BATCH (0); + } + } + + device->draw_rectangle = 0; +} diff --git a/src/drm/cairo-drm-i965-spans.c b/src/drm/cairo-drm-i965-spans.c new file mode 100644 index 00000000..9cf6d000 --- /dev/null +++ b/src/drm/cairo-drm-i965-spans.c @@ -0,0 +1,408 @@ +/* cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. 
+ * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Red Hat, Inc. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-composite-rectangles-private.h" +#include "cairo-boxes-private.h" +#include "cairo-error-private.h" +#include "cairo-drm-i965-private.h" + +/* Operates in either immediate or retained mode. + * When given a clip region we record the sequence of vbo and then + * replay them for each clip rectangle, otherwise we simply emit + * the vbo straight into the command stream. + */ + +typedef struct _i965_spans i965_spans_t; + +typedef float * +(*i965_get_rectangle_func_t) (i965_spans_t *spans); + +struct _i965_spans { + cairo_span_renderer_t renderer; + + i965_device_t *device; + + int xmin, xmax; + cairo_bool_t is_bounded; + const cairo_rectangle_int_t *extents; + + i965_get_rectangle_func_t get_rectangle; + i965_shader_t shader; + + cairo_region_t *clip_region; + + struct i965_vbo head, *tail; + + unsigned int vbo_offset; + float *vbo_base; +}; + +static float * +i965_spans_emit_rectangle (i965_spans_t *spans) +{ + return i965_add_rectangle (spans->device); +} + +static float * +i965_spans_accumulate_rectangle (i965_spans_t *spans) +{ + float *vertices; + uint32_t size; + + size = spans->device->rectangle_size; + if (unlikely (spans->vbo_offset + size > I965_VERTEX_SIZE)) { + struct i965_vbo *vbo; + + intel_bo_unmap (spans->tail->bo); + + vbo = malloc (sizeof (struct i965_vbo)); + if (unlikely (vbo == NULL)) { + /* throw error! 
*/ + } + + spans->tail->next = vbo; + spans->tail = vbo; + + vbo->next = NULL; + vbo->bo = intel_bo_create (&spans->device->intel, I965_VERTEX_SIZE, FALSE); + vbo->count = 0; + + spans->vbo_offset = 0; + spans->vbo_base = intel_bo_map (&spans->device->intel, vbo->bo); + } + + vertices = spans->vbo_base + spans->vbo_offset; + spans->vbo_offset += size; + spans->tail->count += 3; + + return vertices; +} + +static void +i965_span_rectangle (i965_spans_t *spans, + int x0, int x1, int y0, int y1, + int alpha) +{ + float *vertices; + float a = alpha / 255.; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + *vertices++ = a; +} + +static cairo_status_t +i965_bounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage >= 128) { + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + 255); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_bounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage) { + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_unbounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + i965_span_rectangle (spans, + spans->xmin, spans->xmax, + y, y + height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + i965_span_rectangle (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + i965_span_rectangle (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_unbounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + i965_span_rectangle (spans, + spans->xmin, spans->xmax, + y, y + height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + i965_span_rectangle (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + int alpha = 0; + if (half[0].coverage >= 128) + alpha = 255; + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + alpha); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + i965_span_rectangle (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_spans_init (i965_spans_t *spans, + i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + cairo_status_t status; + + 
spans->device = i965_device (dst); + i965_shader_init (&spans->shader, dst, op); + + spans->is_bounded = extents->is_bounded; + if (extents->is_bounded) { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i965_bounded_spans_mono; + else + spans->renderer.render_rows = i965_bounded_spans; + + spans->extents = &extents->bounded; + } else { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i965_unbounded_spans_mono; + else + spans->renderer.render_rows = i965_unbounded_spans; + + spans->extents = &extents->unbounded; + } + spans->xmin = spans->extents->x; + spans->xmax = spans->extents->x + spans->extents->width; + + spans->clip_region = NULL; + if (clip != NULL) { + cairo_region_t *clip_region = NULL; + + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + spans->clip_region = clip_region; + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&spans->shader, clip); + } + + spans->head.next = NULL; + spans->head.bo = NULL; + spans->head.count = 0; + spans->tail = &spans->head; + + if (spans->clip_region == NULL) { + spans->get_rectangle = i965_spans_emit_rectangle; + } else { + spans->get_rectangle = i965_spans_accumulate_rectangle; + spans->head.bo = intel_bo_create (&spans->device->intel, + I965_VERTEX_SIZE, FALSE); + if (unlikely (spans->head.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + spans->vbo_base = intel_bo_map (&spans->device->intel, spans->head.bo); + } + spans->vbo_offset = 0; + + return i965_shader_acquire_pattern (&spans->shader, + &spans->shader.source, + pattern, &extents->bounded); +} + +static void +i965_spans_fini (i965_spans_t *spans) +{ + i965_shader_fini (&spans->shader); + + if (spans->head.bo != NULL) { + struct i965_vbo *vbo, *next; + + intel_bo_destroy (&spans->device->intel, spans->head.bo); + for (vbo = spans->head.next; vbo != NULL; vbo = next) { + next = vbo->next; + intel_bo_destroy (&spans->device->intel, vbo->bo); + free (vbo); + } + } +} + +cairo_status_t +i965_clip_and_composite_spans (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + i965_spans_func_t draw_func, + void *draw_closure, + const cairo_composite_rectangles_t*extents, + cairo_clip_t *clip) +{ + i965_spans_t spans; + i965_device_t *device; + cairo_status_t status; + + if (op == CAIRO_OPERATOR_CLEAR) { + pattern = &_cairo_pattern_white.base; + op = CAIRO_OPERATOR_DEST_OUT; + } + + status = i965_spans_init (&spans, dst, op, pattern, antialias, clip, extents); + if (unlikely (status)) + return status; + + spans.shader.mask.base.content = CAIRO_CONTENT_ALPHA; + spans.shader.mask.type.fragment = FS_SPANS; + spans.shader.mask.type.vertex = VS_SPANS; + spans.shader.mask.type.pattern = PATTERN_BASE; + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SPANS; + + device = i965_device (dst); + status = i965_shader_commit (&spans.shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + status = draw_func (draw_closure, &spans.renderer, spans.extents); + if (spans.clip_region != NULL && status == CAIRO_STATUS_SUCCESS) { + intel_bo_unmap (spans.tail->bo); + i965_clipped_vertices (device, &spans.head, spans.clip_region); + } + + CLEANUP_DEVICE: + cairo_device_release (dst->intel.drm.base.device); + CLEANUP_SPANS: + 
i965_spans_fini (&spans); + + return status; +} diff --git a/src/drm/cairo-drm-i965-surface.c b/src/drm/cairo-drm-i965-surface.c new file mode 100644 index 00000000..0e0def81 --- /dev/null +++ b/src/drm/cairo-drm-i965-surface.c @@ -0,0 +1,1949 @@ +/* Cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Kristian Høgsberg + * Copyright © 2009 Chris Wilson + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Kristian Høgsberg. 
+ * + * Based on the xf86-intel-driver i965 render acceleration code, + * authored by: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * Carl Worth <cworth@redhat.com> + * Keith Packard <keithp@keithp.com> + */ + +/* XXX + * + * FIXME: Use brw_PLN for [DevCTG-B+] + * + */ + +#include "cairoint.h" + +#include "cairo-drm-private.h" +#include "cairo-drm-ioctl-private.h" +#include "cairo-drm-intel-private.h" +#include "cairo-drm-intel-command-private.h" +#include "cairo-drm-intel-ioctl-private.h" +#include "cairo-drm-i965-private.h" + +#include "cairo-boxes-private.h" +#include "cairo-composite-rectangles-private.h" +#include "cairo-error-private.h" +#include "cairo-region-private.h" +#include "cairo-surface-offset-private.h" + +#include <sys/ioctl.h> +#include <errno.h> + +#define I965_MAX_SIZE 8192 + +static const cairo_surface_backend_t i965_surface_backend; + +static void +i965_stream_init (i965_stream_t *stream, + uint8_t *data, uint32_t size, + struct i965_pending_relocation *pending, int max_pending, + struct drm_i915_gem_relocation_entry *relocations, int max_relocations) + +{ + stream->used = stream->committed = 0; + stream->data = data; + stream->size = size; + stream->serial = 1; + + stream->num_pending_relocations = 0; + stream->max_pending_relocations = max_pending; + stream->pending_relocations = pending; + + stream->num_relocations = 0; + stream->max_relocations = max_relocations; + stream->relocations = relocations; +} + +static void +i965_add_relocation (i965_device_t *device, + intel_bo_t *bo, + uint32_t read_domains, + uint32_t write_domain) +{ + if (bo->exec == NULL) { + int i; + + device->exec.gtt_size += bo->base.size; + + i = device->exec.count++; + assert (i < ARRAY_LENGTH (device->exec.exec)); + + device->exec.exec[i].handle = bo->base.handle; + device->exec.exec[i].relocation_count = 0; + device->exec.exec[i].relocs_ptr = 0; + device->exec.exec[i].alignment = 0; + device->exec.exec[i].offset = 0; + device->exec.exec[i].flags = 0; + device->exec.exec[i].rsvd1 = 0; + device->exec.exec[i].rsvd2 = 0; + + device->exec.bo[i] = intel_bo_reference (bo); + bo->exec = &device->exec.exec[i]; + } + + if (cairo_list_is_empty (&bo->link)) + cairo_list_add_tail (&device->flush, &bo->link); + + assert (write_domain == 0 || bo->batch_write_domain == 0 || bo->batch_write_domain == write_domain); + bo->batch_read_domains |= read_domains; + bo->batch_write_domain |= write_domain; +} + +void +i965_emit_relocation (i965_device_t *device, + i965_stream_t *stream, + intel_bo_t *target, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset) +{ + int n; + + assert (target_offset < target->base.size); + + i965_add_relocation (device, target, read_domains, write_domain); + + n = stream->num_relocations++; + assert (n < stream->max_relocations); + + stream->relocations[n].offset = offset; + stream->relocations[n].delta = target_offset; + stream->relocations[n].target_handle = target->base.handle; + stream->relocations[n].read_domains = read_domains; + stream->relocations[n].write_domain = write_domain; + stream->relocations[n].presumed_offset = target->offset; +} + +static void +i965_stream_reset (i965_stream_t *stream) +{ + stream->used = stream->committed = 0; + stream->num_relocations = 0; + stream->num_pending_relocations = 0; + if (++stream->serial == 0) + stream->serial = 1; +} + +void +i965_stream_commit (i965_device_t *device, + i965_stream_t *stream) +{ + intel_bo_t *bo; + int n; + + assert (stream->used); + + bo = 
intel_bo_create (&device->intel, stream->used, FALSE); + + /* apply pending relocations */ + for (n = 0; n < stream->num_pending_relocations; n++) { + struct i965_pending_relocation *p = &stream->pending_relocations[n]; + + i965_emit_relocation (device, &device->batch, bo, + p->delta, + p->read_domains, + p->write_domain, + p->offset); + if (bo->offset) + *(uint32_t *) (device->batch.data + p->offset) = bo->offset + p->delta; + } + + intel_bo_write (&device->intel, bo, 0, stream->used, stream->data); + + if (stream->num_relocations) { + assert (bo->exec != NULL); + bo->exec->relocs_ptr = (uintptr_t) stream->relocations; + bo->exec->relocation_count = stream->num_relocations; + } + + intel_bo_destroy (&device->intel, bo); + + i965_stream_reset (stream); +} + +static void +sf_states_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->sf_states, entry); + _cairo_freelist_free (&device->sf_freelist, entry); +} + +static void +cc_offsets_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->cc_states, entry); + _cairo_freelist_free (&device->cc_freelist, entry); +} + +static void +wm_kernels_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->wm_kernels, entry); + _cairo_freelist_free (&device->wm_kernel_freelist, entry); +} + +static void +wm_states_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->wm_states, entry); + _cairo_freelist_free (&device->wm_state_freelist, entry); +} + +static void +wm_bindings_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->wm_bindings, entry); + _cairo_freelist_free (&device->wm_binding_freelist, entry); +} + +static void +samplers_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->samplers, entry); + _cairo_freelist_free (&device->sampler_freelist, entry); +} + +void +i965_general_state_reset (i965_device_t *device) +{ + _cairo_hash_table_foreach (device->sf_states, + sf_states_pluck, + device); + + _cairo_hash_table_foreach (device->cc_states, + cc_offsets_pluck, + device); + + _cairo_hash_table_foreach (device->wm_kernels, + wm_kernels_pluck, + device); + + _cairo_hash_table_foreach (device->wm_states, + wm_states_pluck, + device); + + _cairo_hash_table_foreach (device->wm_bindings, + wm_bindings_pluck, + device); + + _cairo_hash_table_foreach (device->samplers, + samplers_pluck, + device); + + device->vs_offset = (uint32_t) -1; + device->border_color_offset = (uint32_t) -1; + + if (device->general_state != NULL) { + intel_bo_destroy (&device->intel, device->general_state); + device->general_state = NULL; + } +} + +static void +i965_device_reset (i965_device_t *device) +{ + device->exec.count = 0; + device->exec.gtt_size = I965_CONSTANT_SIZE + + I965_VERTEX_SIZE + + I965_SURFACE_SIZE + + I965_GENERAL_SIZE + + I965_BATCH_SIZE; + + device->sf_state.entry.hash = (uint32_t) -1; + device->wm_state.entry.hash = (uint32_t) -1; + device->wm_binding.entry.hash = (uint32_t) -1; + device->cc_state.entry.hash = (uint32_t) -1; + + device->target = NULL; + device->source = NULL; + device->mask = NULL; + device->clip = NULL; + + device->draw_rectangle = (uint32_t) -1; + + device->vertex_type = (uint32_t) -1; + device->vertex_size = 0; + device->rectangle_size = 0; + device->last_vertex_size = 0; + + device->constants = 
NULL; + device->constants_size = 0; + + device->have_urb_fences = FALSE; +} + +static cairo_status_t +i965_exec (i965_device_t *device, uint32_t offset) +{ + struct drm_i915_gem_execbuffer2 execbuf; + cairo_status_t status = CAIRO_STATUS_SUCCESS; + int ret, i; + + execbuf.buffers_ptr = (uintptr_t) device->exec.exec; + execbuf.buffer_count = device->exec.count; + execbuf.batch_start_offset = offset; + execbuf.batch_len = device->batch.used; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.num_cliprects = 0; + execbuf.cliprects_ptr = 0; + execbuf.flags = I915_GEM_3D_PIPELINE; + execbuf.rsvd1 = 0; + execbuf.rsvd2 = 0; + +#if 0 + printf ("exec: offset=%d, length=%d, buffers=%d\n", + offset, device->batch.used, device->exec.count); + intel_dump_batchbuffer ((uint32_t *) device->batch.data, + device->batch.used, + device->intel.base.chip_id); +#endif + + ret = 0; + do { + ret = ioctl (device->intel.base.fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); + } while (ret != 0 && errno == EINTR); + if (unlikely (ret)) { + int n; + + if (errno == ENOMEM) + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + else + status = _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + + fprintf (stderr, "Batch submission failed: %d\n", errno); + fprintf (stderr, " gtt size: %zd/%zd\n", + device->exec.gtt_size, device->intel.gtt_avail_size); + + fprintf (stderr, " %d buffers:\n", + device->exec.count); + for (n = 0; n < i; n++) { + fprintf (stderr, " exec[%d] = %d\n", + n, device->exec.bo[n]->base.size); + } + + intel_dump_batchbuffer ((uint32_t *) device->batch.data, + device->batch.used, + device->intel.base.chip_id); + } + + /* XXX any write target within the batch should now be in error */ + for (i = 0; i < device->exec.count; i++) { + cairo_bool_t ret; + + device->exec.bo[i]->offset = device->exec.exec[i].offset; + device->exec.bo[i]->exec = NULL; + device->exec.bo[i]->batch_read_domains = 0; + device->exec.bo[i]->batch_write_domain = 0; + + if (device->exec.bo[i]->purgeable) { + ret = intel_bo_madvise (&device->intel, + device->exec.bo[i], + I915_MADV_DONTNEED); + /* ignore immediate notification of purging */ + } + + cairo_list_init (&device->exec.bo[i]->link); + intel_bo_destroy (&device->intel, device->exec.bo[i]); + } + cairo_list_init (&device->flush); + + device->exec.count = 0; + + return status; +} + +static inline uint32_t +next_bo_size (uint32_t v) +{ + v = (v + 8191) / 8192; + + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + + return v * 8192; +} + +static void +_copy_to_bo_and_apply_relocations (i965_device_t *device, + intel_bo_t *bo, + i965_stream_t *stream, + uint32_t offset) +{ + int n; + + intel_bo_write (&device->intel, bo, + offset, stream->used, + stream->data); + + for (n = 0; n < stream->num_pending_relocations; n++) { + struct i965_pending_relocation *p = &stream->pending_relocations[n]; + + i965_emit_relocation (device, &device->batch, bo, + p->delta + offset, + p->read_domains, + p->write_domain, + p->offset); + + if (bo->offset) { + *(uint32_t *) (device->batch.data + p->offset) = + bo->offset + p->delta + offset; + } + } +} + +cairo_status_t +i965_device_flush (i965_device_t *device) +{ + cairo_status_t status; + uint32_t aligned, max; + intel_bo_t *bo; + int n; + + if (device->batch.used == 0) + return CAIRO_STATUS_SUCCESS; + + i965_flush_vertices (device); + + OUT_BATCH (MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned. 
*/ + if (device->batch.used & 4) + OUT_BATCH (MI_NOOP); + +#if 0 + printf ("device flush: vertex=%d, constant=%d, surface=%d, general=%d, batch=%d\n", + device->vertex.used, + device->constant.used, + device->surface.used, + device->general.used, + device->batch.used); +#endif + + /* can we pack the surface state into the tail of the general state? */ + if (device->general.used == device->general.committed) { + if (device->general.used) { + assert (device->general.num_pending_relocations == 1); + assert (device->general_state != NULL); + i965_emit_relocation (device, &device->batch, + device->general_state, + device->general.pending_relocations[0].delta, + device->general.pending_relocations[0].read_domains, + device->general.pending_relocations[0].write_domain, + device->general.pending_relocations[0].offset); + + if (device->general_state->offset) { + *(uint32_t *) (device->batch.data + + device->general.pending_relocations[0].offset) = + device->general_state->offset + + device->general.pending_relocations[0].delta; + } + } + } else { + assert (device->general.num_pending_relocations == 1); + if (device->general_state != NULL) { + intel_bo_destroy (&device->intel, device->general_state); + device->general_state = NULL; + } + + bo = intel_bo_create (&device->intel, + device->general.used, + FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + aligned = (device->general.used + 31) & -32; + if (device->surface.used && + aligned + device->surface.used <= bo->base.size) + { + _copy_to_bo_and_apply_relocations (device, bo, &device->general, 0); + _copy_to_bo_and_apply_relocations (device, bo, &device->surface, aligned); + + if (device->surface.num_relocations) { + for (n = 0; n < device->surface.num_relocations; n++) + device->surface.relocations[n].offset += aligned; + + assert (bo->exec != NULL); + bo->exec->relocs_ptr = (uintptr_t) device->surface.relocations; + bo->exec->relocation_count = device->surface.num_relocations; + } + + i965_stream_reset (&device->surface); + } + else + { + _copy_to_bo_and_apply_relocations (device, bo, &device->general, 0); + } + + /* Note we don't reset the general state, just mark what data we've committed. */ + device->general.committed = device->general.used; + device->general_state = bo; + } + device->general.num_pending_relocations = 0; + + /* Combine vertex+constant+surface+batch streams? 
*/ + max = aligned = device->vertex.used; + if (device->constant.used) { + aligned = (aligned + 63) & -64; + aligned += device->constant.used; + if (device->constant.used > max) + max = device->constant.used; + } + if (device->surface.used) { + aligned = (aligned + 31) & -32; + aligned += device->surface.used; + if (device->surface.used > max) + max = device->surface.used; + } + aligned = (aligned + 63) & -64; + aligned += device->batch.used; + if (device->batch.used > max) + max = device->batch.used; + if (aligned <= next_bo_size (max)) { + int batch_num_relocations; + + if (aligned <= 8192) + max = aligned; + + bo = intel_bo_create (&device->intel, max, FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + assert (aligned <= bo->base.size); + + if (device->vertex.used) + _copy_to_bo_and_apply_relocations (device, bo, &device->vertex, 0); + + aligned = device->vertex.used; + if (device->constant.used) { + aligned = (aligned + 63) & -64; + _copy_to_bo_and_apply_relocations (device, bo, &device->constant, aligned); + aligned += device->constant.used; + } + + batch_num_relocations = device->batch.num_relocations; + if (device->surface.used) { + aligned = (aligned + 31) & -32; + _copy_to_bo_and_apply_relocations (device, bo, &device->surface, aligned); + + batch_num_relocations = device->batch.num_relocations; + if (device->surface.num_relocations) { + assert (device->batch.num_relocations + device->surface.num_relocations < device->batch.max_relocations); + + memcpy (device->batch.relocations + device->batch.num_relocations, + device->surface.relocations, + sizeof (device->surface.relocations[0]) * device->surface.num_relocations); + + for (n = 0; n < device->surface.num_relocations; n++) + device->batch.relocations[device->batch.num_relocations + n].offset += aligned; + + device->batch.num_relocations += device->surface.num_relocations; + } + + aligned += device->surface.used; + } + + aligned = (aligned + 63) & -64; + intel_bo_write (&device->intel, bo, + aligned, device->batch.used, + device->batch.data); + + for (n = 0; n < batch_num_relocations; n++) + device->batch.relocations[n].offset += aligned; + + if (device->exec.bo[device->exec.count-1] == bo) { + assert (bo->exec == &device->exec.exec[device->exec.count-1]); + + bo->exec->relocation_count = device->batch.num_relocations; + bo->exec->relocs_ptr = (uintptr_t) device->batch.relocations; + intel_bo_destroy (&device->intel, bo); + } else { + assert (bo->exec == NULL); + + n = device->exec.count++; + device->exec.exec[n].handle = bo->base.handle; + device->exec.exec[n].relocation_count = device->batch.num_relocations; + device->exec.exec[n].relocs_ptr = (uintptr_t) device->batch.relocations; + device->exec.exec[n].alignment = 0; + device->exec.exec[n].offset = 0; + device->exec.exec[n].flags = 0; + device->exec.exec[n].rsvd1 = 0; + device->exec.exec[n].rsvd2 = 0; + + /* transfer ownership to the exec */ + device->exec.bo[n] = bo; + } + } else { + i965_stream_commit (device, &device->vertex); + + if (device->constant.used && device->surface.used){ + aligned = (device->constant.used + 31) & -32; + aligned += device->surface.used; + + max = MAX (device->constant.used, device->surface.used); + if (aligned <= next_bo_size (max)) { + if (aligned <= 8192) + max = aligned; + + bo = intel_bo_create (&device->intel, max, FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + assert (aligned <= bo->base.size); + + _copy_to_bo_and_apply_relocations (device, bo, 
&device->constant, 0); + + aligned = (device->constant.used + 31) & -32; + + _copy_to_bo_and_apply_relocations (device, bo, &device->surface, aligned); + + if (device->surface.num_relocations) { + assert (bo->exec != NULL); + + for (n = 0; n < device->surface.num_relocations; n++) + device->surface.relocations[n].offset += aligned; + + bo->exec->relocs_ptr = (uintptr_t) device->surface.relocations; + bo->exec->relocation_count = device->surface.num_relocations; + } + + i965_stream_reset (&device->surface); + i965_stream_reset (&device->constant); + + intel_bo_destroy (&device->intel, bo); + } + } else { + if (device->constant.used) + i965_stream_commit (device, &device->constant); + if (device->surface.used) + i965_stream_commit (device, &device->surface); + } + + bo = intel_bo_create (&device->intel, device->batch.used, FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + intel_bo_write (&device->intel, bo, + 0, device->batch.used, + device->batch.data); + + n = device->exec.count++; + device->exec.exec[n].handle = bo->base.handle; + device->exec.exec[n].relocation_count = device->batch.num_relocations; + device->exec.exec[n].relocs_ptr = (uintptr_t) device->batch.relocations; + device->exec.exec[n].alignment = 0; + device->exec.exec[n].offset = 0; + device->exec.exec[n].flags = 0; + device->exec.exec[n].rsvd1 = 0; + device->exec.exec[n].rsvd2 = 0; + + /* transfer ownership to the exec */ + device->exec.bo[n] = bo; + aligned = 0; + } + + intel_glyph_cache_unmap (&device->intel); + + status = i965_exec (device, aligned); + + i965_stream_reset (&device->vertex); + i965_stream_reset (&device->surface); + i965_stream_reset (&device->constant); + i965_stream_reset (&device->batch); + + intel_glyph_cache_unpin (&device->intel); + intel_snapshot_cache_thaw (&device->intel); + + i965_device_reset (device); + + return status; +} + +static cairo_status_t +i965_surface_finish (void *abstract_surface) +{ + i965_surface_t *surface = abstract_surface; + + return intel_surface_finish (&surface->intel); +} + +static cairo_status_t +i965_surface_flush (void *abstract_surface) +{ + i965_surface_t *surface = abstract_surface; + cairo_status_t status = CAIRO_STATUS_SUCCESS; + + if (surface->intel.drm.fallback != NULL) + return intel_surface_flush (abstract_surface); + + /* Forgo flushing on finish as the user cannot access the surface directly. */ + if (! 
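/* Aside (sketch, not part of this commit): whenever a stream is copied into
 * a bo at a non-zero offset, the relocations recorded against that stream
 * have to be rebased by the copy offset, as in the loops above.  In
 * isolation, with a hypothetical reloc type:
 *
 *     struct reloc { uint32_t offset; };    // offset within the stream
 *
 *     static void
 *     rebase_relocs (struct reloc *relocs, int count, uint32_t base)
 *     {
 *         int n;
 *         for (n = 0; n < count; n++)
 *             relocs[n].offset += base;     // now relative to the shared bo
 *     }
 */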
surface->intel.drm.base.finished && + to_intel_bo (surface->intel.drm.bo)->exec != NULL) + { + status = cairo_device_acquire (surface->intel.drm.base.device); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + i965_device_t *device; + + device = i965_device (surface); + status = i965_device_flush (device); + cairo_device_release (&device->intel.base.base); + } + } + + return status; +} + +/* rasterisation */ + +static cairo_status_t +_composite_boxes_spans (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents) +{ + cairo_boxes_t *boxes = closure; + cairo_rectangular_scan_converter_t converter; + struct _cairo_boxes_chunk *chunk; + cairo_status_t status; + + _cairo_rectangular_scan_converter_init (&converter, extents); + for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) { + cairo_box_t *box = chunk->base; + int i; + + for (i = 0; i < chunk->count; i++) { + status = _cairo_rectangular_scan_converter_add_box (&converter, &box[i], 1); + if (unlikely (status)) + goto CLEANUP; + } + } + + status = converter.base.generate (&converter.base, renderer); + + CLEANUP: + converter.base.destroy (&converter.base); + return status; +} + +cairo_status_t +i965_fixup_unbounded (i965_surface_t *dst, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip) +{ + i965_shader_t shader; + cairo_status_t status; + + i965_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR); + + if (clip != NULL) { + cairo_region_t *clip_region = NULL; + + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + assert (clip_region == NULL); + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&shader, clip); + } else { + if (extents->bounded.width == extents->unbounded.width && + extents->bounded.height == extents->unbounded.height) + { + return CAIRO_STATUS_SUCCESS; + } + } + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + &_cairo_pattern_clear.base, + &extents->unbounded); + if (unlikely (status)) { + i965_shader_fini (&shader); + return status; + } + + status = i965_shader_commit (&shader, i965_device (dst)); + if (unlikely (status)) { + i965_shader_fini (&shader); + return status; + } + + /* top */ + if (extents->bounded.y != extents->unbounded.y) { + cairo_rectangle_int_t rect; + + rect.x = extents->unbounded.x; + rect.y = extents->unbounded.y; + rect.width = extents->unbounded.width; + rect.height = extents->bounded.y - rect.y; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + /* left */ + if (extents->bounded.x != extents->unbounded.x) { + cairo_rectangle_int_t rect; + + rect.x = extents->unbounded.x; + rect.y = extents->bounded.y; + rect.width = extents->bounded.x - extents->unbounded.x; + rect.height = extents->bounded.height; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + /* right */ + if (extents->bounded.x + extents->bounded.width != extents->unbounded.x + extents->unbounded.width) { + cairo_rectangle_int_t rect; + + rect.x = extents->bounded.x + extents->bounded.width; + rect.y = extents->bounded.y; + rect.width = extents->unbounded.x + extents->unbounded.width - rect.x; + rect.height = extents->bounded.height; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + /* bottom */ + if (extents->bounded.y + extents->bounded.height != extents->unbounded.y + extents->unbounded.height) { + cairo_rectangle_int_t rect; + + rect.x 
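/* Aside (sketch, not part of this commit): i965_fixup_unbounded clears the
 * border between the bounded and unbounded extents as up to four disjoint
 * strips (top, left, right, bottom), exactly the decomposition emitted
 * above and below.  The same computation with hypothetical types:
 *
 *     typedef struct { int x, y, width, height; } rect_t;
 *
 *     static int
 *     frame_rects (const rect_t *in, const rect_t *out, rect_t r[4])
 *     {
 *         int n = 0;
 *         if (in->y != out->y)                              // top strip
 *             r[n++] = (rect_t) { out->x, out->y, out->width, in->y - out->y };
 *         if (in->x != out->x)                              // left strip
 *             r[n++] = (rect_t) { out->x, in->y, in->x - out->x, in->height };
 *         if (in->x + in->width != out->x + out->width)     // right strip
 *             r[n++] = (rect_t) { in->x + in->width, in->y,
 *                                 out->x + out->width - (in->x + in->width),
 *                                 in->height };
 *         if (in->y + in->height != out->y + out->height)   // bottom strip
 *             r[n++] = (rect_t) { out->x, in->y + in->height, out->width,
 *                                 out->y + out->height - (in->y + in->height) };
 *         return n;
 *     }
 */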
= extents->unbounded.x; + rect.y = extents->bounded.y + extents->bounded.height; + rect.width = extents->unbounded.width; + rect.height = extents->unbounded.y + extents->unbounded.height - rect.y; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + i965_shader_fini (&shader); + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_fixup_unbounded_boxes (i965_surface_t *dst, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip, + cairo_boxes_t *boxes) +{ + cairo_boxes_t clear; + cairo_box_t box; + cairo_region_t *clip_region = NULL; + cairo_status_t status; + struct _cairo_boxes_chunk *chunk; + i965_shader_t shader; + int i; + + if (boxes->num_boxes <= 1) + return i965_fixup_unbounded (dst, extents, clip); + + i965_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR); + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&shader, clip); + } + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + &_cairo_pattern_clear.base, + &extents->unbounded); + if (unlikely (status)) { + i965_shader_fini (&shader); + return status; + } + + _cairo_boxes_init (&clear); + + box.p1.x = _cairo_fixed_from_int (extents->unbounded.x + extents->unbounded.width); + box.p1.y = _cairo_fixed_from_int (extents->unbounded.y); + box.p2.x = _cairo_fixed_from_int (extents->unbounded.x); + box.p2.y = _cairo_fixed_from_int (extents->unbounded.y + extents->unbounded.height); + + if (clip_region == NULL) { + cairo_boxes_t tmp; + + _cairo_boxes_init (&tmp); + + status = _cairo_boxes_add (&tmp, &box); + assert (status == CAIRO_STATUS_SUCCESS); + + tmp.chunks.next = &boxes->chunks; + tmp.num_boxes += boxes->num_boxes; + + status = _cairo_bentley_ottmann_tessellate_boxes (&tmp, + CAIRO_FILL_RULE_WINDING, + &clear); + + tmp.chunks.next = NULL; + } else { + pixman_box32_t *pbox; + + pbox = pixman_region32_rectangles (&clip_region->rgn, &i); + _cairo_boxes_limit (&clear, (cairo_box_t *) pbox, i); + + status = _cairo_boxes_add (&clear, &box); + assert (status == CAIRO_STATUS_SUCCESS); + + for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) { + for (i = 0; i < chunk->count; i++) { + status = _cairo_boxes_add (&clear, &chunk->base[i]); + if (unlikely (status)) { + _cairo_boxes_fini (&clear); + return status; + } + } + } + + status = _cairo_bentley_ottmann_tessellate_boxes (&clear, + CAIRO_FILL_RULE_WINDING, + &clear); + } + + if (likely (status == CAIRO_STATUS_SUCCESS && clear.num_boxes)) { + status = i965_shader_commit (&shader, i965_device (dst)); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + for (chunk = &clear.chunks; chunk != NULL; chunk = chunk->next) { + for (i = 0; i < chunk->count; i++) { + int x1 = _cairo_fixed_integer_part (chunk->base[i].p1.x); + int y1 = _cairo_fixed_integer_part (chunk->base[i].p1.y); + int x2 = _cairo_fixed_integer_part (chunk->base[i].p2.x); + int y2 = _cairo_fixed_integer_part (chunk->base[i].p2.y); + + i965_shader_add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1); + } + } + } + i965_shader_fini (&shader); + } + + _cairo_boxes_fini (&clear); + + return status; +} + +static cairo_status_t +_composite_boxes (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_boxes_t *boxes, + cairo_antialias_t antialias, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + cairo_bool_t need_clip_surface = 
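/* Aside (sketch, not part of this commit): the box bookkeeping above is
 * done in cairo's fixed-point coordinates.  Assuming the usual 24.8 layout
 * (8 fractional bits), the helpers used here amount to:
 *
 *     typedef int32_t fixed_t;
 *     enum { FRAC_BITS = 8 };
 *
 *     static fixed_t from_int (int i)     { return (fixed_t) (i << FRAC_BITS); }
 *     static int integer_part (fixed_t f) { return f >> FRAC_BITS; }   // floor
 *     static int is_integer (fixed_t f)   { return (f & ((1 << FRAC_BITS) - 1)) == 0; }
 */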
FALSE; + cairo_region_t *clip_region = NULL; + const struct _cairo_boxes_chunk *chunk; + cairo_status_t status; + i965_shader_t shader; + int i; + + /* If the boxes are not pixel-aligned, we will need to compute a real mask */ + if (antialias != CAIRO_ANTIALIAS_NONE) { + if (! boxes->is_pixel_aligned) + return CAIRO_INT_STATUS_UNSUPPORTED; + } + + i965_shader_init (&shader, dst, op); + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + pattern, + &extents->bounded); + if (unlikely (status)) + return status; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + if (need_clip_surface) + i965_shader_set_clip (&shader, clip); + } + + status = i965_shader_commit (&shader, i965_device (dst)); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) { + cairo_box_t *box = chunk->base; + for (i = 0; i < chunk->count; i++) { + int x1 = _cairo_fixed_integer_round (box[i].p1.x); + int y1 = _cairo_fixed_integer_round (box[i].p1.y); + int x2 = _cairo_fixed_integer_round (box[i].p2.x); + int y2 = _cairo_fixed_integer_round (box[i].p2.y); + + if (x2 > x1 && y2 > y1) + i965_shader_add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1); + } + } + } + i965_shader_fini (&shader); + + if (status == CAIRO_STATUS_SUCCESS && ! extents->is_bounded) + status = i965_fixup_unbounded_boxes (dst, extents, clip, boxes); + + return status; +} + +static cairo_status_t +_clip_and_composite_boxes (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *src, + cairo_boxes_t *boxes, + cairo_antialias_t antialias, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip) +{ + cairo_status_t status; + + if (boxes->num_boxes == 0) { + if (extents->is_bounded) + return CAIRO_STATUS_SUCCESS; + + return i965_fixup_unbounded (dst, extents, clip); + } + + /* Use a fast path if the boxes are pixel aligned */ + status = _composite_boxes (dst, op, src, boxes, antialias, clip, extents); + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + return status; + + /* Otherwise render the boxes via an implicit mask and composite in the usual + * fashion. + */ + return i965_clip_and_composite_spans (dst, op, src, antialias, + _composite_boxes_spans, boxes, + extents, clip); +} + +static cairo_bool_t +box_is_aligned (const cairo_box_t *box) +{ + return + _cairo_fixed_is_integer (box->p1.x) && + _cairo_fixed_is_integer (box->p1.y) && + _cairo_fixed_is_integer (box->p2.x) && + _cairo_fixed_is_integer (box->p2.y); +} + +static inline cairo_status_t +_clip_to_boxes (cairo_clip_t **clip, + const cairo_composite_rectangles_t *extents, + cairo_box_t **boxes, + int *num_boxes) +{ + cairo_status_t status; + const cairo_rectangle_int_t *rect; + + rect = extents->is_bounded ? 
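/* Aside (sketch, not part of this commit): _composite_boxes above rounds
 * each fixed-point edge to the nearest pixel and must then guard against
 * boxes that collapse under rounding before emitting a rectangle:
 *
 *     int x1 = round_to_int (box.p1.x), y1 = round_to_int (box.p1.y);
 *     int x2 = round_to_int (box.p2.x), y2 = round_to_int (box.p2.y);
 *     if (x2 > x1 && y2 > y1)            // skip degenerate boxes
 *         add_rectangle (x1, y1, x2 - x1, y2 - y1);
 *
 * (round_to_int and add_rectangle stand in for the cairo internals used
 * in the loop above.)
 */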
&extents->bounded: &extents->unbounded; + + if (*clip == NULL) + goto EXTENTS; + + status = _cairo_clip_rectangle (*clip, rect); + if (unlikely (status)) + return status; + + status = _cairo_clip_get_boxes (*clip, boxes, num_boxes); + if (status != CAIRO_INT_STATUS_UNSUPPORTED) { + if (extents->is_bounded || (*num_boxes == 1 && box_is_aligned (*boxes))) + *clip = NULL; + return status; + } + + EXTENTS: + _cairo_box_from_rectangle (&(*boxes)[0], rect); + *num_boxes = 1; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_int_status_t +i965_surface_paint (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + cairo_boxes_t boxes; + cairo_box_t *clip_boxes = boxes.boxes_embedded; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + int num_boxes = ARRAY_LENGTH (boxes.boxes_embedded); + cairo_status_t status; + + /* XXX unsupported operators? use pixel shader blending, eventually */ + + status = _cairo_composite_rectangles_init_for_paint (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, + clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + _cairo_boxes_init_for_array (&boxes, clip_boxes, num_boxes); + status = _clip_and_composite_boxes (dst, op, source, + &boxes, CAIRO_ANTIALIAS_DEFAULT, + &extents, clip); + if (clip_boxes != boxes.boxes_embedded) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static cairo_int_status_t +i965_surface_mask (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + const cairo_pattern_t *mask, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + i965_shader_t shader; + cairo_clip_t local_clip; + cairo_region_t *clip_region = NULL; + cairo_bool_t need_clip_surface = FALSE; + cairo_bool_t have_clip = FALSE; + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_mask (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, mask, clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL && extents.is_bounded) { + clip = _cairo_clip_init_copy (&local_clip, clip); + status = _cairo_clip_rectangle (clip, &extents.bounded); + if (unlikely (status)) { + _cairo_clip_fini (&local_clip); + return status; + } + + have_clip = TRUE; + } + + i965_shader_init (&shader, dst, op); + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + source, + &extents.bounded); + if (unlikely (status)) + goto BAIL; + + status = i965_shader_acquire_pattern (&shader, + &shader.mask, + mask, + &extents.bounded); + if (unlikely (status)) + goto BAIL; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + if (need_clip_surface) + i965_shader_set_clip (&shader, clip); + } + + status = i965_shader_commit (&shader, i965_device (dst)); + if (unlikely (status)) + 
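/* Aside (sketch, not part of this commit): every drawing entry point in
 * this file (paint/mask/stroke/fill) takes a stack-local copy of the clip
 * before reducing it, so the caller's clip is never mutated and the copy
 * is finalized on all exit paths:
 *
 *     cairo_clip_t local_clip;
 *     cairo_bool_t have_clip = FALSE;
 *
 *     if (clip != NULL) {
 *         clip = _cairo_clip_init_copy (&local_clip, clip);
 *         have_clip = TRUE;
 *     }
 *     // ... composite, possibly setting clip = NULL along the way ...
 *     if (have_clip)
 *         _cairo_clip_fini (&local_clip);
 */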
goto BAIL; + + if (clip_region != NULL) { + unsigned int n, num_rectangles; + + num_rectangles = cairo_region_num_rectangles (clip_region); + for (n = 0; n < num_rectangles; n++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, n, &rect); + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + } else { + i965_shader_add_rectangle (&shader, + extents.bounded.x, + extents.bounded.y, + extents.bounded.width, + extents.bounded.height); + } + + if (! extents.is_bounded) + status = i965_fixup_unbounded (dst, &extents, clip); + + BAIL: + i965_shader_fini (&shader); + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +typedef struct { + cairo_polygon_t polygon; + cairo_fill_rule_t fill_rule; + cairo_antialias_t antialias; +} composite_polygon_info_t; + +static cairo_status_t +_composite_polygon_spans (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents) +{ + composite_polygon_info_t *info = closure; + cairo_botor_scan_converter_t converter; + cairo_status_t status; + cairo_box_t box; + + box.p1.x = _cairo_fixed_from_int (extents->x); + box.p1.y = _cairo_fixed_from_int (extents->y); + box.p2.x = _cairo_fixed_from_int (extents->x + extents->width); + box.p2.y = _cairo_fixed_from_int (extents->y + extents->height); + + _cairo_botor_scan_converter_init (&converter, &box, info->fill_rule); + + status = converter.base.add_polygon (&converter.base, &info->polygon); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = converter.base.generate (&converter.base, renderer); + + converter.base.destroy (&converter.base); + + return status; +} + +static cairo_int_status_t +i965_surface_stroke (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_stroke (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, + path, stroke_style, ctm, + clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + if (path->is_rectilinear) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_stroke_rectilinear_to_boxes (path, + stroke_style, + ctm, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_stroke_to_polygon (path, + 
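/* Aside (sketch, not part of this commit): when the clip reduces to a
 * region, i965_surface_mask above simply walks the region's rectangles
 * with cairo's public region API:
 *
 *     int n, num = cairo_region_num_rectangles (clip_region);
 *     for (n = 0; n < num; n++) {
 *         cairo_rectangle_int_t rect;
 *         cairo_region_get_rectangle (clip_region, n, &rect);
 *         // emit rect.x, rect.y, rect.width, rect.height
 *     }
 */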
stroke_style, + ctm, ctm_inverse, + tolerance, + &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! extents.is_bounded) + status = i965_fixup_unbounded (dst, &extents, clip); + } else { + info.fill_rule = CAIRO_FILL_RULE_WINDING; + info.antialias = antialias; + status = i965_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + } + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static cairo_int_status_t +i965_surface_fill (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t*source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_fill (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, path, + clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + assert (! path->is_empty_fill); + + if (_cairo_path_fixed_is_rectilinear_fill (path)) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_fill_rectilinear_to_boxes (path, + fill_rule, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_fill_to_polygon (path, tolerance, &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! 
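/* Aside (sketch, not part of this commit): stroke and fill share the same
 * control flow: try to reduce a rectilinear path to boxes first, and only
 * fall back to polygon scan conversion when the box path reports
 * CAIRO_INT_STATUS_UNSUPPORTED:
 *
 *     status = try_boxes_fast_path (...);           // hypothetical name
 *     if (status != CAIRO_INT_STATUS_UNSUPPORTED)
 *         return status;                            // success or hard error
 *     return compose_via_spans (...);               // general fallback
 */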
extents.is_bounded) + status = i965_fixup_unbounded (dst, &extents, clip); + } else { + info.fill_rule = fill_rule; + info.antialias = antialias; + status = i965_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + } + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static const cairo_surface_backend_t i965_surface_backend = { + CAIRO_SURFACE_TYPE_DRM, + + _cairo_drm_surface_create_similar, + i965_surface_finish, + intel_surface_acquire_source_image, + intel_surface_release_source_image, + + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + + NULL, /* copy_page */ + NULL, /* show_page */ + _cairo_drm_surface_get_extents, + NULL, /* old-glyphs */ + _cairo_drm_surface_get_font_options, + + i965_surface_flush, + NULL, /* mark_dirty */ + intel_scaled_font_fini, + intel_scaled_glyph_fini, + + i965_surface_paint, + i965_surface_mask, + i965_surface_stroke, + i965_surface_fill, + i965_surface_glyphs, +}; + +static void +i965_surface_init (i965_surface_t *surface, + cairo_content_t content, + cairo_drm_device_t *device) +{ + intel_surface_init (&surface->intel, &i965_surface_backend, device, content); + surface->stream = 0; +} + +static inline int cairo_const +i965_tiling_stride (uint32_t tiling, int stride) +{ + if (tiling == I915_TILING_NONE) + return stride; + + return (stride + 127) & -128; +} + +static inline int cairo_const +i965_tiling_height (uint32_t tiling, int height) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return (height + 1) & -2; + case I915_TILING_X: return (height + 7) & -8; + case I915_TILING_Y: return (height + 31) & -32; + } +} + +cairo_surface_t * +i965_surface_create_internal (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height, + uint32_t tiling, + cairo_bool_t gpu_target) +{ + i965_surface_t *surface; + cairo_status_t status_ignored; + + surface = malloc (sizeof (i965_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i965_surface_init (surface, content, base_dev); + + if (width && height) { + uint32_t size; + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + + width = (width + 3) & -4; + surface->intel.drm.stride = cairo_format_stride_for_width (surface->intel.drm.format, + width); + surface->intel.drm.stride = (surface->intel.drm.stride + 63) & ~63; + +#if 0 + /* check for tiny surfaces for which tiling is irrelevant */ + if (height * surface->intel.drm.stride < 4096) + tiling = I915_TILING_NONE; +#endif + surface->intel.drm.stride = i965_tiling_stride (tiling, + surface->intel.drm.stride); + + height = i965_tiling_height (tiling, height); + assert (height <= I965_MAX_SIZE); + + size = surface->intel.drm.stride * height; + if (tiling != I915_TILING_NONE) + size = (size + 4095) & -4096; + + surface->intel.drm.bo = &intel_bo_create (to_intel_device (&base_dev->base), + size, gpu_target)->base; + if (surface->intel.drm.bo == NULL) { + status_ignored = _cairo_drm_surface_finish (&surface->intel.drm); + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + intel_bo_set_tiling (to_intel_device (&base_dev->base), + to_intel_bo (surface->intel.drm.bo), + tiling, 
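/* Aside (sketch, not part of this commit): the tiling helpers above round
 * the allocation to each tiling mode's granularity with the usual idiom:
 * strides to a 128-byte multiple for tiled surfaces, and heights to the
 * tile row count:
 *
 *     stride = (stride + 127) & -128;    // X/Y tiled stride
 *     height = (height + 7)  & -8;       // X tiling: 8-row tiles
 *     height = (height + 31) & -32;      // Y tiling: 32-row tiles
 *     height = (height + 1)  & -2;       // untiled: even row count
 */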
surface->intel.drm.stride); + + assert (surface->intel.drm.bo->size >= (size_t) surface->intel.drm.stride*height); + } + + return &surface->intel.drm.base; +} + +static cairo_surface_t * +i965_surface_create (cairo_drm_device_t *device, + cairo_content_t content, int width, int height) +{ + return i965_surface_create_internal (device, content, width, height, + I965_TILING_DEFAULT, TRUE); +} + +static cairo_surface_t * +i965_surface_create_for_name (cairo_drm_device_t *base_dev, + unsigned int name, + cairo_format_t format, + int width, int height, int stride) +{ + i965_device_t *device; + i965_surface_t *surface; + cairo_content_t content; + cairo_status_t status_ignored; + int min_stride; + + min_stride = cairo_format_stride_for_width (format, (width + 3) & -4); + if (stride < min_stride || stride & 63) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_STRIDE)); + + if (format == CAIRO_FORMAT_A1) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + + switch (format) { + case CAIRO_FORMAT_ARGB32: + content = CAIRO_CONTENT_COLOR_ALPHA; + break; + case CAIRO_FORMAT_RGB24: + content = CAIRO_CONTENT_COLOR; + break; + case CAIRO_FORMAT_A8: + content = CAIRO_CONTENT_ALPHA; + break; + default: + case CAIRO_FORMAT_A1: + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + } + + surface = malloc (sizeof (i965_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i965_surface_init (surface, content, base_dev); + + device = (i965_device_t *) base_dev; + surface->intel.drm.bo = &intel_bo_create_for_name (&device->intel, name)->base; + if (unlikely (surface->intel.drm.bo == NULL)) { + status_ignored = _cairo_drm_surface_finish (&surface->intel.drm); + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->intel.drm.stride = stride; + + return &surface->intel.drm.base; +} + +static cairo_status_t +i965_surface_enable_scan_out (void *abstract_surface) +{ + i965_surface_t *surface = abstract_surface; + intel_bo_t *bo; + + if (unlikely (surface->intel.drm.bo == NULL)) + return _cairo_error (CAIRO_STATUS_INVALID_SIZE); + + bo = to_intel_bo (surface->intel.drm.bo); + if (bo->tiling != I915_TILING_X) { + i965_device_t *device = i965_device (surface); + cairo_surface_pattern_t pattern; + cairo_surface_t *clone; + cairo_status_t status; + + clone = i965_surface_create_internal (&device->intel.base, + surface->intel.drm.base.content, + surface->intel.drm.width, + surface->intel.drm.height, + I915_TILING_X, + TRUE); + if (unlikely (clone->status)) + return clone->status; + + /* 2D blit? 
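 * Aside (sketch, not part of this commit): note how, just above,
 * i965_surface_create_for_name validates a caller-supplied stride against
 * cairo's minimum for the 4-pixel padded width, plus the 64-byte hardware
 * constraint, using only public API:
 *
 *     int min_stride = cairo_format_stride_for_width (format, (width + 3) & -4);
 *     if (stride < min_stride || (stride & 63))
 *         return error (CAIRO_STATUS_INVALID_STRIDE);   // error() hypothetical
 *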
*/ + _cairo_pattern_init_for_surface (&pattern, &surface->intel.drm.base); + pattern.base.filter = CAIRO_FILTER_NEAREST; + + status = _cairo_surface_paint (clone, + CAIRO_OPERATOR_SOURCE, + &pattern.base, + NULL); + + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + cairo_surface_destroy (clone); + return status; + } + + /* swap buffer objects */ + surface->intel.drm.bo = ((cairo_drm_surface_t *) clone)->bo; + ((cairo_drm_surface_t *) clone)->bo = &bo->base; + bo = to_intel_bo (surface->intel.drm.bo); + + cairo_surface_destroy (clone); + } + + if (unlikely (bo->tiling == I915_TILING_Y)) + return _cairo_error (CAIRO_STATUS_INVALID_FORMAT); /* XXX */ + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_int_status_t +_i965_device_flush (cairo_drm_device_t *device) +{ + cairo_status_t status; + + status = cairo_device_acquire (&device->base); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = i965_device_flush ((i965_device_t *) device); + + cairo_device_release (&device->base); + + return status; +} + +static cairo_int_status_t +_i965_device_throttle (cairo_drm_device_t *device) +{ + cairo_status_t status; + + status = cairo_device_acquire (&device->base); + if (unlikely (status)) + return status; + + status = i965_device_flush ((i965_device_t *) device); + intel_throttle ((intel_device_t *) device); + + cairo_device_release (&device->base); + + return status; +} + +static void +_i965_device_destroy (void *base) +{ + i965_device_t *device = base; + + i965_device_reset (device); + i965_general_state_reset (device); + + _cairo_hash_table_destroy (device->sf_states); + _cairo_hash_table_destroy (device->samplers); + _cairo_hash_table_destroy (device->cc_states); + _cairo_hash_table_destroy (device->wm_kernels); + _cairo_hash_table_destroy (device->wm_states); + _cairo_hash_table_destroy (device->wm_bindings); + + _cairo_freelist_fini (&device->sf_freelist); + _cairo_freelist_fini (&device->cc_freelist); + _cairo_freelist_fini (&device->wm_kernel_freelist); + _cairo_freelist_fini (&device->wm_state_freelist); + _cairo_freelist_fini (&device->wm_binding_freelist); + _cairo_freelist_fini (&device->sampler_freelist); + + intel_device_fini (&device->intel); + free (device); +} + +static cairo_bool_t +hash_equal (const void *A, const void *B) +{ + const cairo_hash_entry_t *a = A, *b = B; + return a->hash == b->hash; +} + +cairo_drm_device_t * +_cairo_drm_i965_device_create (int fd, dev_t dev, int vendor_id, int chip_id) +{ + i965_device_t *device; + uint64_t gtt_size; + cairo_status_t status; + + if (! 
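/* Aside (sketch, not part of this commit): enable_scan_out retiles by
 * painting the surface into an X-tiled clone and then swapping the two
 * buffer objects, so the surface keeps its identity while its storage
 * changes:
 *
 *     // paint surface -> clone with CAIRO_OPERATOR_SOURCE, then:
 *     tmp = surface->bo;
 *     surface->bo = clone->bo;       // surface now owns the X-tiled bo
 *     clone->bo = tmp;               // the clone takes the old bo with it
 *     cairo_surface_destroy (clone); // releases the original storage
 */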
intel_info (fd, &gtt_size))
+	return NULL;
+
+    device = malloc (sizeof (i965_device_t));
+    if (unlikely (device == NULL))
+	return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY);
+
+    status = intel_device_init (&device->intel, fd);
+    if (unlikely (status))
+	goto CLEANUP;
+
+    device->is_g4x = IS_G4X (chip_id);
+    //device->is_g5x = IS_G5X (chip_id);
+
+    device->intel.base.surface.create = i965_surface_create;
+    device->intel.base.surface.create_for_name = i965_surface_create_for_name;
+    device->intel.base.surface.create_from_cacheable_image = NULL;
+    device->intel.base.surface.enable_scan_out = i965_surface_enable_scan_out;
+
+    device->intel.base.device.flush = _i965_device_flush;
+    device->intel.base.device.throttle = _i965_device_throttle;
+    device->intel.base.device.destroy = _i965_device_destroy;
+
+    device->sf_states = _cairo_hash_table_create (i965_sf_state_equal);
+    if (unlikely (device->sf_states == NULL))
+	goto CLEANUP_INTEL;
+
+    _cairo_freelist_init (&device->sf_freelist,
+			  sizeof (struct i965_sf_state));
+
+    device->cc_states = _cairo_hash_table_create (i965_cc_state_equal);
+    if (unlikely (device->cc_states == NULL))
+	goto CLEANUP_SF;
+
+    _cairo_freelist_init (&device->cc_freelist,
+			  sizeof (struct i965_cc_state));
+
+    device->wm_kernels = _cairo_hash_table_create (hash_equal);
+    if (unlikely (device->wm_kernels == NULL))
+	goto CLEANUP_CC;
+
+    _cairo_freelist_init (&device->wm_kernel_freelist,
+			  sizeof (struct i965_wm_kernel));
+
+    device->wm_states = _cairo_hash_table_create (i965_wm_state_equal);
+    if (unlikely (device->wm_states == NULL))
+	goto CLEANUP_WM_KERNEL;
+
+    _cairo_freelist_init (&device->wm_state_freelist,
+			  sizeof (struct i965_wm_state));
+
+    device->wm_bindings = _cairo_hash_table_create (i965_wm_binding_equal);
+    if (unlikely (device->wm_bindings == NULL))
+	goto CLEANUP_WM_STATE;
+
+    _cairo_freelist_init (&device->wm_binding_freelist,
+			  sizeof (struct i965_wm_binding));
+
+    device->samplers = _cairo_hash_table_create (hash_equal);
+    if (unlikely (device->samplers == NULL))
+	goto CLEANUP_WM_BINDING;
+
+    _cairo_freelist_init (&device->sampler_freelist,
+			  sizeof (struct i965_sampler));
+
+    i965_stream_init (&device->batch,
+		      device->batch_base, sizeof (device->batch_base),
+		      NULL, 0,
+		      device->batch_relocations,
+		      ARRAY_LENGTH (device->batch_relocations));
+
+    i965_stream_init (&device->surface,
+		      device->surface_base, sizeof (device->surface_base),
+		      device->surface_pending_relocations,
+		      ARRAY_LENGTH (device->surface_pending_relocations),
+		      device->surface_relocations,
+		      ARRAY_LENGTH (device->surface_relocations));
+
+    i965_stream_init (&device->general,
+		      device->general_base, sizeof (device->general_base),
+		      device->general_pending_relocations,
+		      ARRAY_LENGTH (device->general_pending_relocations),
+		      NULL, 0);
+
+    i965_stream_init (&device->vertex,
+		      device->vertex_base, sizeof (device->vertex_base),
+		      device->vertex_pending_relocations,
+		      ARRAY_LENGTH (device->vertex_pending_relocations),
+		      NULL, 0);
+
+    i965_stream_init (&device->constant,
+		      device->constant_base, sizeof (device->constant_base),
+		      device->constant_pending_relocations,
+		      ARRAY_LENGTH (device->constant_pending_relocations),
+		      NULL, 0);
+
+    cairo_list_init (&device->flush);
+    i965_device_reset (device);
+    device->vs_offset = (uint32_t) -1;
+    device->border_color_offset = (uint32_t) -1;
+    device->general_state = NULL;
+
+    return _cairo_drm_device_init (&device->intel.base,
+				   fd, dev, vendor_id, chip_id,
+				   I965_MAX_SIZE);
+
+  CLEANUP_WM_BINDING:
+    _cairo_hash_table_destroy (device->wm_bindings);
+  CLEANUP_WM_STATE:
+    _cairo_hash_table_destroy (device->wm_states);
+  CLEANUP_WM_KERNEL:
+    _cairo_hash_table_destroy (device->wm_kernels);
+  CLEANUP_CC:
+    _cairo_hash_table_destroy (device->cc_states);
+  CLEANUP_SF:
+    _cairo_hash_table_destroy (device->sf_states);
+  CLEANUP_INTEL:
+    intel_device_fini (&device->intel);
+  CLEANUP:
+    free (device);
+    return (cairo_drm_device_t *) _cairo_device_create_in_error (status);
+}
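/* Aside (sketch, not part of this commit): _cairo_drm_i965_device_create
 * above uses the classic goto-unwind ladder, where each successfully
 * created table adds one more label to fall through on a later failure,
 * tearing down in reverse creation order:
 *
 *     a = create_a (); if (a == NULL) goto CLEANUP;      // hypothetical
 *     b = create_b (); if (b == NULL) goto CLEANUP_A;
 *     return success;
 *   CLEANUP_A:
 *     destroy_a (a);
 *   CLEANUP:
 *     return error;
 */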
diff --git a/src/drm/cairo-drm-intel-brw-defines.h b/src/drm/cairo-drm-intel-brw-defines.h
new file mode 100644
index 00000000..b2be36f1
--- /dev/null
+++ b/src/drm/cairo-drm-intel-brw-defines.h
@@ -0,0 +1,824 @@
+/**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + +#ifndef CAIRO_DRM_INTEL_BRW_DEFINES_H +#define CAIRO_DRM_INTEL_BRW_DEFINES_H + +/* 3D state: */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define BRW_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define BRW_PIPE_CONTROL BRW_3D(3, 2, 0) +#define BRW_PIPE_CONTROL_NOWRITE (0 << 14) +#define BRW_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define BRW_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define BRW_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define BRW_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define BRW_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define BRW_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2) + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define 
BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 
+#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define 
BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define 
BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_BORDER_COLOR_MODE_DEFAULT 0 +#define BRW_BORDER_COLOR_MODE_LEGACY 1 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 
+#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + 
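/* Aside (sketch, not part of this commit): these opcode tables are meant
 * to be consumed through the BRW_3D() macro defined earlier in this
 * header, which packs command type, pipeline, opcode and sub-opcode into
 * the top bits of a command's first dword:
 *
 *     BRW_3D (3, 2, 0) == (3 << 29) | (3 << 27) | (2 << 24) | (0 << 16)
 *                      == 0x7A000000          // CMD_PIPE_CONTROL << 16
 *
 * leaving the low bits free for the command's dword-length field.
 */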
+#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define 
BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + +/* media pipeline */ + +#define BRW_VFE_MODE_GENERIC 0x0 +#define BRW_VFE_MODE_VLD_MPEG2 0x1 +#define BRW_VFE_MODE_IS 0x2 +#define BRW_VFE_MODE_AVC_MC 0x4 +#define BRW_VFE_MODE_AVC_IT 0x7 +#define BRW_VFE_MODE_VC1_IT 0xB + +#define BRW_VFE_DEBUG_COUNTER_FREE 0 +#define BRW_VFE_DEBUG_COUNTER_FROZEN 1 +#define BRW_VFE_DEBUG_COUNTER_ONCE 2 +#define BRW_VFE_DEBUG_COUNTER_ALWAYS 3 + +/* VLD_STATE */ +#define BRW_MPEG_TOP_FIELD 1 +#define BRW_MPEG_BOTTOM_FIELD 2 +#define BRW_MPEG_FRAME 3 +#define BRW_MPEG_QSCALE_LINEAR 0 +#define BRW_MPEG_QSCALE_NONLINEAR 1 +#define BRW_MPEG_ZIGZAG_SCAN 0 +#define BRW_MPEG_ALTER_VERTICAL_SCAN 1 +#define BRW_MPEG_I_PICTURE 1 +#define BRW_MPEG_P_PICTURE 2 +#define BRW_MPEG_B_PICTURE 3 + +#endif diff --git a/src/drm/cairo-drm-intel-brw-eu-emit.c b/src/drm/cairo-drm-intel-brw-eu-emit.c new file mode 100644 index 00000000..05bac0a9 --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-eu-emit.c @@ -0,0 +1,1089 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "cairoint.h" +#include "cairo-drm-intel-brw-eu.h" + +#include <string.h> + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +void +brw_instruction_set_destination (struct brw_instruction *insn, + struct brw_reg dest) +{ + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +void +brw_instruction_set_source0 (struct brw_instruction *insn, + struct brw_reg reg) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } else { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = 
reg.dw1.bits.indirect_offset; + } else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_instruction *insn, + uint32_t msg_length, + uint32_t response_length, + uint32_t function, + uint32_t integer_type, + int low_precision, + int saturate, + uint32_t dataType ) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + 
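/* Editorial note, not part of the original patch: for SEND
+ * instructions the src1/bits3 dword doubles as the message
+ * descriptor, so each brw_set_*_message helper first clears it via
+ * the brw_set_src1 (insn, brw_imm_d (0)) call above, then fills in
+ * the descriptor fields below.
+ */
+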
insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; +} + +static void brw_set_urb_message( struct brw_instruction *insn, + int allocate, + int used, + uint32_t msg_length, + uint32_t response_length, + int end_of_thread, + int complete, + uint32_t offset, + uint32_t swizzle_control ) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +void +brw_instruction_set_dp_write_message (struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t msg_length, + uint32_t pixel_scoreboard_clear, + uint32_t response_length, + uint32_t end_of_thread) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_read_message( struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t target_cache, + uint32_t msg_length, + uint32_t response_length, + uint32_t end_of_thread ) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + insn->bits3.dp_read.binding_table_index = binding_table_index; + insn->bits3.dp_read.msg_control = msg_control; + insn->bits3.dp_read.msg_type = msg_type; + insn->bits3.dp_read.target_cache = target_cache; + insn->bits3.dp_read.response_length = response_length; + insn->bits3.dp_read.msg_length = msg_length; + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits3.dp_read.end_of_thread = end_of_thread; +} + +static void +brw_set_sampler_message (struct brw_instruction *insn, + cairo_bool_t is_g4x, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + cairo_bool_t eot) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + if (is_g4x) { + /* XXX presume the driver is sane! 
*/ + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + insn->bits3.sampler_g4x.response_length = response_length; + insn->bits3.sampler_g4x.msg_length = msg_length; + insn->bits3.sampler_g4x.end_of_thread = eot; + insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } +} + +struct brw_instruction * +brw_next_instruction (struct brw_compile *p, + uint32_t opcode) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: */ + if (p->current->header.destreg__conditonalmod) { + p->current->header.destreg__conditonalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + +static struct brw_instruction *brw_alu1( struct brw_compile *p, + uint32_t opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = brw_next_instruction(p, opcode); + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src); + return insn; +} + +static struct brw_instruction *brw_alu2(struct brw_compile *p, + uint32_t opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = brw_next_instruction(p, opcode); + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. 
+ */
+#define ALU1(OP) \
+ struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+ struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+ ALU1(MOV)
+ ALU2(SEL)
+ ALU1(NOT)
+ ALU2(AND)
+ ALU2(OR)
+ ALU2(XOR)
+ ALU2(SHR)
+ ALU2(SHL)
+ ALU2(RSR)
+ ALU2(RSL)
+ ALU2(ASR)
+ ALU2(ADD)
+ ALU2(MUL)
+ ALU1(FRC)
+ ALU1(RNDD)
+ ALU1(RNDZ)
+ ALU2(MAC)
+ ALU2(MACH)
+ ALU1(LZD)
+ ALU2(DP4)
+ ALU2(DPH)
+ ALU2(DP3)
+ ALU2(DP2)
+ALU2(LINE)
+
+void brw_NOP(struct brw_compile *p)
+{
+ struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_NOP);
+ brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_ud(0x0));
+}
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, e.g. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
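+ *
+ * Illustrative usage (an editor's sketch, not part of this patch): a
+ * predicated block built with the helpers below would look like
+ *
+ *     brw_CMP (p, brw_null_reg (), BRW_CONDITIONAL_GE, x, brw_imm_f (0.0));
+ *     insn = brw_IF (p, BRW_EXECUTE_16);   (pushes per-channel flags)
+ *     ... emit the 'then' instructions ...
+ *     insn = brw_ELSE (p, insn);           (patches the IF's jump_count)
+ *     ... emit the 'else' instructions ...
+ *     brw_ENDIF (p, insn);                 (pops the flag stack)
+ *
+ * where p is the brw_compile context and x some float register.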
+ */ +struct brw_instruction *brw_IF(struct brw_compile *p, uint32_t execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = brw_next_instruction(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = brw_next_instruction(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_instruction_set_destination (insn, brw_ip_reg ()); + brw_instruction_set_source0 (insn, brw_ip_reg ()); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = brw_next_instruction(p, BRW_OPCODE_ADD); + } else { + insn = brw_next_instruction(p, BRW_OPCODE_ELSE); + } + + brw_instruction_set_destination (insn, brw_ip_reg ()); + brw_instruction_set_source0 (insn, brw_ip_reg ()); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_ENDIF); + + brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. 
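+ *
+ * (Editor's note: jump_count is measured in whole instructions, each
+ * 16 bytes, while the single_program_flow ADD variants patch a byte
+ * offset into bits3.ud; that is the origin of the "* 16" scaling used
+ * above and in brw_ELSE/brw_WHILE.)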
+ */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = brw_next_instruction(p, BRW_OPCODE_BREAK); + brw_instruction_set_destination(insn, brw_ip_reg()); + brw_instruction_set_source0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d (0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = brw_next_instruction(p, BRW_OPCODE_CONTINUE); + brw_instruction_set_destination(insn, brw_ip_reg()); + brw_instruction_set_source0(insn, brw_ip_reg()); + brw_set_src1 (insn, brw_imm_d (0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: +*/ +struct brw_instruction *brw_DO(struct brw_compile *p, uint32_t execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_instruction_set_destination(insn, brw_null_reg()); + brw_instruction_set_source0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = brw_next_instruction(p, BRW_OPCODE_ADD); + else + insn = brw_next_instruction(p, BRW_OPCODE_WHILE); + + brw_instruction_set_destination(insn, brw_ip_reg()); + brw_instruction_set_source0(insn, brw_ip_reg()); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn + 1; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: +*/ +void 
brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn)
+{
+ struct brw_instruction *landing = &p->store[p->nr_insn];
+
+ assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+ assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+ jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
+}
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ uint32_t conditional,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_CMP);
+
+ insn->header.destreg__conditonalmod = conditional;
+ brw_instruction_set_destination(insn, dest);
+ brw_instruction_set_source0(insn, src0);
+ brw_set_src1(insn, src1);
+
+ /* guess_execution_size(insn, src0); */
+
+ /* Make it so that future instructions will use the computed flag
+ * value until brw_set_predicate_control_flag_value() is called
+ * again.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == 0) {
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ p->flag_value = 0xff;
+ }
+}
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/* Extended math function on 8 values (INV, SQRT, POW, ...)
+*/
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ struct brw_reg src,
+ uint32_t data_type,
+ uint32_t precision )
+{
+ struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
+ uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditonalmod = msg_reg_nr;
+
+ response_length = 1; /* note: overrides the SINCOS-dependent value computed above */
+
+ brw_instruction_set_destination(insn, dest);
+ brw_instruction_set_source0(insn, src);
+ brw_set_math_message(insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ data_type);
+}
+
+/* Use 2 send instructions to operate on 16 elements
+*/
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ uint32_t function,
+ uint32_t saturate,
+ uint32_t msg_reg_nr,
+ struct brw_reg src,
+ uint32_t precision )
+{
+ struct brw_instruction *insn;
+ uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ?
2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = brw_next_instruction(p, BRW_OPCODE_SEND); + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = brw_next_instruction(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditonalmod = msg_reg_nr+1; + + brw_instruction_set_destination(insn, offset(dest,1)); + brw_instruction_set_source0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + + + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + uint32_t msg_reg_nr, + uint32_t scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV (p, + retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d (scratch_offset)); + + brw_pop_insn_state(p); + } + + { + uint32_t msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src); + + brw_instruction_set_dp_write_message(insn, + 255, /* bti */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } + +} + + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + uint32_t scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV (p, + retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d (scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); /* UW? 
*/ + brw_instruction_set_source0(insn, retype(brw_vec8_grf(0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(insn, + 255, /* bti */ + 3, /* msg_control */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t msg_length, + uint32_t response_length, + int eot) +{ + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src0); + brw_instruction_set_dp_write_message(insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + + +void brw_SAMPLE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t writemask, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + cairo_bool_t eot) +{ + int need_stall = 0; + + if(writemask == 0) { + /* printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. 
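+ *
+ * Worked example (editor's illustration): for writemask == WRITEMASK_YZ
+ * the first loop below skips the X channel (dst_offset = 2 registers),
+ * the second collects the contiguous run Y,Z (newmask == writemask,
+ * len == 2), so no stall is needed: the message header is rewritten,
+ * dest is advanced by dst_offset and response_length becomes
+ * len * 2 == 4. For a non-contiguous mask such as WRITEMASK_XZ,
+ * newmask (X alone) differs from writemask and the dependency stall is
+ * generated instead.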
+ */ + if (writemask != WRITEMASK_XYZW) { + uint32_t dst_offset = 0; + uint32_t i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { + need_stall = 1; + /* printf("need stall %x %x\n", newmask , writemask); */ + } + else { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + newmask = ~newmask & WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, m1, brw_vec8_grf(0)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + response_length = len * 2; + } + } + + { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src0); + brw_set_sampler_message (insn, p->is_g4x, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot); + } + + if (need_stall) + { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, 0); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + int allocate, + int used, + uint32_t msg_length, + uint32_t response_length, + int eot, + int writes_complete, + uint32_t offset, + uint32_t swizzle) +{ + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_instruction_set_destination (insn, dest); + brw_instruction_set_source0 (insn, src0); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_urb_message (insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} diff --git a/src/drm/cairo-drm-intel-brw-eu-util.c b/src/drm/cairo-drm-intel-brw-eu-util.c new file mode 100644 index 00000000..592235b1 --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-eu-util.c @@ -0,0 +1,121 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "cairoint.h"
+#include "cairo-drm-intel-brw-eu.h"
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math( p,
+ dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ src,
+ BRW_MATH_DATA_VECTOR, /* data_type */
+ BRW_MATH_PRECISION_FULL ); /* precision */
+}
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ uint32_t count)
+{
+ uint32_t i;
+
+ dst = vec4(dst);
+ src = vec4(src);
+
+ for (i = 0; i < count; i++)
+ {
+ uint32_t delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+ }
+}
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ uint32_t count)
+{
+ uint32_t i;
+
+ dst = vec8(dst);
+ src = vec8(src);
+
+ for (i = 0; i < count; i++)
+ {
+ uint32_t delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ }
+}
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ uint32_t count)
+{
+ uint32_t i;
+
+ for (i = 0; i < count; i++)
+ {
+ uint32_t delta = i*32;
+ brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
+ brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+ }
+}
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ uint32_t count)
+{
+ uint32_t i;
+
+ dst = vec4(dst);
+
+ for (i = 0; i < count; i++)
+ {
+ uint32_t delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+ }
+} diff --git a/src/drm/cairo-drm-intel-brw-eu.c b/src/drm/cairo-drm-intel-brw-eu.c new file mode 100644 index 00000000..51c3de4f --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-eu.c @@ -0,0 +1,250 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "cairoint.h" +#include "cairo-drm-intel-brw-eu.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, uint32_t value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, uint32_t pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, uint32_t conditional ) +{ + p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, uint32_t access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, int compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, uint32_t value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, uint32_t value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + +/************************************************************************/ +void +brw_compile_init (struct brw_compile *p, + cairo_bool_t is_g4x) +{ + p->nr_insn = 0; + p->current = p->stack; + memset (p->current, 0, sizeof (p->current[0])); + + p->is_g4x = is_g4x; + + /* Some defaults? */ + brw_set_mask_control (p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate (p, 0); + brw_set_compression_control (p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value (p, 0xff); +} + +const uint32_t * +brw_get_program (struct brw_compile *p, + uint32_t *sz) +{ + *sz = p->nr_insn * sizeof (struct brw_instruction); + return (const uint32_t *)p->store; +} + + + +/** + * Subroutine calls require special attention. + * Mesa instructions may be expanded into multiple hardware instructions + * so the prog_instruction::BranchTarget field can't be used as an index + * into the hardware instructions. + * + * The BranchTarget field isn't needed, however. Mesa's GLSL compiler + * emits CAL and BGNSUB instructions with labels that can be used to map + * subroutine calls to actual subroutine code blocks. + * + * The structures and function here implement patching of CAL instructions + * so they jump to the right subroutine code... + */ + + +/** + * For each OPCODE_BGNSUB we create one of these. 
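+ *
+ * (Editor's sketch, not part of the patch; "sub" and call_insn_pos are
+ * illustrative names. The intended flow is:
+ *
+ *     brw_save_label (c, "sub", c->nr_insn);    at each OPCODE_BGNSUB
+ *     brw_save_call (c, "sub", call_insn_pos);  at each OPCODE_CAL
+ *     brw_resolve_cals (c);                     when code generation ends,
+ *
+ * after which every CAL's src1 holds the byte offset to its subroutine.)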
+ */ +struct brw_glsl_label +{ + const char *name; /**< the label string */ + uint32_t position; /**< the position of the brw instruction for this label */ + struct brw_glsl_label *next; /**< next in linked list */ +}; + + +/** + * For each OPCODE_CAL we create one of these. + */ +struct brw_glsl_call +{ + uint32_t call_inst_pos; /**< location of the CAL instruction */ + const char *sub_name; /**< name of subroutine to call */ + struct brw_glsl_call *next; /**< next in linked list */ +}; + + +/** + * Called for each OPCODE_BGNSUB. + */ + void +brw_save_label(struct brw_compile *c, const char *name, uint32_t position) +{ + struct brw_glsl_label *label = calloc(1, sizeof *label); + label->name = name; + label->position = position; + label->next = c->first_label; + c->first_label = label; +} + + +/** + * Called for each OPCODE_CAL. + */ + void +brw_save_call(struct brw_compile *c, const char *name, uint32_t call_pos) +{ + struct brw_glsl_call *call = calloc(1, sizeof *call); + call->call_inst_pos = call_pos; + call->sub_name = name; + call->next = c->first_call; + c->first_call = call; +} + + +/** + * Lookup a label, return label's position/offset. + */ + static uint32_t +brw_lookup_label(struct brw_compile *c, const char *name) +{ + const struct brw_glsl_label *label; + for (label = c->first_label; label; label = label->next) { + if (strcmp(name, label->name) == 0) { + return label->position; + } + } + abort(); /* should never happen */ + return ~0; +} + + +/** + * When we're done generating code, this function is called to resolve + * subroutine calls. + */ + void +brw_resolve_cals(struct brw_compile *c) +{ + const struct brw_glsl_call *call; + + for (call = c->first_call; call; call = call->next) { + const uint32_t sub_loc = brw_lookup_label(c, call->sub_name); + struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; + struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; + int32_t offset = brw_sub_inst - brw_call_inst; + + /* patch brw_inst1 to point to brw_inst2 */ + brw_set_src1(brw_call_inst, brw_imm_d(offset * 16)); + } + + /* free linked list of calls */ + { + struct brw_glsl_call *call, *next; + for (call = c->first_call; call; call = next) { + next = call->next; + free(call); + } + c->first_call = NULL; + } + + /* free linked list of labels */ + { + struct brw_glsl_label *label, *next; + for (label = c->first_label; label; label = next) { + next = label->next; + free(label); + } + c->first_label = NULL; + } +} diff --git a/src/drm/cairo-drm-intel-brw-eu.h b/src/drm/cairo-drm-intel-brw-eu.h new file mode 100644 index 00000000..7a2a65ce --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-eu.h @@ -0,0 +1,1043 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef CAIRO_DRM_INTEL_BRW_EU_H +#define CAIRO_DRM_INTEL_BRW_EU_H + +#include "cairo-drm-intel-brw-structs.h" +#include "cairo-drm-intel-brw-defines.h" + +#include <assert.h> + + +/** + * Writemask values, 1 bit per component. + */ +#define WRITEMASK_X 0x1 +#define WRITEMASK_Y 0x2 +#define WRITEMASK_Z 0x4 +#define WRITEMASK_W 0x8 +#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y) +#define WRITEMASK_XZ (WRITEMASK_X | WRITEMASK_Z) +#define WRITEMASK_YZ (WRITEMASK_Y | WRITEMASK_Z) +#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z) +#define WRITEMASK_XW (WRITEMASK_X | WRITEMASK_W) +#define WRITEMASK_YW (WRITEMASK_Y | WRITEMASK_W) +#define WRITEMASK_XYW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_W) +#define WRITEMASK_ZW (WRITEMASK_Z | WRITEMASK_W) +#define WRITEMASK_XZW (WRITEMASK_X | WRITEMASK_Z | WRITEMASK_W) +#define WRITEMASK_YZW (WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W) +#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W) + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4 (0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4 (0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4 (0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4 (0,1,0,1) + +#define REG_SIZE (8*4) + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg { + uint32_t type:4; + uint32_t file:2; + uint32_t nr:8; + uint32_t subnr:5; /* :1 in align16 */ + uint32_t negate:1; /* source only */ + uint32_t abs:1; /* source only */ + uint32_t vstride:4; /* source only */ + uint32_t width:3; /* src only, align1 only */ + uint32_t hstride:2; /* align1 only */ + uint32_t address_mode:1; /* relative addressing, hopefully! 
 */
+ uint32_t pad0:1;
+
+ union {
+ struct {
+ uint32_t swizzle:8; /* src only, align16 only */
+ uint32_t writemask:4; /* dest only, align16 only */
+ int32_t indirect_offset:10; /* relative addressing offset */
+ uint32_t pad1:10; /* two dwords total */
+ } bits;
+
+ float f;
+ int32_t d;
+ uint32_t ud;
+ } dw1;
+};
+
+struct brw_indirect {
+ uint32_t addr_subnr:4;
+ int32_t addr_offset:10;
+ uint32_t pad:18;
+};
+
+struct brw_glsl_label;
+struct brw_glsl_call;
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 200
+
+struct brw_compile {
+ struct brw_instruction store[BRW_EU_MAX_INSN];
+ uint32_t nr_insn;
+
+ cairo_bool_t is_g4x;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ uint32_t flag_value;
+ int single_program_flow;
+ struct brw_context *brw;
+
+ struct brw_glsl_label *first_label;  /**< linked list of labels */
+ struct brw_glsl_call *first_call;    /**< linked list of CALs */
+};
+
+cairo_private void
+brw_save_label (struct brw_compile *c,
+ const char *name,
+ uint32_t position);
+
+cairo_private void
+brw_save_call (struct brw_compile *c,
+ const char *name,
+ uint32_t call_pos);
+
+cairo_private void
+brw_resolve_cals (struct brw_compile *c);
+
+static always_inline int
+type_sz (uint32_t type)
+{
+ switch (type) {
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_F:
+ return 4;
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_W:
+ return 2;
+ case BRW_REGISTER_TYPE_UB:
+ case BRW_REGISTER_TYPE_B:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * Construct a brw_reg.
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr register sub number
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask WRITEMASK_X/Y/Z/W bitfield
+ */
+static always_inline struct brw_reg
+brw_reg (uint32_t file,
+ uint32_t nr,
+ uint32_t subnr,
+ uint32_t type,
+ uint32_t vstride,
+ uint32_t width,
+ uint32_t hstride,
+ uint32_t swizzle,
+ uint32_t writemask)
+{
+ struct brw_reg reg;
+
+ if (file == BRW_GENERAL_REGISTER_FILE)
+ assert(nr < 128);
+ else if (file == BRW_MESSAGE_REGISTER_FILE)
+ assert(nr < 9);
+ else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(nr <= BRW_ARF_IP);
+
+ reg.type = type;
+ reg.file = file;
+ reg.nr = nr;
+ reg.subnr = subnr * type_sz(type);
+ reg.negate = 0;
+ reg.abs = 0;
+ reg.vstride = vstride;
+ reg.width = width;
+ reg.hstride = hstride;
+ reg.address_mode = BRW_ADDRESS_DIRECT;
+ reg.pad0 = 0;
+
+ /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+ * set swizzle and writemask to W, as the lower bits of subnr will
+ * be lost when converted to align16. This is probably too much to
+ * keep track of as you'd want it adjusted by suboffset(), etc.
+ * Perhaps fix up when converting to align16?
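+ *
+ * (Editor's note: r5.3<0;1,0> above is the usual EU region shorthand,
+ * reg.subnr<vstride;width,hstride> counted in elements, so <0;1,0>
+ * names a scalar replicated across the execution channels. With the
+ * helpers defined later in this header such a view could be built as
+ *
+ *     struct brw_reg scalar = vec1 (suboffset (brw_vec8_grf (5), 3));
+ *
+ * which is an illustrative sketch, not code from this patch.)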
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + + return reg; +} + +/** Construct float[16] register */ +static always_inline struct brw_reg +brw_vec16_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static always_inline struct brw_reg +brw_vec8_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static always_inline struct brw_reg +brw_vec4_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static always_inline struct brw_reg +brw_vec2_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); +} + +/** Construct float[1] register */ +static always_inline struct brw_reg +brw_vec1_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + +static always_inline struct brw_reg +retype (struct brw_reg reg, + uint32_t type) +{ + reg.type = type; + return reg; +} + +static always_inline struct brw_reg +suboffset (struct brw_reg reg, + uint32_t delta) +{ + reg.subnr += delta * type_sz (reg.type); + return reg; +} + +static always_inline struct brw_reg +offset (struct brw_reg reg, + uint32_t delta) +{ + reg.nr += delta; + return reg; +} + +static always_inline struct brw_reg +byte_offset (struct brw_reg reg, + uint32_t bytes) +{ + uint32_t newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + +/** Construct unsigned word[16] register */ +static always_inline struct brw_reg +brw_uw16_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec16_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static always_inline struct brw_reg +brw_uw8_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec8_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[2] register */ +static always_inline struct brw_reg +brw_uw2_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec2_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static always_inline struct brw_reg +brw_uw1_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec1_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static always_inline struct brw_reg +brw_imm_reg (uint32_t type) +{ + return brw_reg (BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ 
+static always_inline struct brw_reg brw_imm_f( float f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static always_inline struct brw_reg brw_imm_d( int32_t d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static always_inline struct brw_reg brw_imm_ud( uint32_t ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static always_inline struct brw_reg brw_imm_uw( uint16_t uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static always_inline struct brw_reg brw_imm_w( int16_t w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static always_inline +struct brw_reg brw_imm_v (uint32_t v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static always_inline struct brw_reg +brw_imm_vf (uint32_t v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static always_inline struct brw_reg +brw_imm_vf4 (uint32_t v0, + uint32_t v1, + uint32_t v2, + uint32_t v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + +static always_inline struct brw_reg +brw_address (struct brw_reg reg) +{ + return brw_imm_uw (reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static always_inline struct brw_reg +brw_vec1_grf (uint32_t nr, uint32_t subnr) +{ + return brw_vec1_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static always_inline struct brw_reg +brw_vec2_grf (uint32_t nr, uint32_t subnr) +{ + return brw_vec2_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static always_inline struct brw_reg +brw_vec4_grf (uint32_t nr, uint32_t subnr) +{ + return brw_vec4_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static always_inline struct brw_reg +brw_vec8_grf (uint32_t nr) +{ + return brw_vec8_reg (BRW_GENERAL_REGISTER_FILE, nr, 0); +} + +static always_inline struct brw_reg +brw_uw8_grf (uint32_t nr, uint32_t subnr) +{ + return brw_uw8_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static always_inline struct brw_reg +brw_uw16_grf (uint32_t nr, uint32_t subnr) +{ + return brw_uw16_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct null register (usually used for setting condition codes) */ +static always_inline struct brw_reg +brw_null_reg (void) +{ + return 
brw_vec8_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static always_inline struct brw_reg +brw_address_reg (uint32_t subnr) +{ + return brw_uw1_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static always_inline struct brw_reg +brw_ip_reg (void) +{ + return brw_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static always_inline struct brw_reg +brw_acc_reg (void) +{ + return brw_vec8_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + +static always_inline struct brw_reg +brw_flag_reg (void) +{ + return brw_uw1_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + +static always_inline struct brw_reg +brw_mask_reg (uint32_t subnr) +{ + return brw_uw1_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static always_inline struct brw_reg +brw_message4_reg (uint32_t nr) +{ + return brw_vec4_reg (BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + +static always_inline struct brw_reg +brw_message_reg (uint32_t nr) +{ + return brw_vec8_reg (BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static always_inline uint32_t +cvt (uint32_t val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static always_inline struct brw_reg +stride (struct brw_reg reg, + uint32_t vstride, + uint32_t width, + uint32_t hstride) +{ + reg.vstride = cvt (vstride); + reg.width = cvt (width) - 1; + reg.hstride = cvt (hstride); + return reg; +} + +static always_inline struct brw_reg +vec16 (struct brw_reg reg) +{ + return stride (reg, 16,16,1); +} + +static always_inline struct brw_reg +vec8 (struct brw_reg reg) +{ + return stride (reg, 8,8,1); +} + +static always_inline struct brw_reg +vec4 (struct brw_reg reg) +{ + return stride (reg, 4,4,1); +} + +static always_inline struct brw_reg +vec2 (struct brw_reg reg) +{ + return stride (reg, 2,2,1); +} + +static always_inline struct brw_reg +vec1 (struct brw_reg reg) +{ + return stride (reg, 0,1,0); +} + +static always_inline struct brw_reg +get_element (struct brw_reg reg, uint32_t elt) +{ + return vec1 (suboffset (reg, elt)); +} + +static always_inline struct brw_reg +get_element_ud (struct brw_reg reg, uint32_t elt) +{ + return vec1 (suboffset (retype (reg, BRW_REGISTER_TYPE_UD), elt)); +} + +static always_inline struct brw_reg +brw_swizzle (struct brw_reg reg, + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4 (BRW_GET_SWZ (reg.dw1.bits.swizzle, x), + BRW_GET_SWZ (reg.dw1.bits.swizzle, y), + BRW_GET_SWZ (reg.dw1.bits.swizzle, z), + BRW_GET_SWZ (reg.dw1.bits.swizzle, w)); + return reg; +} + +static always_inline struct brw_reg +brw_swizzle1 (struct brw_reg reg, + uint32_t x) +{ + return brw_swizzle (reg, x, x, x, x); +} + +static always_inline struct brw_reg +brw_writemask (struct brw_reg reg, + uint32_t mask) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static always_inline struct brw_reg +brw_set_writemask (struct brw_reg reg, + uint32_t mask) +{ + reg.dw1.bits.writemask = mask; + 
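/* Note: unlike brw_writemask() above, which can only narrow the
+ * existing mask (it ANDs), this helper replaces the writemask outright. */
+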
return reg; +} + +static always_inline struct brw_reg +negate (struct brw_reg reg) +{ + reg.negate ^= 1; + return reg; +} + +static always_inline struct brw_reg +brw_abs (struct brw_reg reg) +{ + reg.abs = 1; + return reg; +} + +static always_inline struct brw_reg +brw_vec4_indirect (uint32_t subnr, + int32_t offset) +{ + struct brw_reg reg = brw_vec4_grf (0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static always_inline struct brw_reg +brw_vec1_indirect (uint32_t subnr, + int32_t offset) +{ + struct brw_reg reg = brw_vec1_grf (0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static always_inline struct brw_reg +deref_4f (struct brw_indirect ptr, int32_t offset) +{ + return brw_vec4_indirect (ptr.addr_subnr, ptr.addr_offset + offset); +} + +static always_inline struct brw_reg +deref_1f(struct brw_indirect ptr, int32_t offset) +{ + return brw_vec1_indirect (ptr.addr_subnr, ptr.addr_offset + offset); +} + +static always_inline struct brw_reg +deref_4b(struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_4f (ptr, offset), BRW_REGISTER_TYPE_B); +} + +static always_inline struct brw_reg +deref_1uw(struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_1f (ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static always_inline struct brw_reg +deref_1d (struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_1f (ptr, offset), BRW_REGISTER_TYPE_D); +} + +static always_inline struct brw_reg +deref_1ud (struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_1f (ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static always_inline struct brw_reg +get_addr_reg (struct brw_indirect ptr) +{ + return brw_address_reg (ptr.addr_subnr); +} + +static always_inline struct brw_indirect +brw_indirect_offset (struct brw_indirect ptr, int32_t offset) +{ + ptr.addr_offset += offset; + return ptr; +} + +static always_inline struct brw_indirect +brw_indirect (uint32_t addr_subnr, int32_t offset) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +static always_inline struct brw_instruction * +current_insn (struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +cairo_private void brw_pop_insn_state (struct brw_compile *p); +cairo_private void brw_push_insn_state (struct brw_compile *p); +cairo_private void brw_set_mask_control (struct brw_compile *p, uint32_t value); +cairo_private void brw_set_saturate (struct brw_compile *p, uint32_t value); +cairo_private void brw_set_access_mode (struct brw_compile *p, uint32_t access_mode); +cairo_private void brw_set_compression_control (struct brw_compile *p, int control); +cairo_private void brw_set_predicate_control_flag_value (struct brw_compile *p, uint32_t value); +cairo_private void brw_set_predicate_control (struct brw_compile *p, uint32_t pc); +cairo_private void brw_set_conditionalmod (struct brw_compile *p, uint32_t conditional); + +cairo_private void +brw_compile_init (struct brw_compile *p, + cairo_bool_t is_g4x); +cairo_private const uint32_t *brw_get_program (struct brw_compile *p, uint32_t *sz); + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +cairo_private_no_warn struct brw_instruction * \ +brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +cairo_private_no_warn struct 
brw_instruction * \ +brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + +/* Helpers for SEND instruction: */ +cairo_private void +brw_urb_WRITE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + int allocate, + int used, + uint32_t msg_length, + uint32_t response_length, + int eot, + int writes_complete, + uint32_t offset, + uint32_t swizzle); + +cairo_private void +brw_fb_WRITE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t msg_length, + uint32_t response_length, + int eot); + +cairo_private void +brw_SAMPLE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t writemask, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + cairo_bool_t eot); + +cairo_private void +brw_math_16 (struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t precision); + +cairo_private void +brw_math (struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t data_type, + uint32_t precision); + +cairo_private void +brw_dp_READ_16 (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + uint32_t scratch_offset); + +cairo_private void +brw_dp_WRITE_16 (struct brw_compile *p, + struct brw_reg src, + uint32_t msg_reg_nr, + uint32_t scratch_offset); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. 
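+ *
+ * A usage sketch (hypothetical, but following the declarations
+ * below; BRW_EXECUTE_8 is assumed to come from the brw defines
+ * header):
+ *
+ * insn = brw_IF (p, BRW_EXECUTE_8);
+ * ... emit the 'then' instructions ...
+ * insn = brw_ELSE (p, insn);
+ * ... emit the 'else' instructions ...
+ * brw_ENDIF (p, insn);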
+ */ +cairo_private struct brw_instruction * +brw_IF (struct brw_compile *p, + uint32_t execute_size); + +cairo_private struct brw_instruction * +brw_ELSE (struct brw_compile *p, + struct brw_instruction *if_insn); + +cairo_private void +brw_ENDIF (struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: */ +cairo_private struct brw_instruction * +brw_DO (struct brw_compile *p, + uint32_t execute_size); + +cairo_private struct brw_instruction * +brw_WHILE (struct brw_compile *p, + struct brw_instruction *patch_insn); + +cairo_private struct brw_instruction * +brw_BREAK (struct brw_compile *p); + +cairo_private struct brw_instruction * +brw_CONT (struct brw_compile *p); + +/* Forward jumps: */ +cairo_private void +brw_land_fwd_jump (struct brw_compile *p, + struct brw_instruction *jmp_insn); + +cairo_private void +brw_NOP (struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +cairo_private void +brw_CMP (struct brw_compile *p, + struct brw_reg dest, + uint32_t conditional, + struct brw_reg src0, + struct brw_reg src1); + +cairo_private void +brw_print_reg (struct brw_reg reg); + +cairo_private struct brw_instruction * +brw_next_instruction (struct brw_compile *p, + uint32_t opcode); + +cairo_private void +brw_instruction_set_destination (struct brw_instruction *insn, + struct brw_reg dest); + +cairo_private void +brw_instruction_set_source0 (struct brw_instruction *insn, + struct brw_reg reg); + +cairo_private void +brw_instruction_set_dp_write_message (struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t msg_length, + uint32_t pixel_scoreboard_clear, + uint32_t response_length, + uint32_t end_of_thread); + +/*********************************************************************** + * brw_eu_util.c: + */ + +cairo_private void +brw_copy_indirect_to_indirect (struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + uint32_t count); + +cairo_private void +brw_copy_from_indirect (struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + uint32_t count); + +cairo_private void +brw_copy4 (struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + uint32_t count); + +cairo_private void +brw_copy8 (struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + uint32_t count); + +cairo_private void +brw_math_invert (struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +cairo_private void +brw_set_src1 (struct brw_instruction *insn, + struct brw_reg reg); + +#endif diff --git a/src/drm/cairo-drm-intel-brw-structs.h b/src/drm/cairo-drm-intel-brw-structs.h new file mode 100644 index 00000000..f42483ed --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-structs.h @@ -0,0 +1,1328 @@ +/************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CAIRO_DRM_INTEL_BRW_STRUCTS_H +#define CAIRO_DRM_INTEL_BRW_STRUCTS_H + +#include "cairo-types-private.h" + +/* Command packets: +*/ +struct header { + unsigned int length:16; + unsigned int opcode:16; +}; + +union header_union { + struct header bits; + unsigned int dword; +}; + +struct brw_3d_control { + struct { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:3; + unsigned int wc_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int operation:2; + unsigned int opcode:16; + } header; + + struct { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } dest; + + unsigned int dword2; + unsigned int dword3; +}; + + +struct brw_3d_primitive { + struct { + unsigned int length:8; + unsigned int pad:2; + unsigned int topology:5; + unsigned int indexed:1; + unsigned int opcode:16; + } header; + + unsigned int verts_per_instance; + unsigned int start_vert_location; + unsigned int instance_count; + unsigned int start_instance_location; + unsigned int base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush { + unsigned int flags:4; + unsigned int pad:12; + unsigned int opcode:16; +}; + +struct brw_vf_statistics { + unsigned int statistics_enable:1; + unsigned int pad:15; + unsigned int opcode:16; +}; + + +struct brw_binding_table_pointers { + struct header header; + unsigned int vs; + unsigned int gs; + unsigned int clp; + unsigned int sf; + unsigned int wm; +}; + +struct brw_blend_constant_color { + struct header header; + float blend_constant_color[4]; +}; + +struct brw_depthbuffer { + union header_union header; + + union { + struct { + unsigned int pitch:18; + unsigned int format:3; + unsigned int pad:4; + unsigned int depth_offset_disable:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad2:1; + unsigned int surface_type:3; + } bits; + unsigned int dword; + } dword1; + + unsigned int dword2_base_addr; + + union { + struct { + unsigned int pad:1; + unsigned int mipmap_layout:1; + unsigned int lod:4; + unsigned int width:13; + 
unsigned int height:13; + } bits; + unsigned int dword; + } dword3; + + union { + struct { + unsigned int pad:12; + unsigned int min_array_element:9; + unsigned int depth:11; + } bits; + unsigned int dword; + } dword4; +}; + +struct brw_drawrect { + struct header header; + unsigned int xmin:16; + unsigned int ymin:16; + unsigned int xmax:16; + unsigned int ymax:16; + unsigned int xorg:16; + unsigned int yorg:16; +}; + +struct brw_global_depth_offset_clamp { + struct header header; + float depth_offset_clamp; +}; + +struct brw_indexbuffer { + union { + struct { + unsigned int length:8; + unsigned int index_format:2; + unsigned int cut_index_enable:1; + unsigned int pad:5; + unsigned int opcode:16; + } bits; + unsigned int dword; + } header; + unsigned int buffer_start; + unsigned int buffer_end; +}; + + +struct brw_line_stipple { + struct header header; + + struct { + unsigned int pattern:16; + unsigned int pad:16; + } bits0; + + struct { + unsigned int repeat_count:9; + unsigned int pad:7; + unsigned int inverse_repeat_count:16; + } bits1; +}; + +struct brw_pipelined_state_pointers { + struct header header; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } vs; + + struct { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } gs; + + struct { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } clp; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } sf; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } wm; + + struct { + unsigned int pad:6; + unsigned int offset:26; + } cc; +}; + +struct brw_polygon_stipple_offset { + struct header header; + + struct { + unsigned int y_offset:5; + unsigned int pad:3; + unsigned int x_offset:5; + unsigned int pad0:19; + } bits0; +}; + +struct brw_polygon_stipple { + struct header header; + unsigned int stipple[32]; +}; + +struct brw_pipeline_select { + struct { + unsigned int pipeline_select:1; + unsigned int pad:15; + unsigned int opcode:16; + } header; +}; + +struct brw_pipe_control { + struct { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:2; + unsigned int instruction_state_cache_flush_enable:1; + unsigned int write_cache_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int post_sync_operation:2; + + unsigned int opcode:16; + } header; + + struct { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } bits1; + + unsigned int data0; + unsigned int data1; +}; + + +struct brw_urb_fence { + struct { + unsigned int length:8; + unsigned int vs_realloc:1; + unsigned int gs_realloc:1; + unsigned int clp_realloc:1; + unsigned int sf_realloc:1; + unsigned int vfe_realloc:1; + unsigned int cs_realloc:1; + unsigned int pad:2; + unsigned int opcode:16; + } header; + + struct { + unsigned int vs_fence:10; + unsigned int gs_fence:10; + unsigned int clp_fence:10; + unsigned int pad:2; + } bits0; + + struct { + unsigned int sf_fence:10; + unsigned int vf_fence:10; + unsigned int cs_fence:10; + unsigned int pad:2; + } bits1; +}; + +struct brw_constant_buffer_state { + struct header header; + + struct { + unsigned int nr_urb_entries:3; + unsigned int pad:1; + unsigned int urb_entry_size:5; + unsigned int pad0:23; + } bits0; +}; + +struct brw_constant_buffer { + struct { + unsigned int length:8; + unsigned int valid:1; + unsigned int pad:7; + unsigned int opcode:16; + } header; + + struct { + unsigned int buffer_length:6; + unsigned int buffer_address:26; + } bits0; +}; + +struct brw_state_base_address { + 
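/* (Roughly: state pointers in subsequent packets are interpreted
+ * relative to the base addresses programmed here, when the
+ * corresponding modify_enable bit is set.) */
+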
struct header header; + + struct { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int general_state_address:27; + } bits0; + + struct { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int surface_state_address:27; + } bits1; + + struct { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int indirect_object_state_address:27; + } bits2; + + struct { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int general_state_upper_bound:20; + } bits3; + + struct { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch { + struct header header; + + struct { + unsigned int prefetch_count:3; + unsigned int pad:3; + unsigned int prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer { + struct header header; + + struct { + unsigned int pad:4; + unsigned int system_instruction_pointer:28; + } bits0; +}; + + +/* State structs for the various fixed function units: +*/ + +struct thread0 { + unsigned int pad0:1; + unsigned int grf_reg_count:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer:26; +}; + +struct thread1 { + unsigned int ext_halt_exception_enable:1; + unsigned int sw_exception_enable:1; + unsigned int mask_stack_exception_enable:1; + unsigned int timeout_exception_enable:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad0:3; + unsigned int depth_coef_urb_read_offset:6; /* WM only */ + unsigned int pad1:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad3:5; + unsigned int single_program_flow:1; +}; + +struct thread2 { + unsigned int per_thread_scratch_space:4; + unsigned int pad0:6; + unsigned int scratch_space_base_pointer:22; +}; + +struct thread3 { + unsigned int dispatch_grf_start_reg:4; + unsigned int urb_entry_read_offset:6; + unsigned int pad0:1; + unsigned int urb_entry_read_length:6; + unsigned int pad1:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int pad2:1; + unsigned int const_urb_entry_read_length:6; + unsigned int pad3:1; +}; + +struct brw_clip_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:9; + unsigned int gs_output_stats:1; /* not always */ + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; /* may be less */ + unsigned int pad3:1; + } thread4; + + struct { + unsigned int pad0:13; + unsigned int clip_mode:3; + unsigned int userclip_enable_flags:8; + unsigned int userclip_must_clip:1; + unsigned int pad1:1; + unsigned int guard_band_enable:1; + unsigned int viewport_z_clip_enable:1; + unsigned int viewport_xy_clip_enable:1; + unsigned int vertex_position_space:1; + unsigned int api_mode:1; + unsigned int pad2:1; + } clip5; + + struct { + unsigned int pad0:5; + unsigned int clipper_viewport_state_ptr:27; + } clip6; + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + +struct brw_cc_unit_state { + struct { + unsigned int pad0:3; + unsigned int bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned 
int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } cc0; + + struct { + unsigned int bf_stencil_ref:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + unsigned int stencil_ref:8; + } cc1; + + struct { + unsigned int logicop_enable:1; + unsigned int pad0:10; + unsigned int depth_write_enable:1; + unsigned int depth_test_function:3; + unsigned int depth_test:1; + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + } cc2; + + struct { + unsigned int pad0:8; + unsigned int alpha_test_func:3; + unsigned int alpha_test:1; + unsigned int blend_enable:1; + unsigned int ia_blend_enable:1; + unsigned int pad1:1; + unsigned int alpha_test_format:1; + unsigned int pad2:16; + } cc3; + + struct { + unsigned int pad0:5; + unsigned int cc_viewport_state_offset:27; + } cc4; + + struct { + unsigned int pad0:2; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_src_blend_factor:5; + unsigned int ia_blend_function:3; + unsigned int statistics_enable:1; + unsigned int logicop_func:4; + unsigned int pad1:11; + unsigned int dither_enable:1; + } cc5; + + struct { + unsigned int clamp_post_alpha_blend:1; + unsigned int clamp_pre_alpha_blend:1; + unsigned int clamp_range:2; + unsigned int pad0:11; + unsigned int y_dither_offset:2; + unsigned int x_dither_offset:2; + unsigned int dest_blend_factor:5; + unsigned int src_blend_factor:5; + unsigned int blend_function:3; + } cc6; + + struct { + union { + float f; + unsigned char ub[4]; + } alpha_ref; + } cc7; +}; + +struct brw_sf_unit_state { + struct thread0 thread0; + struct { + unsigned int pad0:7; + unsigned int sw_exception_enable:1; + unsigned int pad1:3; + unsigned int mask_stack_exception_enable:1; + unsigned int pad2:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad3:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad4:5; + unsigned int single_program_flow:1; + } sf1; + + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; + unsigned int pad3:1; + } thread4; + + struct { + unsigned int front_winding:1; + unsigned int viewport_transform:1; + unsigned int pad0:3; + unsigned int sf_viewport_state_offset:27; + } sf5; + + struct { + unsigned int pad0:9; + unsigned int dest_org_vbias:4; + unsigned int dest_org_hbias:4; + unsigned int scissor:1; + unsigned int disable_2x2_trifilter:1; + unsigned int disable_zero_pix_trifilter:1; + unsigned int point_rast_rule:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int line_width:4; + unsigned int fast_scissor_disable:1; + unsigned int cull_mode:2; + unsigned int aa_enable:1; + } sf6; + + struct { + unsigned int point_size:11; + unsigned int use_point_size_state:1; + unsigned int subpixel_precision:1; + unsigned int sprite_point:1; + unsigned int pad0:11; + unsigned int trifan_pv:2; + unsigned int linestrip_pv:2; + unsigned int tristrip_pv:2; + unsigned int line_last_pixel_enable:1; + } sf7; +}; + +struct brw_gs_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int 
nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:1; + unsigned int pad3:6; + } thread4; + + struct { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } gs5; + + struct { + unsigned int max_vp_index:4; + unsigned int pad0:26; + unsigned int reorder_enable:1; + unsigned int pad1:1; + } gs6; +}; + +struct brw_vs_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:4; + unsigned int pad3:3; + } thread4; + + struct { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } vs5; + + struct { + unsigned int vs_enable:1; + unsigned int vert_cache_disable:1; + unsigned int pad0:30; + } vs6; +}; + +struct brw_wm_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int stats_enable:1; + unsigned int pad0:1; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } wm4; + + struct { + unsigned int enable_8_pix:1; + unsigned int enable_16_pix:1; + unsigned int enable_32_pix:1; + unsigned int pad0:7; + unsigned int legacy_global_depth_bias:1; + unsigned int line_stipple:1; + unsigned int depth_offset:1; + unsigned int polygon_stipple:1; + unsigned int line_aa_region_width:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int early_depth_test:1; + unsigned int thread_dispatch_enable:1; + unsigned int program_uses_depth:1; + unsigned int program_computes_depth:1; + unsigned int program_uses_killpixel:1; + unsigned int legacy_line_rast: 1; + unsigned int transposed_urb_read:1; + unsigned int max_threads:7; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; +}; + +/* The hardware supports two different modes for border color. The + * default (OpenGL) mode uses floating-point color channels, while the + * legacy mode uses 4 bytes. + * + * More significantly, the legacy mode respects the components of the + * border color for channels not present in the source, (whereas the + * default mode will ignore the border color's alpha channel and use + * alpha==1 for an RGB source, for example). + * + * The legacy mode matches the semantics specified by the Render + * extension. 
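+ *
+ * (For example, when sampling the border of an x8r8g8b8 source the
+ * default mode substitutes alpha==1 regardless of the stored border
+ * alpha, while the legacy mode returns all four stored components --
+ * which is what a RENDER-style transparent (0,0,0,0) border needs.)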
+ */ +struct brw_sampler_default_border_color { + float color[4]; +}; + +struct brw_sampler_legacy_border_color { + uint8_t color[4]; +}; + +struct brw_sampler_state { + struct { + unsigned int shadow_function:3; + unsigned int lod_bias:11; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; + unsigned int pad:1; + unsigned int lod_preclamp:1; + unsigned int border_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad:3; + unsigned int max_lod:10; + unsigned int min_lod:10; + } ss1; + + struct { + unsigned int pad:5; + unsigned int border_color_pointer:27; + } ss2; + + struct { + unsigned int pad:19; + unsigned int max_aniso:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int monochrome_filter_width:3; + unsigned int monochrome_filter_height:3; + } ss3; +}; + +struct brw_clipper_viewport { + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct brw_cc_viewport { + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport { + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... +*/ +struct brw_surface_state { + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int pad:3; + unsigned int render_cache_read_mode:1; + unsigned int mipmap_layout_mode:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int color_blend:1; + unsigned int writedisable_blue:1; + unsigned int writedisable_green:1; + unsigned int writedisable_red:1; + unsigned int writedisable_alpha:1; + unsigned int surface_format:9; + unsigned int data_return_format:1; + unsigned int pad0:1; + unsigned int surface_type:3; + } ss0; + + struct { + unsigned int base_addr; + } ss1; + + struct { + unsigned int render_target_rotation:2; + unsigned int mip_count:4; + unsigned int width:13; + unsigned int height:13; + } ss2; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad:1; + unsigned int pitch:18; + unsigned int depth:11; + } ss3; + + struct { + unsigned int pad:19; + unsigned int min_array_elt:9; + unsigned int min_lod:4; + } ss4; + + struct { + unsigned int pad:20; + unsigned int y_offset:4; + unsigned int pad2:1; + unsigned int x_offset:7; + } ss5; +}; + +struct brw_vertex_buffer_state { + struct { + unsigned int pitch:11; + unsigned int pad:15; + unsigned int access_type:1; + unsigned int vb_index:5; + } vb0; + + unsigned int start_addr; + unsigned int max_index; +#if 1 + unsigned int instance_data_step_rate; /* not included for sequential/random vertices? 
*/ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + +struct brw_vertex_element_state { + struct { + unsigned int src_offset:11; + unsigned int pad:5; + unsigned int src_format:9; + unsigned int pad0:1; + unsigned int valid:1; + unsigned int vertex_buffer_index:5; + } ve0; + + struct { + unsigned int dst_offset:8; + unsigned int pad:8; + unsigned int vfcomponent3:4; + unsigned int vfcomponent2:4; + unsigned int vfcomponent1:4; + unsigned int vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; +}; + +struct brw_urb_immediate { + unsigned int opcode:4; + unsigned int offset:6; + unsigned int swizzle_control:2; + unsigned int pad:1; + unsigned int allocate:1; + unsigned int used:1; + unsigned int complete:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; +}; + +/* Instruction format for the execution units: */ + +struct brw_instruction { + struct { + unsigned int opcode:7; + unsigned int pad:1; + unsigned int access_mode:1; + unsigned int mask_control:1; + unsigned int dependency_control:2; + unsigned int compression_control:2; + unsigned int thread_control:2; + unsigned int predicate_control:4; + unsigned int predicate_inverse:1; + unsigned int execution_size:3; + unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned int pad0:2; + unsigned int debug_control:1; + unsigned int saturate:1; + } header; + + union { + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad:1; + unsigned int dest_subreg_nr:5; + unsigned int dest_reg_nr:8; + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } da1; + + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } ia1; + + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad0:1; + unsigned int dest_writemask:4; + unsigned int dest_subreg_nr:1; + unsigned int dest_reg_nr:8; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } da16; + + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad0:6; + unsigned int dest_writemask:4; + int dest_indirect_offset:6; + unsigned int dest_subreg_nr:3; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct { + unsigned int src0_subreg_nr:5; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } da1; + + struct { + int 
src0_indirect_offset:10; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } ia1; + + struct { + unsigned int src0_swz_x:2; + unsigned int src0_swz_y:2; + unsigned int src0_subreg_nr:1; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } da16; + + struct { + unsigned int src0_swz_x:2; + unsigned int src0_swz_y:2; + int src0_indirect_offset:6; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia16; + + } bits2; + + union { + struct { + unsigned int src1_subreg_nr:5; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int pad0:7; + } da1; + + struct { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + unsigned int src1_subreg_nr:1; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int pad2:7; + } da16; + + struct { + int src1_indirect_offset:10; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia1; + + struct { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + int src1_indirect_offset:6; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad2:6; + } ia16; + + struct { + int jump_count:16; /* note: signed */ + unsigned int pop_count:4; + unsigned int pad0:12; + } if_else; + + struct { + unsigned int function:4; + unsigned int int_type:1; + unsigned int precision:1; + unsigned int saturate:1; + unsigned int data_type:1; + unsigned int pad0:8; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } math; + + struct { + unsigned int binding_table_index:8; + unsigned int sampler:4; + unsigned int return_format:2; + unsigned int msg_type:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } sampler; + + struct { + uint32_t binding_table_index:8; + uint32_t sampler:4; + uint32_t msg_type:4; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } sampler_g4x; + + struct brw_urb_immediate urb; + + struct { + unsigned int 
binding_table_index:8; + unsigned int msg_control:4; + unsigned int msg_type:2; + unsigned int target_cache:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_read; + + struct { + unsigned int binding_table_index:8; + unsigned int msg_control:3; + unsigned int pixel_scoreboard_clear:1; + unsigned int msg_type:3; + unsigned int send_commit_msg:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_write; + + struct { + unsigned int pad:16; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } generic; + + uint32_t ud; + int32_t d; + } bits3; +}; + +/* media pipeline */ + +struct brw_vfe_state { + struct { + unsigned int per_thread_scratch_space:4; + unsigned int pad3:3; + unsigned int extend_vfe_state_present:1; + unsigned int pad2:2; + unsigned int scratch_base:22; + } vfe0; + + struct { + unsigned int debug_counter_control:2; + unsigned int children_present:1; + unsigned int vfe_mode:4; + unsigned int pad2:2; + unsigned int num_urb_entries:7; + unsigned int urb_entry_alloc_size:9; + unsigned int max_threads:7; + } vfe1; + + struct { + unsigned int pad4:4; + unsigned int interface_descriptor_base:28; + } vfe2; +}; + +struct brw_vld_state { + struct { + unsigned int pad6:6; + unsigned int scan_order:1; + unsigned int intra_vlc_format:1; + unsigned int quantizer_scale_type:1; + unsigned int concealment_motion_vector:1; + unsigned int frame_predict_frame_dct:1; + unsigned int top_field_first:1; + unsigned int picture_structure:2; + unsigned int intra_dc_precision:2; + unsigned int f_code_0_0:4; + unsigned int f_code_0_1:4; + unsigned int f_code_1_0:4; + unsigned int f_code_1_1:4; + } vld0; + + struct { + unsigned int pad2:9; + unsigned int picture_coding_type:2; + unsigned int pad:21; + } vld1; + + struct { + unsigned int index_0:4; + unsigned int index_1:4; + unsigned int index_2:4; + unsigned int index_3:4; + unsigned int index_4:4; + unsigned int index_5:4; + unsigned int index_6:4; + unsigned int index_7:4; + } desc_remap_table0; + + struct { + unsigned int index_8:4; + unsigned int index_9:4; + unsigned int index_10:4; + unsigned int index_11:4; + unsigned int index_12:4; + unsigned int index_13:4; + unsigned int index_14:4; + unsigned int index_15:4; + } desc_remap_table1; +}; + +struct brw_interface_descriptor { + struct { + unsigned int grf_reg_blocks:4; + unsigned int pad:2; + unsigned int kernel_start_pointer:26; + } desc0; + + struct { + unsigned int pad:7; + unsigned int software_exception:1; + unsigned int pad2:3; + unsigned int maskstack_exception:1; + unsigned int pad3:1; + unsigned int illegal_opcode_exception:1; + unsigned int pad4:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int single_program_flow:1; + unsigned int pad5:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int const_urb_entry_read_len:6; + } desc1; + + struct { + unsigned int pad:2; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } desc2; + + struct { + unsigned int binding_table_entry_count:5; + unsigned int binding_table_pointer:27; + } desc3; +}; + +#endif diff --git a/src/drm/cairo-drm-intel-command-private.h b/src/drm/cairo-drm-intel-command-private.h new file mode 100644 index 00000000..3860c3f2 --- /dev/null +++ 
b/src/drm/cairo-drm-intel-command-private.h @@ -0,0 +1,909 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CAIRO_DRM_INTEL_COMMAND_PRIVATE_H +#define CAIRO_DRM_INTEL_COMMAND_PRIVATE_H + +#include "cairo-types-private.h" + +#define CMD_MI (0x0 << 29) +#define CMD_misc (0x1 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) +/* 4-7 reserved */ + +#define MI_NOOP (CMD_MI | 0) +/* Batch */ +#define MI_BATCH_BUFFER (CMD_MI | (0x30 << 23) | 1) +#define MI_BATCH_BUFFER_START (CMD_MI | (0x31 << 23)) +#define MI_BATCH_BUFFER_END (CMD_MI | (0x0a << 23)) +#define MI_BATCH_NON_SECURE (1) +#define MI_BATCH_NON_SECURE_I965 (1 << 8) +/* Flush */ +#define MI_FLUSH (CMD_MI | (0x04 << 23)) +#define MI_WRITE_DIRTY_STATE (1<<4) +#define MI_END_SCENE (1<<3) +#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) +#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2) +#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1) +#define MI_INVALIDATE_MAP_CACHE (1<<0) + +#define PRIM3D (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (PRIM3D | (0x0<<18)) +#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18)) +#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18)) +#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18)) +#define PRIM3D_POLY (PRIM3D | (0x4<<18)) +#define PRIM3D_LINELIST (PRIM3D | (0x5<<18)) +#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18)) +#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18)) +#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18)) +#define PRIM3D_DIB (PRIM3D | (0x9<<18)) +#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18)) +#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18)) +#define PRIM3D_MASK (0x1f<<18) +#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17)) +#define PRIM3D_INDIRECT_ELTS ((1<<23) | (1<<17)) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define 
_3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (x) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + +/* The depth subrectangle is not supported, but must be disabled. 
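(The _3DSTATE_DEPTH_SUBRECT_DISABLE packet below is that disable.)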
*/ +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0)) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define COLR_BUF_ARGB2AAA (0xa<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define 
BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0U) + +#define TEXCOORD_WRAP_SHORTEST_TCX 8 +#define TEXCOORD_WRAP_SHORTEST_TCY 4 +#define TEXCOORD_WRAP_SHORTEST_TCZ 2 +#define TEXCOORD_PERSPECTIVE_DISABLE 1 + +#define S3_WRAP_SHORTEST_TCX(unit) 
(TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4)) +#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4)) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define 
S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK (0xf<<18) +#define LOGICOP_COPY 0xc +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE (1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16) + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n) (1<<(n)) +#define PS2_CONST_X(n) (n) +#define PS3_CONST_Y(n) (n) +#define PS4_CONST_Z(n) (n) +#define PS5_CONST_W(n) (n) + +/* p222 */ + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? 
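+ *
+ * (Editorial note, not from the original header: whichever of
+ * these REG_TYPE_* values is used, an operand reference packs the
+ * type into a 3-bit field and the register number into a 4-bit
+ * field -- see REG_TYPE_MASK and REG_NR_MASK just below, and the
+ * A0_/A1_/A2_ operand shifts further down.)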
+ */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 
1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. 
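+ *
+ * As an editorial sketch (not part of the original comment), a
+ * minimal one-texture program built from the macros in this
+ * header might be encoded as three-dword triples along these
+ * lines:
+ *
+ *   dcl t0.xy: D0_DCL | (REG_TYPE_T << D0_TYPE_SHIFT) |
+ *              (T_TEX0 << D0_NR_SHIFT) | D0_CHANNEL_XY,
+ *              D1_MBZ, D2_MBZ
+ *   dcl s0:    D0_DCL | (REG_TYPE_S << D0_TYPE_SHIFT) |
+ *              (0 << D0_NR_SHIFT) | D0_CHANNEL_NONE,
+ *              D1_MBZ, D2_MBZ
+ *   texld oC:  T0_TEXLD | (REG_TYPE_OC << T0_DEST_TYPE_SHIFT) |
+ *              (0 << T0_DEST_NR_SHIFT) | (0 << T0_SAMPLER_NR_SHIFT),
+ *              (REG_TYPE_T << T1_ADDRESS_REG_TYPE_SHIFT) |
+ *              (T_TEX0 << T1_ADDRESS_REG_NR_SHIFT),
+ *              T2_MBZ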
*/ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + +/* p207. + * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK + */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +/* The pitch is the pitch measured in DWORDS, minus 1 */ +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define 
MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244. + * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK. + */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define FLUSH_MAP_CACHE (1<<0) +#define FLUSH_RENDER_CACHE (1<<1) + +/* BLT commands */ +#define COLOR_BLT_CMD (CMD_2D | (0x40 << 22) | 3) +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) +#define XY_SETUP_CLIP_BLT_CMD (CMD_2D | (0x03 << 22) | 1) +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) +#define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22) | 4) + +#define XY_MONO_PAT_BLT_CMD (CMD_2D | (0x52<<22)|0x7) +#define XY_MONO_PAT_VERT_SEED ((1<<10) | (1<<9)|(1<<8)) +#define XY_MONO_PAT_HORT_SEED ((1<<14) | (1<<13)|(1<<12)) +#define XY_MONO_SRC_BLT_CMD (CMD_2D | (0x54<<22)|(0x6)) + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) +#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) +#define XY_TEXT_BYTE_PACKED (1 << 16) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 
24) +#define BR13_8888 (0x3 << 24) + +#endif /* CAIRO_DRM_INTEL_COMMAND_PRIVATE_H */ diff --git a/src/drm/cairo-drm-intel-debug.c b/src/drm/cairo-drm-intel-debug.c new file mode 100644 index 00000000..cc2e47a1 --- /dev/null +++ b/src/drm/cairo-drm-intel-debug.c @@ -0,0 +1,1208 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "cairoint.h" +#include "cairo-drm-intel-private.h" + +struct debug_stream { + unsigned offset; /* current gtt offset */ + const char *ptr; /* pointer to gtt offset zero */ + const char *end; /* pointer to end of batch */ +}; + +static cairo_bool_t +debug (struct debug_stream *stream, const char *name, uint32_t len) +{ + uint32_t i; + const uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + + if (len == 0) { + fprintf (stderr, "Error - zero length packet (0x%08x)\n", ptr[0]); + ASSERT_NOT_REACHED; + return FALSE; + } + + fprintf (stderr, "%04x: ", stream->offset); + + fprintf (stderr, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + fprintf (stderr, "\t0x%08x\n", ptr[i]); + fprintf (stderr, "\n"); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + + +static const char * +get_prim_name (uint32_t val) +{ + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; + case PRIM3D_TRISTRIP: return "TRISTRIP"; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; + case PRIM3D_TRIFAN: return "TRIFAN"; + case PRIM3D_POLY: return "POLY"; + case PRIM3D_LINELIST: return "LINELIST"; + case PRIM3D_LINESTRIP: return "LINESTRIP"; + case PRIM3D_RECTLIST: return "RECTLIST"; + case PRIM3D_POINTLIST: return "POINTLIST"; + case PRIM3D_DIB: return "DIB"; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; + default: return "????"; + } +} + +static cairo_bool_t +debug_prim (struct debug_stream *stream, + const char *name, + cairo_bool_t dump_floats, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + uint32_t i; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s %s (%d dwords):\n", name, prim, len); + fprintf (stderr, "\t0x%08x\n", ptr[0]); + for (i = 1; i < len;
i++) { + if (dump_floats) + fprintf (stderr, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]); + else + fprintf (stderr, "\t0x%08x\n", ptr[i]); + } + + fprintf (stderr, "\n"); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +static const char *opcodes[] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + +static const int args[] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 mul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + +static const char *regname[] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(uint32_t type, uint32_t nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + fprintf (stderr, "T_DIFFUSE"); + return; + case T_SPECULAR: + fprintf (stderr, "T_SPECULAR"); + return; + case T_FOG_W: + fprintf (stderr, "T_FOG_W"); + return; + default: + fprintf (stderr, "T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + fprintf (stderr, "oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + fprintf (stderr, "oD"); + return; + } + break; + default: + break; + } + + fprintf (stderr, "%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + +static void +print_reg_neg_swizzle(uint32_t reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + fprintf (stderr, "."); + + for (i = 12; i >= 0; i -= 4) { + if (reg & (8 << i)) + fprintf (stderr, "-"); + + switch ((reg >> i) & 0x7) { + case 0: + fprintf (stderr, "x"); + break; + case 1: + fprintf (stderr, "y"); + break; + case 2: + fprintf (stderr, "z"); + break; + case 3: + fprintf (stderr, "w"); + break; + case 4: + fprintf (stderr, "0"); + break; + case 5: + fprintf (stderr, "1"); + break; + default: + fprintf (stderr, "?"); + break; + } + } +} + +static void +print_src_reg(uint32_t dword) +{ + uint32_t nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + uint32_t type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(type, nr); + print_reg_neg_swizzle(dword); +} + +static void +print_dest_reg(uint32_t dword) +{ + uint32_t nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + uint32_t type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + fprintf (stderr, "."); + if (dword & A0_DEST_CHANNEL_X) + fprintf (stderr, "x"); + if (dword & A0_DEST_CHANNEL_Y) + fprintf (stderr, "y"); + if (dword & A0_DEST_CHANNEL_Z) + fprintf (stderr, "z"); + if (dword & A0_DEST_CHANNEL_W) + fprintf (stderr, "w"); +} + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r)
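+/* (Editorial note: the src0 and src1 operand fields straddle a
+ * dword boundary in the instruction encoding, so the GET_SRCn_REG
+ * helpers above splice the halves back into the src2 layout that
+ * print_src_reg() then decodes with the A2_SRC2_* shifts.)
+ */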
+ +static void +print_arith_op(uint32_t opcode, const uint32_t * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(program[0]); + if (program[0] & A0_DEST_SATURATE) + fprintf (stderr, " = SATURATE "); + else + fprintf (stderr, " = "); + } + + fprintf (stderr, "%s ", opcodes[opcode]); + + print_src_reg(GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + fprintf (stderr, "\n"); + return; + } + + fprintf (stderr, ", "); + print_src_reg(GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + fprintf (stderr, "\n"); + return; + } + + fprintf (stderr, ", "); + print_src_reg(GET_SRC2_REG(program[2])); + fprintf (stderr, "\n"); + return; +} + +static void +print_tex_op(uint32_t opcode, const uint32_t * program) +{ + print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); + fprintf (stderr, " = "); + + fprintf (stderr, "%s ", opcodes[opcode]); + + fprintf (stderr, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + fprintf (stderr, "\n"); +} + +static void +print_dcl_op(uint32_t opcode, const uint32_t * program) +{ + fprintf (stderr, "%s ", opcodes[opcode]); + print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); + fprintf (stderr, "\n"); +} + +static void +i915_disassemble_program (const uint32_t * program, uint32_t sz) +{ + uint32_t size = program[0] & 0x1ff; + uint32_t i; + + fprintf (stderr, "\tPROGRAM\n"); + + assert(size + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + uint32_t opcode = program[0] & (0x1f << 24); + + fprintf (stderr, "\t\t"); + + if ((int) opcode >= A0_NOP && opcode <= A0_SLT) + print_arith_op(opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL) + print_tex_op(opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(opcode >> 24, program); + else + fprintf (stderr, "Unknown opcode 0x%x\n", opcode); + } + + fprintf (stderr, "\tEND-PROGRAM\n\n"); +} + +static cairo_bool_t +debug_program (struct debug_stream *stream, const char *name, uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + + if (len == 0) { + fprintf (stderr, "Error - zero length packet (0x%08x)\n", ptr[0]); + ASSERT_NOT_REACHED; + return FALSE; + } + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + i915_disassemble_program (ptr, len); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +static cairo_bool_t +debug_chain (struct debug_stream *stream, const char *name, uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t old_offset = stream->offset + len * sizeof(uint32_t); + uint32_t i; + + fprintf (stderr, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + fprintf (stderr, "\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + if (stream->offset < old_offset) + fprintf (stderr, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + fprintf (stderr, "\n...
skipping from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + + return TRUE; +} + +static cairo_bool_t +debug_variable_length_prim (struct debug_stream *stream) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + uint32_t i, len; + + uint16_t *idx = (uint16_t *)(ptr+1); + for (i = 0; idx[i] != 0xffff; i++) + ; + + len = 1+(i+2)/2; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "3DPRIM, %s variable length %d indices (%d dwords):\n", prim, i, len); + for (i = 0; i < len; i++) + fprintf (stderr, "\t0x%08x\n", ptr[i]); + fprintf (stderr, "\n"); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +#define BITS(dw, hi, lo, ...) \ + do { \ + unsigned himask = 0xffffffffU >> (31 - (hi)); \ + fprintf (stderr, "\t\t "); \ + fprintf (stderr, __VA_ARGS__); \ + fprintf (stderr, ": 0x%x\n", ((dw) & himask) >> (lo)); \ + } while (0) + +#define MBZ(dw, hi, lo) do { \ + unsigned x = (dw) >> (lo); \ + unsigned lomask = (1 << (lo)) - 1; \ + unsigned himask; \ + himask = (1UL << (hi)) - 1; \ + assert ((x & himask & ~lomask) == 0); \ +} while (0) + +#define FLAG(dw, bit, ... ) \ + do { \ + if (((dw) >> (bit)) & 1) { \ + fprintf (stderr, "\t\t "); \ + fprintf (stderr, __VA_ARGS__); \ + fprintf (stderr, "\n"); \ + } \ + } while (0) + +static cairo_bool_t +debug_load_immediate (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t bits = (ptr[0] >> 4) & 0xff; + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords, flags: %x):\n", name, len, bits); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + if (bits & (1<<0)) { + fprintf (stderr, "\t LIS0: 0x%08x\n", ptr[j]); + fprintf (stderr, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + BITS (ptr[j], 0, 0, "vb invalidate disable"); + j++; + } + if (bits & (1<<1)) { + fprintf (stderr, "\t LIS1: 0x%08x\n", ptr[j]); + BITS (ptr[j], 29, 24, "vb dword width"); + BITS (ptr[j], 21, 16, "vb dword pitch"); + BITS (ptr[j], 15, 0, "vb max index"); + j++; + } + if (bits & (1<<2)) { + int i; + fprintf (stderr, "\t LIS2: 0x%08x\n", ptr[j]); + for (i = 0; i < 8; i++) { + unsigned tc = (ptr[j] >> (i * 4)) & 0xf; + if (tc != 0xf) + BITS (tc, 3, 0, "tex coord %d", i); + } + j++; + } + if (bits & (1<<3)) { + fprintf (stderr, "\t LIS3: 0x%08x\n", ptr[j]); + j++; + } + if (bits & (1<<4)) { + fprintf (stderr, "\t LIS4: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 23, "point width"); + BITS (ptr[j], 22, 19, "line width"); + FLAG (ptr[j], 18, "alpha flatshade"); + FLAG (ptr[j], 17, "fog flatshade"); + FLAG (ptr[j], 16, "spec flatshade"); + FLAG (ptr[j], 15, "rgb flatshade"); + BITS (ptr[j], 14, 13, "cull mode"); + FLAG (ptr[j], 12, "vfmt: point width"); + FLAG (ptr[j], 11, "vfmt: specular/fog"); + FLAG (ptr[j], 10, "vfmt: rgba"); + FLAG (ptr[j], 9, "vfmt: depth offset"); + BITS (ptr[j], 8, 6, "vfmt: position (2==xyzw)"); + FLAG (ptr[j], 5, "force dflt diffuse"); + FLAG (ptr[j], 4, "force dflt specular"); + FLAG (ptr[j], 3, "local depth offset enable"); + FLAG (ptr[j], 2, "vfmt: fp32 fog coord"); + FLAG (ptr[j], 1, "sprite point"); + FLAG (ptr[j], 0, "antialiasing"); + j++; + } + if (bits & (1<<5)) { + fprintf (stderr, "\t LIS5: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 28, "rgba write disables"); + FLAG (ptr[j], 27, "force dflt point width"); + FLAG (ptr[j], 26, "last pixel enable"); + FLAG (ptr[j], 25, "global z offset enable"); + FLAG (ptr[j], 24, "fog enable"); +
BITS (ptr[j], 23, 16, "stencil ref"); + BITS (ptr[j], 15, 13, "stencil test"); + BITS (ptr[j], 12, 10, "stencil fail op"); + BITS (ptr[j], 9, 7, "stencil pass z fail op"); + BITS (ptr[j], 6, 4, "stencil pass z pass op"); + FLAG (ptr[j], 3, "stencil write enable"); + FLAG (ptr[j], 2, "stencil test enable"); + FLAG (ptr[j], 1, "color dither enable"); + FLAG (ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + fprintf (stderr, "\t LIS6: 0x%08x\n", ptr[j]); + FLAG (ptr[j], 31, "alpha test enable"); + BITS (ptr[j], 30, 28, "alpha func"); + BITS (ptr[j], 27, 20, "alpha ref"); + FLAG (ptr[j], 19, "depth test enable"); + BITS (ptr[j], 18, 16, "depth func"); + FLAG (ptr[j], 15, "blend enable"); + BITS (ptr[j], 14, 12, "blend func"); + BITS (ptr[j], 11, 8, "blend src factor"); + BITS (ptr[j], 7, 4, "blend dst factor"); + FLAG (ptr[j], 3, "depth write enable"); + FLAG (ptr[j], 2, "color write enable"); + BITS (ptr[j], 1, 0, "provoking vertex"); + j++; + } + + fprintf (stderr, "\n"); + + assert(j == len); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +static cairo_bool_t +debug_load_indirect (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t bits = (ptr[0] >> 8) & 0x3f; + uint32_t i, j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + for (i = 0; i < 6; i++) { + if (bits & (1<<i)) { + switch (1<<(8+i)) { + case LI0_STATE_STATIC_INDIRECT: + fprintf (stderr, " STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_DYNAMIC_INDIRECT: + fprintf (stderr, " DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + break; + case LI0_STATE_SAMPLER: + fprintf (stderr, " SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_MAP: + fprintf (stderr, " MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_PROGRAM: + fprintf (stderr, " PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_CONSTANTS: + fprintf (stderr, " CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + default: + ASSERT_NOT_REACHED; + break; + } + } + } + + if (bits == 0) { + fprintf (stderr, "\t DUMMY: 0x%08x\n", ptr[j++]); + } + + fprintf (stderr, "\n"); + + assert(j == len); + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +static void +BR13 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x\n", val); + FLAG (val, 30, "clipping enable"); + BITS (val, 25, 24, "color depth (3==32bpp)"); + BITS (val, 23, 16, "raster op"); + BITS (val, 15, 0, "dest pitch"); +} + +static void +BR2223 (struct debug_stream *stream, + uint32_t val22, uint32_t val23) +{ + union { uint32_t val; short field[2]; } BR22, BR23; + + BR22.val = val22; + BR23.val = val23; + + fprintf (stderr, "\t0x%08x\n", val22); + BITS (val22, 31, 16, "dest y1"); + BITS (val22, 15, 0, "dest x1"); + + fprintf (stderr, "\t0x%08x\n", val23); + BITS (val23, 31, 16, "dest y2"); + BITS (val23, 15, 0, "dest x2"); + + /* The blit engine may produce unexpected results when these aren't met */ + assert(BR22.field[0] < BR23.field[0]); + assert(BR22.field[1] < BR23.field[1]); +} + +static void +BR09 (struct debug_stream *stream, + 
uint32_t val) +{ + fprintf (stderr, "\t0x%08x -- dest address\n", val); +} + +static void +BR26 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x\n", val); + BITS (val, 31, 16, "src y1"); + BITS (val, 15, 0, "src x1"); +} + +static void +BR11 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x\n", val); + BITS (val, 15, 0, "src pitch"); +} + +static void +BR12 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x -- src address\n", val); +} + +static void +BR16 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x -- color\n", val); +} + +static cairo_bool_t +debug_copy_blit (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR2223(stream, ptr[j], ptr[j+1]); + j += 2; + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_color_blit (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR2223(stream, ptr[j], ptr[j+1]); + j += 2; + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_modes4 (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 21, 18, "logicop func"); + FLAG (ptr[j], 17, "stencil test mask modify-enable"); + FLAG (ptr[j], 16, "stencil write mask modify-enable"); + BITS (ptr[j], 15, 8, "stencil test mask"); + BITS (ptr[j], 7, 0, "stencil write mask"); + fprintf (stderr, "\n"); + j++; + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_map_state (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + fprintf (stderr, "\t TMn.0: 0x%08x\n", ptr[j]); + fprintf (stderr, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG (ptr[j], 1, "vertical line stride"); + FLAG (ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + fprintf (stderr, "\t TMn.1: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 21, "height"); + BITS (ptr[j], 20, 10, "width"); + BITS (ptr[j], 9, 7, "surface format"); + BITS (ptr[j], 6, 3, "texel format"); + FLAG (ptr[j], 2, "use fence regs"); + FLAG (ptr[j], 1, "tiled surface"); + FLAG (ptr[j], 0, "tile walk ymajor"); + j++; + } + { + fprintf (stderr, "\t TMn.2: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 21, "dword pitch"); + BITS (ptr[j], 20, 15, 
"cube face enables"); + BITS (ptr[j], 14, 9, "max lod"); + FLAG (ptr[j], 8, "mip layout right"); + BITS (ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_sampler_state (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + fprintf (stderr, "\t TSn.0: 0x%08x\n", ptr[j]); + FLAG (ptr[j], 31, "reverse gamma"); + FLAG (ptr[j], 30, "planar to packed"); + FLAG (ptr[j], 29, "yuv->rgb"); + BITS (ptr[j], 28, 27, "chromakey index"); + BITS (ptr[j], 26, 22, "base mip level"); + BITS (ptr[j], 21, 20, "mip mode filter"); + BITS (ptr[j], 19, 17, "mag mode filter"); + BITS (ptr[j], 16, 14, "min mode filter"); + BITS (ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG (ptr[j], 4, "shadow enable"); + FLAG (ptr[j], 3, "max-aniso-4"); + BITS (ptr[j], 2, 0, "shadow func"); + j++; + } + + { + fprintf (stderr, "\t TSn.1: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG (ptr[j], 17, "kill pixel enable"); + FLAG (ptr[j], 16, "keyed tex filter mode"); + FLAG (ptr[j], 15, "chromakey enable"); + BITS (ptr[j], 14, 12, "tcx wrap mode"); + BITS (ptr[j], 11, 9, "tcy wrap mode"); + BITS (ptr[j], 8, 6, "tcz wrap mode"); + FLAG (ptr[j], 5, "normalized coords"); + BITS (ptr[j], 4, 1, "map (surface) index"); + FLAG (ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + fprintf (stderr, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_dest_vars (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + FLAG (ptr[j], 31, "early classic ztest"); + FLAG (ptr[j], 30, "opengl tex default color"); + FLAG (ptr[j], 29, "bypass iz"); + FLAG (ptr[j], 28, "lod preclamp"); + BITS (ptr[j], 27, 26, "dither pattern"); + FLAG (ptr[j], 25, "linear gamma blend"); + FLAG (ptr[j], 24, "debug dither"); + BITS (ptr[j], 23, 20, "dstorg x"); + BITS (ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS (ptr[j], 14, 12, "422 write select"); + BITS (ptr[j], 11, 8, "cbuf format"); + BITS (ptr[j], 3, 2, "zbuf format"); + FLAG (ptr[j], 1, "vert line stride"); + FLAG (ptr[j], 1, "vert line stride offset"); + j++; + } + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t debug_buf_info( struct debug_stream *stream, + const char *name, + uint32_t len ) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 28, 28, "aux buffer id"); + BITS (ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG (ptr[j], 23, "use fence regs"); + FLAG (ptr[j], 22, "tiled surface"); + FLAG 
(ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS (ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; + } + + fprintf (stderr, "\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +decode_3d_i915 (struct debug_stream *stream) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t cmd = *ptr; + + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug (stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug (stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug (stream, "3DSTATE_BACKFACE_STENCIL_OPS", 1); + case 0x9: + return debug (stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug (stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug (stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug (stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug (stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug (stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug (stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug (stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug (stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug (stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug (stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug (stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug (stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug (stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug (stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug (stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug (stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug (stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + ASSERT_NOT_REACHED; + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug (stream, "???", (cmd & 0xffff) + 1); + else + return debug (stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) { + return debug_prim (stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + } else if (cmd & (1 << 17)) { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim (stream); + else + return debug_prim (stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } else + return debug_prim (stream, "3DPRIM (indirect 
sequential)", 0, 2); + break; + default: + return debug (stream, "", 0); + } + + return FALSE; +} + +static cairo_bool_t +decode_3d_i965 (struct debug_stream *stream) +{ + const uint32_t *data = (uint32_t *) (stream->ptr + stream->offset); + const uint32_t opcode = (data[0] & 0xffff0000) >> 16; + unsigned int idx; + const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes_3d[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_STATE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + }, *opcode_3d; + + for (idx = 0; idx < ARRAY_LENGTH (opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if (opcode == opcode_3d->opcode) { + unsigned int len = 1; + if (opcode_3d->max_len != 1) + len = (data[0] & 0x000000ff) + 2; + return debug (stream, opcode_3d->name, len); + } + } + + return FALSE; +} + +static cairo_bool_t +decode_3d_i830 (struct debug_stream *stream) +{ + ASSERT_NOT_REACHED; + return FALSE; +} + +static cairo_bool_t +i915_debug_packet (struct debug_stream *stream, + int devid) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug (stream, "MI_NOOP", 1); + case 0x3: + return debug (stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug (stream, "MI_FLUSH", 1); + case 0xA: + debug (stream, "MI_BATCH_BUFFER_END", 1); + return FALSE; + case 0x22: + return debug (stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + break; + } + break; + case 0x1: + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug (stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + if (IS_965(devid)) + return decode_3d_i965 (stream); + else if (IS_9XX(devid)) + return decode_3d_i915 (stream); + else + return decode_3d_i830 (stream); + default: + break; + } + + fprintf (stderr, "Bogus cmd: %x [%x]\n", (cmd >> 29) & 7, cmd); + ASSERT_NOT_REACHED; + return 0; +} + +void +intel_dump_batchbuffer (const void *batch, + uint32_t length, + int devid) +{ + struct debug_stream stream; + cairo_bool_t done = FALSE; + + fprintf (stderr, "\nBATCH: (%d dwords)\n", length / 4); + + stream.offset = 0; + stream.ptr = batch; + + while (! done && stream.offset < length) { + if (! 
i915_debug_packet (&stream, devid)) + break; + + assert (stream.offset <= length); + } + + fprintf (stderr, "END-BATCH\n\n"); + fflush (stderr); +} diff --git a/src/drm/cairo-drm-intel-ioctl-private.h b/src/drm/cairo-drm-intel-ioctl-private.h new file mode 100644 index 00000000..74d76b9a --- /dev/null +++ b/src/drm/cairo-drm-intel-ioctl-private.h @@ -0,0 +1,417 @@ +/* Cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Chris Wilson + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + */ + +#ifndef CAIRO_DRM_INTEL_IOCTL_PRIVATE_H +#define CAIRO_DRM_INTEL_IOCTL_PRIVATE_H + +#include "cairo-drm-intel-command-private.h" + +/** @{ + * Intel memory domains + * + * Most of these just align with the various caches in + * the system and are used to flush and invalidate as + * objects end up cached in different domains. + */ +/** CPU cache */ +#define I915_GEM_DOMAIN_CPU 0x00000001 +/** Render cache, used by 2D and 3D drawing */ +#define I915_GEM_DOMAIN_RENDER 0x00000002 +/** Sampler cache, used by texture engine */ +#define I915_GEM_DOMAIN_SAMPLER 0x00000004 +/** Command queue, used to load batch buffers */ +#define I915_GEM_DOMAIN_COMMAND 0x00000008 +/** Instruction cache, used by shader programs */ +#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 +/** Vertex address cache */ +#define I915_GEM_DOMAIN_VERTEX 0x00000020 +/** GTT domain - aperture and scanout */ +#define I915_GEM_DOMAIN_GTT 0x00000040 +/** @} */ + +#define I915_TILING_NONE 0 +#define I915_TILING_X 1 +#define I915_TILING_Y 2 + +#define I915_BIT_6_SWIZZLE_NONE 0 +#define I915_BIT_6_SWIZZLE_9 1 +#define I915_BIT_6_SWIZZLE_9_10 2 +#define I915_BIT_6_SWIZZLE_9_11 3 +#define I915_BIT_6_SWIZZLE_9_10_11 4 + +#define DRM_I915_GEM_EXECBUFFER 0x14 +#define DRM_I915_GEM_BUSY 0x17 +#define DRM_I915_GEM_THROTTLE 0x18 +#define DRM_I915_GEM_CREATE 0x1b +#define DRM_I915_GEM_PREAD 0x1c +#define DRM_I915_GEM_PWRITE 0x1d +#define DRM_I915_GEM_MMAP 0x1e +#define DRM_I915_GEM_SET_DOMAIN 0x1f +#define DRM_I915_GEM_SET_TILING 0x21 +#define DRM_I915_GEM_GET_TILING 0x22 +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define DRM_I915_GEM_MMAP_GTT 0x24 + +struct drm_i915_gem_create { + /** + * Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + */ + uint64_t size; + /** + * Returned handle for the object. 
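+ *
+ * (Editorial sketch, assuming the usual libdrm entry point: a
+ * caller fills in size, issues
+ * drmIoctl (fd, DRM_IOCTL_I915_GEM_CREATE, &create), and on
+ * success reads the new handle back from this field.)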
+ * + * Object handles are nonzero. + */ + uint32_t handle; + uint32_t pad; +}; + +struct drm_i915_gem_pread { + /** Handle for the object being read. */ + uint32_t handle; + uint32_t pad; + /** Offset into the object to read from */ + uint64_t offset; + /** Length of data to read */ + uint64_t size; + /** + * Pointer to write the data into. + * + * This is a fixed-size type for 32/64 compatibility. + */ + uint64_t data_ptr; +}; + +struct drm_i915_gem_pwrite { + /** Handle for the object being written to. */ + uint32_t handle; + uint32_t pad; + /** Offset into the object to write to */ + uint64_t offset; + /** Length of data to write */ + uint64_t size; + /** + * Pointer to read the data from. + * + * This is a fixed-size type for 32/64 compatibility. + */ + uint64_t data_ptr; +}; + +struct drm_i915_gem_mmap { + /** Handle for the object being mapped. */ + uint32_t handle; + uint32_t pad; + /** Offset in the object to map. */ + uint64_t offset; + /** + * Length of data to map. + * + * The value will be page-aligned. + */ + uint64_t size; + /** + * Returned pointer the data was mapped at. + * + * This is a fixed-size type for 32/64 compatibility. + */ + uint64_t addr_ptr; +}; + +struct drm_i915_gem_mmap_gtt { + /** Handle for the object being mapped. */ + uint32_t handle; + uint32_t pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + uint64_t offset; +}; + +struct drm_i915_gem_set_domain { + /** Handle for the object */ + uint32_t handle; + + /** New read domains */ + uint32_t read_domains; + + /** New write domain */ + uint32_t write_domain; +}; + +struct drm_i915_gem_relocation_entry { + /** + * Handle of the buffer being pointed to by this relocation entry. + * + * It's appealing to make this be an index into the mm_validate_entry + * list to refer to the buffer, but this allows the driver to create + * a relocation list for state buffers and not re-write it per + * exec using the buffer. + */ + uint32_t target_handle; + + /** + * Value to be added to the offset of the target buffer to make up + * the relocation entry. + */ + uint32_t delta; + + /** Offset in the buffer the relocation entry will be written into */ + uint64_t offset; + + /** + * Offset value of the target buffer that the relocation entry was last + * written as. + * + * If the buffer has the same offset as last time, we can skip syncing + * and writing the relocation. This value is written back out by + * the execbuffer ioctl when the relocation is written. + */ + uint64_t presumed_offset; + + /** + * Target memory domains read by this operation. + */ + uint32_t read_domains; + + /** + * Target memory domains written by this operation. + * + * Note that only one domain may be written by the whole + * execbuffer operation, so that where there are conflicts, + * the application will get -EINVAL back. + */ + uint32_t write_domain; +}; + +struct drm_i915_gem_exec_object { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + uint32_t handle; + + /** Number of relocations to be performed on this buffer */ + uint32_t relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + uint64_t relocs_ptr; + + /** Required alignment in graphics aperture */ + uint64_t alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. 
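+ *
+ * (Editorial sketch of the expected flow: userspace points
+ * relocs_ptr at an array of struct drm_i915_gem_relocation_entry,
+ * lists these objects through the execbuffer request's
+ * buffers_ptr, and copies this returned offset into
+ * presumed_offset for the next submission so that unchanged
+ * relocations can be skipped.)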
+ */ + uint64_t offset; +}; + +struct drm_i915_gem_execbuffer { + /** + * List of buffers to be validated with their relocations to be + * performed on them. + * + * This is a pointer to an array of struct drm_i915_gem_validate_entry. + * + * These buffers must be listed in an order such that all relocations + * a buffer is performing refer to buffers that have already appeared + * in the validate list. + */ + uint64_t buffers_ptr; + uint32_t buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + uint32_t batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + uint32_t batch_len; + uint32_t DR1; + uint32_t DR4; + uint32_t num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + uint64_t cliprects_ptr; +}; + +struct drm_i915_gem_busy { + /** Handle of the buffer to check for busy */ + uint32_t handle; + + /** Return busy status (1 if busy, 0 if idle) */ + uint32_t busy; +}; + +struct drm_i915_gem_set_tiling { + /** Handle of the buffer to have its tiling state updated */ + uint32_t handle; + + /** + * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + * + * This value is to be set on request, and will be updated by the + * kernel on successful return with the actual chosen tiling layout. + * + * The tiling mode may be demoted to I915_TILING_NONE when the system + * has bit 6 swizzling that can't be managed correctly by GEM. + * + * Buffer contents become undefined when changing tiling_mode. + */ + uint32_t tiling_mode; + + /** + * Stride in bytes for the object when in I915_TILING_X or + * I915_TILING_Y. + */ + uint32_t stride; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + uint32_t swizzle_mode; +}; + +struct drm_i915_gem_get_tiling { + /** Handle of the buffer to get tiling state for. */ + uint32_t handle; + + /** + * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + */ + uint32_t tiling_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping.
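+ *
+ * (Editorial example, assuming the usual GEM bit-6 swizzle
+ * semantics: with I915_BIT_6_SWIZZLE_9_10, a CPU copy through an
+ * mmap would toggle bit 6 of each byte address by XORing in bits
+ * 9 and 10, e.g. addr ^= ((addr >> 3) ^ (addr >> 4)) & 64.)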
+ */ + uint32_t swizzle_mode; +}; + +struct drm_i915_gem_get_aperture { + /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ + uint64_t aper_size; + + /** + * Available space in the aperture used by i915_gem_execbuffer, in + * bytes + */ + uint64_t aper_available_size; +}; + + +#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) +#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) +#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) +#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) +#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) +#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) +#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) +#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) +#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) + +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 + +struct drm_i915_gem_madvise { + uint32_t handle; + uint32_t madv; + uint32_t retained; +}; +#define DRM_I915_GEM_MADVISE 0x26 +#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) + + +/* XXX execbuffer2 */ +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + uint32_t handle; + + /** Number of relocations to be performed on this buffer */ + uint32_t relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + uint64_t relocs_ptr; + + /** Required alignment in graphics aperture */ + uint64_t alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. + */ + uint64_t offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) + uint64_t flags; + uint64_t rsvd1; + uint64_t rsvd2; +}; + +struct drm_i915_gem_execbuffer2 { + /** + * List of gem_exec_object2 structs + */ + uint64_t buffers_ptr; + uint32_t buffer_count; + + /** Offset in the batchbuffer to start execution from. 
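/* Sketch of driving the busy ioctl defined above: returns TRUE once the
 * GPU has finished with the object.  `fd` is the DRM file descriptor and
 * `handle` a GEM handle; the EINTR retry loop mirrors the other ioctl
 * callers in this patch. */
static cairo_bool_t
sketch_bo_is_idle (int fd, uint32_t handle)
{
    struct drm_i915_gem_busy busy;
    int ret;

    busy.handle = handle;
    busy.busy = 0;
    do {
	ret = ioctl (fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
    } while (ret == -1 && errno == EINTR);

    return ret == 0 && ! busy.busy;
}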
*/ + uint32_t batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + uint32_t batch_len; + uint32_t DR1; + uint32_t DR4; + uint32_t num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + uint64_t cliprects_ptr; + uint64_t flags; + uint64_t rsvd1; + uint64_t rsvd2; +}; + +#define I915_GEM_3D_PIPELINE 0x1 +#define I915_GEM_MEDIA_PIPELINE 0x2 +#define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) + +#endif /* CAIRO_DRM_INTEL_IOCTL_PRIVATE_H */ diff --git a/src/drm/cairo-drm-intel-private.h b/src/drm/cairo-drm-intel-private.h index f5791f3c..eedb700b 100644 --- a/src/drm/cairo-drm-intel-private.h +++ b/src/drm/cairo-drm-intel-private.h @@ -30,51 +30,30 @@ #ifndef CAIRO_DRM_INTEL_PRIVATE_H #define CAIRO_DRM_INTEL_PRIVATE_H +#include "cairoint.h" +#include "cairo-cache-private.h" #include "cairo-compiler-private.h" -#include "cairo-types-private.h" #include "cairo-drm-private.h" -#include "cairo-list-private.h" #include "cairo-freelist-private.h" +#include "cairo-list-private.h" #include "cairo-mutex-private.h" +#include "cairo-rtree-private.h" +#include "cairo-types-private.h" -/** @{ - * Intel memory domains - * - * Most of these just align with the various caches in - * the system and are used to flush and invalidate as - * objects end up cached in different domains. - */ -/** CPU cache */ -#define I915_GEM_DOMAIN_CPU 0x00000001 -/** Render cache, used by 2D and 3D drawing */ -#define I915_GEM_DOMAIN_RENDER 0x00000002 -/** Sampler cache, used by texture engine */ -#define I915_GEM_DOMAIN_SAMPLER 0x00000004 -/** Command queue, used to load batch buffers */ -#define I915_GEM_DOMAIN_COMMAND 0x00000008 -/** Instruction cache, used by shader programs */ -#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 -/** Vertex address cache */ -#define I915_GEM_DOMAIN_VERTEX 0x00000020 -/** GTT domain - aperture and scanout */ -#define I915_GEM_DOMAIN_GTT 0x00000040 -/** @} */ - -#define I915_TILING_NONE 0 -#define I915_TILING_X 1 -#define I915_TILING_Y 2 - -#define I915_BIT_6_SWIZZLE_NONE 0 -#define I915_BIT_6_SWIZZLE_9 1 -#define I915_BIT_6_SWIZZLE_9_10 2 -#define I915_BIT_6_SWIZZLE_9_11 3 -#define I915_BIT_6_SWIZZLE_9_10_11 4 +#include "cairo-drm-intel-ioctl-private.h" + +#define NEAREST_BIAS (-.375) #define INTEL_TILING_DEFAULT I915_TILING_Y #define INTEL_BO_CACHE_BUCKETS 12 /* cache surfaces up to 16 MiB */ +#define INTEL_GLYPH_CACHE_WIDTH 1024 +#define INTEL_GLYPH_CACHE_HEIGHT 1024 +#define INTEL_GLYPH_CACHE_MIN_SIZE 1 +#define INTEL_GLYPH_CACHE_MAX_SIZE 128 + typedef struct _intel_bo { cairo_drm_bo_t base; @@ -86,12 +65,125 @@ typedef struct _intel_bo { uint32_t tiling; uint32_t swizzle; uint32_t stride; + cairo_bool_t purgeable; + + uint32_t opaque0; + uint32_t opaque1; - cairo_bool_t in_batch; - uint32_t read_domains; - uint32_t write_domain; + struct drm_i915_gem_exec_object2 *exec; + uint32_t batch_read_domains; + uint32_t batch_write_domain; + + cairo_list_t link; } intel_bo_t; +#define INTEL_BATCH_SIZE (64*1024) +#define INTEL_VERTEX_BUFFER_SIZE (512*1024) +#define INTEL_MAX_RELOCS 2048 + +static inline void +intel_bo_mark_purgeable (intel_bo_t *bo, + cairo_bool_t purgeable) +{ + if (bo->base.name == 0) + bo->purgeable = purgeable; +} + +typedef struct _intel_vertex_buffer intel_vertex_buffer_t; + +typedef void (*intel_vertex_buffer_new_func_t) (intel_vertex_buffer_t *vertex_buffer); +typedef void (*intel_vertex_buffer_start_rectangles_func_t) 
(intel_vertex_buffer_t *vertex_buffer, + uint32_t floats_per_vertex); +typedef void (*intel_vertex_buffer_flush_func_t) (intel_vertex_buffer_t *vertex_buffer); +typedef void (*intel_vertex_buffer_finish_func_t) (intel_vertex_buffer_t *vertex_buffer); + +struct _intel_vertex_buffer { + uint32_t vbo_batch; /* reloc position in batch, 0 -> not yet allocated */ + uint32_t vbo_offset; + uint32_t vbo_used; + + uint32_t vertex_index; + uint32_t vertex_count; + + uint32_t floats_per_vertex; + uint32_t rectangle_size; + + intel_bo_t *last_vbo; + uint32_t last_vbo_offset; + uint32_t last_vbo_space; + + intel_vertex_buffer_new_func_t new; + intel_vertex_buffer_start_rectangles_func_t start_rectangles; + intel_vertex_buffer_flush_func_t flush; + intel_vertex_buffer_finish_func_t finish; + + uint32_t base[INTEL_VERTEX_BUFFER_SIZE / sizeof (uint32_t)]; +}; + +typedef struct _intel_batch intel_batch_t; + +typedef void (*intel_batch_commit_func_t) (intel_batch_t *batch); +typedef void (*intel_batch_reset_func_t) (intel_batch_t *batch); + +struct _intel_batch { + size_t gtt_size; + size_t gtt_avail_size; + + intel_batch_commit_func_t commit; + intel_batch_reset_func_t reset; + + uint16_t exec_count; + uint16_t reloc_count; + uint16_t used; + uint16_t header; + + intel_bo_t *target_bo[INTEL_MAX_RELOCS]; + struct drm_i915_gem_exec_object2 exec[INTEL_MAX_RELOCS]; + struct drm_i915_gem_relocation_entry reloc[INTEL_MAX_RELOCS]; + + uint32_t base[INTEL_BATCH_SIZE / sizeof (uint32_t)]; + + intel_vertex_buffer_t vertex_buffer; +}; + +typedef struct _intel_buffer { + intel_bo_t *bo; + uint32_t offset; + cairo_format_t format; + uint32_t map0, map1; + uint32_t width; + uint32_t height; + uint32_t stride; +} intel_buffer_t; + +typedef struct _intel_buffer_cache { + int ref_count; + intel_buffer_t buffer; + cairo_rtree_t rtree; + cairo_list_t link; +} intel_buffer_cache_t; + +typedef struct _intel_glyph { + cairo_rtree_node_t node; + intel_buffer_cache_t *cache; + void **owner; + float texcoord[3]; +} intel_glyph_t; + +typedef struct _intel_gradient_cache { + cairo_pattern_union_t pattern; + intel_buffer_t buffer; +} intel_gradient_cache_t; +#define GRADIENT_CACHE_SIZE 16 + +typedef struct _intel_surface { + cairo_drm_surface_t drm; + + cairo_cache_entry_t snapshot_cache_entry; +} intel_surface_t; + +typedef void (*intel_reset_context_func_t) (void *device); + typedef struct _intel_device { cairo_drm_device_t base; @@ -108,8 +200,54 @@ typedef struct _intel_device { size_t bo_cache_size; size_t bo_max_cache_size_high; size_t bo_max_cache_size_low; + + cairo_mutex_t mutex; + intel_batch_t batch; + + cairo_bool_t glyph_cache_mapped; + intel_buffer_cache_t glyph_cache[2]; + + struct { + intel_gradient_cache_t cache[GRADIENT_CACHE_SIZE]; + unsigned int size; + } gradient_cache; + + cairo_cache_t snapshot_cache; + size_t snapshot_cache_max_size; + + intel_reset_context_func_t reset_context; + + cairo_status_t (*flush) (struct _intel_device *); } intel_device_t; +static inline intel_device_t * +to_intel_device (cairo_device_t *base) +{ + return (intel_device_t *) base; +} + +static inline intel_bo_t * +to_intel_bo (cairo_drm_bo_t *base) +{ + return (intel_bo_t *) base; +} + +static inline intel_bo_t * +intel_bo_reference (intel_bo_t *bo) +{ + return to_intel_bo (cairo_drm_bo_reference (&bo->base)); +} + +cairo_private cairo_bool_t +intel_bo_madvise (intel_device_t *device, intel_bo_t *bo, int madv); + + +static always_inline void +intel_bo_destroy (intel_device_t *device, intel_bo_t *bo) +{ + cairo_drm_bo_destroy 
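/* Sketch of the accounting implied by the vertex-buffer struct above,
 * assuming rectangles are emitted as three vertices (as the RECTLIST
 * primitives used by these backends require); the field usage here is
 * illustrative. */
static cairo_bool_t
sketch_vb_has_room (const intel_vertex_buffer_t *vb, uint32_t count)
{
    /* rectangle_size = 3 vertices * floats_per_vertex * sizeof (float) */
    return vb->vbo_used + count * vb->rectangle_size
	<= INTEL_VERTEX_BUFFER_SIZE;
}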
(&device->base.base, &bo->base); +} + cairo_private cairo_bool_t intel_info (int fd, uint64_t *gtt_size); @@ -119,23 +257,27 @@ intel_device_init (intel_device_t *device, int fd); cairo_private void intel_device_fini (intel_device_t *dev); -cairo_private cairo_drm_bo_t * +cairo_private intel_bo_t * intel_bo_create (intel_device_t *dev, uint32_t size, cairo_bool_t gpu_target); -cairo_private void -intel_bo_release (void *_dev, void *_bo); - -cairo_private cairo_drm_bo_t * +cairo_private intel_bo_t * intel_bo_create_for_name (intel_device_t *dev, uint32_t name); cairo_private void -intel_bo_set_tiling (intel_device_t *dev, +intel_bo_set_tiling (const intel_device_t *dev, intel_bo_t *bo, uint32_t tiling, uint32_t stride); +cairo_private cairo_bool_t +intel_bo_is_inactive (const intel_device_t *device, + const intel_bo_t *bo); + +cairo_private void +intel_bo_wait (const intel_device_t *device, const intel_bo_t *bo); + cairo_private void intel_bo_write (const intel_device_t *dev, intel_bo_t *bo, @@ -150,9 +292,6 @@ intel_bo_read (const intel_device_t *dev, unsigned long size, void *data); -cairo_private void -intel_bo_wait (const intel_device_t *dev, intel_bo_t *bo); - cairo_private void * intel_bo_map (const intel_device_t *dev, intel_bo_t *bo); @@ -176,7 +315,203 @@ intel_bo_get_image (const intel_device_t *device, intel_bo_t *bo, const cairo_drm_surface_t *surface); +cairo_private cairo_status_t +intel_bo_put_image (intel_device_t *dev, + intel_bo_t *bo, int stride, + cairo_image_surface_t *src, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y); + +cairo_private void +intel_surface_init (intel_surface_t *surface, + const cairo_surface_backend_t *backend, + cairo_drm_device_t *device, + cairo_content_t content); + +cairo_private cairo_status_t +intel_buffer_cache_init (intel_buffer_cache_t *cache, + intel_device_t *device, + cairo_format_t format, + int width, int height); + +cairo_private cairo_status_t +intel_gradient_render (intel_device_t *device, + const cairo_gradient_pattern_t *pattern, + intel_buffer_t *buffer); + +cairo_private cairo_int_status_t +intel_get_glyph (intel_device_t *device, + cairo_scaled_font_t *scaled_font, + cairo_scaled_glyph_t *scaled_glyph); + +cairo_private void +intel_scaled_glyph_fini (cairo_scaled_glyph_t *scaled_glyph, + cairo_scaled_font_t *scaled_font); + +cairo_private void +intel_scaled_font_fini (cairo_scaled_font_t *scaled_font); + +cairo_private void +intel_glyph_cache_unmap (intel_device_t *device); + +cairo_private void +intel_glyph_cache_unpin (intel_device_t *device); + +static inline intel_glyph_t * +intel_glyph_pin (intel_glyph_t *glyph) +{ + cairo_rtree_node_t *node = &glyph->node; + if (unlikely (node->pinned == 0)) + return _cairo_rtree_pin (&glyph->cache->rtree, node); + return glyph; +} + +cairo_private cairo_status_t +intel_snapshot_cache_insert (intel_device_t *device, + intel_surface_t *surface); + +cairo_private void +intel_surface_detach_snapshot (cairo_surface_t *abstract_surface); + +cairo_private void +intel_snapshot_cache_thaw (intel_device_t *device); + cairo_private void intel_throttle (intel_device_t *device); +cairo_private cairo_status_t +intel_surface_acquire_source_image (void *abstract_surface, + cairo_image_surface_t **image_out, + void **image_extra); + +cairo_private void +intel_surface_release_source_image (void *abstract_surface, + cairo_image_surface_t *image, + void *image_extra); +cairo_private cairo_surface_t * +intel_surface_map_to_image (void *abstract_surface); + +cairo_private 
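/* Sketch of the ownership pattern for the helpers above: every
 * intel_bo_reference() is balanced by an intel_bo_destroy(), which drops
 * the drm-level reference and lets the bucket cache recycle the buffer.
 * `device` and `bo` are hypothetical. */
static void
sketch_bo_use (intel_device_t *device, intel_bo_t *bo)
{
    intel_bo_t *hold = intel_bo_reference (bo);

    /* ... record hold in a batch, cache, etc. ... */

    intel_bo_destroy (device, hold);
}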
cairo_status_t +intel_surface_flush (void *abstract_surface); + +cairo_private cairo_status_t +intel_surface_finish (void *abstract_surface); + +cairo_private void +intel_dump_batchbuffer (const void *batch, + uint32_t length, + int devid); + +static inline float cairo_const +texcoord_2d_16 (double x, double y) +{ + union { + uint32_t ui; + float f; + } u; + u.ui = (_cairo_half_from_float (y) << 16) | _cairo_half_from_float (x); + return u.f; +} + +static inline uint32_t cairo_const +MS3_tiling (uint32_t tiling) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return 0; + case I915_TILING_X: return MS3_TILED_SURFACE; + case I915_TILING_Y: return MS3_TILED_SURFACE | MS3_TILE_WALK; + } +} + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM) +#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ + devid == PCI_CHIP_I915_GM || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_GM45_GM || IS_IGD(devid)) + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ + devid == PCI_CHIP_G41_G) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define IS_IRONLAKE(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_915(devid) (devid == PCI_CHIP_I915_G || \ + devid == PCI_CHIP_E7221_G || \ + devid == PCI_CHIP_I915_GM) + +#define IS_945(devid) (devid == PCI_CHIP_I945_G || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_G33_G || \ + devid == PCI_CHIP_Q33_G || \ + devid == PCI_CHIP_Q35_G || IS_IGD(devid)) + +#define IS_965(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid) || \ + IS_IRONLAKE(devid)) + +#define IS_9XX(devid) (IS_915(devid) || \ + IS_945(devid) || \ + IS_965(devid)) + + #endif /* CAIRO_DRM_INTEL_PRIVATE_H */ diff --git a/src/drm/cairo-drm-intel-surface.c b/src/drm/cairo-drm-intel-surface.c index e9e1cacc..5b7c60be 100644 --- a/src/drm/cairo-drm-intel-surface.c +++ b/src/drm/cairo-drm-intel-surface.c @@ -31,84 +31,66 
@@ #include "cairo-drm-private.h" #include "cairo-drm-intel-private.h" + #include "cairo-error-private.h" /* Basic generic/stub surface for intel chipsets */ #define MAX_SIZE 2048 -typedef struct _intel_surface intel_surface_t; - -struct _intel_surface { - cairo_drm_surface_t base; -}; - -static inline intel_device_t * -to_intel_device (cairo_drm_device_t *device) -{ - return (intel_device_t *) device; -} - -static inline intel_bo_t * -to_intel_bo (cairo_drm_bo_t *bo) -{ - return (intel_bo_t *) bo; -} - -static cairo_status_t -intel_batch_flush (intel_device_t *device) -{ - return CAIRO_STATUS_SUCCESS; -} - -static cairo_status_t -intel_surface_batch_flush (intel_surface_t *surface) +static cairo_surface_t * +intel_surface_create_similar (void *abstract_surface, + cairo_content_t content, + int width, + int height) { - if (to_intel_bo (surface->base.bo)->write_domain) - return intel_batch_flush (to_intel_device (surface->base.device)); - - return CAIRO_STATUS_SUCCESS; + return cairo_image_surface_create (_cairo_format_from_content (content), + width, height); } -static cairo_status_t +cairo_status_t intel_surface_finish (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - return _cairo_drm_surface_finish (&surface->base); + return _cairo_drm_surface_finish (&surface->drm); } -static cairo_status_t +cairo_status_t intel_surface_acquire_source_image (void *abstract_surface, - cairo_image_surface_t **image_out, - void **image_extra) + cairo_image_surface_t **image_out, + void **image_extra) { intel_surface_t *surface = abstract_surface; cairo_surface_t *image; cairo_status_t status; - if (surface->base.fallback != NULL) { - image = surface->base.fallback; + /* XXX batch flush */ + + if (surface->drm.fallback != NULL) { + image = surface->drm.fallback; goto DONE; } - image = _cairo_surface_has_snapshot (&surface->base.base, + image = _cairo_surface_has_snapshot (&surface->drm.base, &_cairo_image_surface_backend); if (image != NULL) goto DONE; - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return status; + if (surface->drm.base.backend->flush != NULL) { + status = surface->drm.base.backend->flush (surface); + if (unlikely (status)) + return status; + } - image = intel_bo_get_image (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo), - &surface->base); + image = intel_bo_get_image (to_intel_device (surface->drm.base.device), + to_intel_bo (surface->drm.bo), + &surface->drm); status = image->status; if (unlikely (status)) return status; - status = _cairo_surface_attach_snapshot (&surface->base.base, + status = _cairo_surface_attach_snapshot (&surface->drm.base, image, cairo_surface_destroy); if (unlikely (status)) { @@ -122,7 +104,7 @@ DONE: return CAIRO_STATUS_SUCCESS; } -static void +void intel_surface_release_source_image (void *abstract_surface, cairo_image_surface_t *image, void *image_extra) @@ -130,180 +112,200 @@ intel_surface_release_source_image (void *abstract_surface, cairo_surface_destroy (&image->base); } -static cairo_surface_t * -intel_surface_snapshot (void *abstract_surface) +cairo_surface_t * +intel_surface_map_to_image (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - cairo_status_t status; - if (surface->base.fallback != NULL) - return NULL; + if (surface->drm.fallback == NULL) { + cairo_surface_t *image; + cairo_status_t status; + void *ptr; - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return _cairo_surface_create_in_error (status); + if 
(surface->drm.base.backend->flush != NULL) { + status = surface->drm.base.backend->flush (surface); + if (unlikely (status)) + return _cairo_surface_create_in_error (status); + } + + ptr = intel_bo_map (to_intel_device (surface->drm.base.device), + to_intel_bo (surface->drm.bo)); + if (unlikely (ptr == NULL)) + return _cairo_surface_create_in_error (CAIRO_STATUS_NO_MEMORY); + + image = cairo_image_surface_create_for_data (ptr, + surface->drm.format, + surface->drm.width, + surface->drm.height, + surface->drm.stride); + if (unlikely (image->status)) { + intel_bo_unmap (to_intel_bo (surface->drm.bo)); + return image; + } + + surface->drm.fallback = image; + } - return intel_bo_get_image (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo), - &surface->base); + return surface->drm.fallback; } -static cairo_status_t -intel_surface_acquire_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t **image_out, - cairo_rectangle_int_t *image_rect_out, - void **image_extra) +cairo_status_t +intel_surface_flush (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - cairo_surface_t *image; cairo_status_t status; - void *ptr; - assert (surface->base.fallback == NULL); + if (surface->drm.fallback == NULL) + return CAIRO_STATUS_SUCCESS; - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return status; - - /* Force a read barrier, as well as flushing writes above */ - if (to_intel_bo (surface->base.bo)->in_batch) { - status = intel_batch_flush (to_intel_device (surface->base.device)); - if (unlikely (status)) - return status; - } - - ptr = intel_bo_map (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo)); - if (unlikely (ptr == NULL)) - return _cairo_error (CAIRO_STATUS_NO_MEMORY); - - image = cairo_image_surface_create_for_data (ptr, - surface->base.format, - surface->base.width, - surface->base.height, - surface->base.stride); - status = image->status; - if (unlikely (status)) { - intel_bo_unmap (to_intel_bo (surface->base.bo)); - return status; - } - - surface->base.fallback = cairo_surface_reference (image); + /* kill any outstanding maps */ + cairo_surface_finish (surface->drm.fallback); - *image_out = (cairo_image_surface_t *) image; - *image_extra = NULL; + status = cairo_surface_status (surface->drm.fallback); + cairo_surface_destroy (surface->drm.fallback); + surface->drm.fallback = NULL; - image_rect_out->x = 0; - image_rect_out->y = 0; - image_rect_out->width = surface->base.width; - image_rect_out->height = surface->base.height; + intel_bo_unmap (to_intel_bo (surface->drm.bo)); - return CAIRO_STATUS_SUCCESS; + return status; } -static void -intel_surface_release_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t *image, - cairo_rectangle_int_t *image_rect, - void *image_extra) +static cairo_int_status_t +intel_surface_paint (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_clip_t *clip) { - /* Keep the fallback until we flush, either explicitly or at the - * end of this context. The idea is to avoid excess migration of - * the buffer between GPU and CPU domains. 
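/* Sketch of the fallback life-cycle from the client side, assuming
 * `surface` is a DRM surface: drawing routes through map_to_image() and
 * keeps the bo mapped for the CPU; cairo_surface_flush() reaches
 * intel_surface_flush() below, which finishes the fallback image and
 * unmaps the bo. */
static void
sketch_fallback_cycle (cairo_surface_t *surface)
{
    cairo_t *cr = cairo_create (surface);
    cairo_paint (cr);	/* routed via map_to_image(): bo mapped for CPU */
    cairo_destroy (cr);
    cairo_surface_flush (surface);	/* intel_surface_flush(): unmap the bo */
}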
- */ - cairo_surface_destroy (&image->base); + return _cairo_surface_paint (intel_surface_map_to_image (abstract_surface), + op, source, clip); } -static cairo_status_t -intel_surface_flush (void *abstract_surface) +static cairo_int_status_t +intel_surface_mask (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + const cairo_pattern_t *mask, + cairo_clip_t *clip) { - intel_surface_t *surface = abstract_surface; - cairo_status_t status; - - if (surface->base.fallback == NULL) - return intel_surface_batch_flush (surface); - - /* kill any outstanding maps */ - cairo_surface_finish (surface->base.fallback); + return _cairo_surface_mask (intel_surface_map_to_image (abstract_surface), + op, source, mask, clip); +} - status = cairo_surface_status (surface->base.fallback); - cairo_surface_destroy (surface->base.fallback); - surface->base.fallback = NULL; +static cairo_int_status_t +intel_surface_stroke (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_stroke (intel_surface_map_to_image (abstract_surface), + op, source, path, stroke_style, ctm, ctm_inverse, + tolerance, antialias, clip); +} - intel_bo_unmap (to_intel_bo (surface->base.bo)); +static cairo_int_status_t +intel_surface_fill (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_fill (intel_surface_map_to_image (abstract_surface), + op, source, path, fill_rule, + tolerance, antialias, clip); +} - return status; +static cairo_int_status_t +intel_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + *num_remaining = 0; + return _cairo_surface_show_text_glyphs (intel_surface_map_to_image (abstract_surface), + op, source, + NULL, 0, + glyphs, num_glyphs, + NULL, 0, 0, + scaled_font, clip); } static const cairo_surface_backend_t intel_surface_backend = { CAIRO_SURFACE_TYPE_DRM, - _cairo_drm_surface_create_similar, - intel_surface_finish, + intel_surface_create_similar, + intel_surface_finish, intel_surface_acquire_source_image, intel_surface_release_source_image, - intel_surface_acquire_dest_image, - intel_surface_release_dest_image, - - NULL, //intel_surface_clone_similar, - NULL, //intel_surface_composite, - NULL, //intel_surface_fill_rectangles, - NULL, //intel_surface_composite_trapezoids, - NULL, //intel_surface_create_span_renderer, - NULL, //intel_surface_check_span_renderer, + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + NULL, /* copy_page */ NULL, /* show_page */ _cairo_drm_surface_get_extents, - NULL, /* old_show_glyphs */ + NULL, /* old-glyphs */ _cairo_drm_surface_get_font_options, - intel_surface_flush, - NULL, /* mark_dirty_rectangle */ - NULL, //intel_surface_scaled_font_fini, - NULL, //intel_surface_scaled_glyph_fini, - - _cairo_drm_surface_paint, - _cairo_drm_surface_mask, - _cairo_drm_surface_stroke, - _cairo_drm_surface_fill, - _cairo_drm_surface_show_glyphs, - intel_surface_snapshot, - - NULL, 
/* is_similar */ + intel_surface_flush, + NULL, /* mark dirty */ + NULL, NULL, /* font/glyph fini */ + + intel_surface_paint, + intel_surface_mask, + intel_surface_stroke, + intel_surface_fill, + intel_surface_glyphs, }; -static void +void intel_surface_init (intel_surface_t *surface, - cairo_content_t content, - cairo_drm_device_t *device) + const cairo_surface_backend_t *backend, + cairo_drm_device_t *device, + cairo_content_t content) { - _cairo_surface_init (&surface->base.base, - &intel_surface_backend, - NULL, /* device */ + _cairo_surface_init (&surface->drm.base, + backend, + &device->base, content); - _cairo_drm_surface_init (&surface->base, device); + _cairo_drm_surface_init (&surface->drm, device); switch (content) { case CAIRO_CONTENT_ALPHA: - surface->base.format = CAIRO_FORMAT_A8; + surface->drm.format = CAIRO_FORMAT_A8; break; case CAIRO_CONTENT_COLOR: - surface->base.format = CAIRO_FORMAT_RGB24; + surface->drm.format = CAIRO_FORMAT_RGB24; break; default: ASSERT_NOT_REACHED; case CAIRO_CONTENT_COLOR_ALPHA: - surface->base.format = CAIRO_FORMAT_ARGB32; + surface->drm.format = CAIRO_FORMAT_ARGB32; break; } + + surface->snapshot_cache_entry.hash = 0; } static cairo_surface_t * -intel_surface_create_internal (cairo_drm_device_t *device, - cairo_content_t content, - int width, int height) +intel_surface_create (cairo_drm_device_t *device, + cairo_content_t content, + int width, int height) { intel_surface_t *surface; cairo_status_t status; @@ -312,36 +314,28 @@ intel_surface_create_internal (cairo_drm_device_t *device, if (unlikely (surface == NULL)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); - intel_surface_init (surface, content, device); + intel_surface_init (surface, &intel_surface_backend, device, content); if (width && height) { - surface->base.width = width; - surface->base.height = height; + surface->drm.width = width; + surface->drm.height = height; /* Vol I, p134: size restrictions for textures */ width = (width + 3) & -4; height = (height + 1) & -2; - surface->base.stride = - cairo_format_stride_for_width (surface->base.format, width); - surface->base.bo = intel_bo_create (to_intel_device (device), - surface->base.stride * height, - TRUE); - if (surface->base.bo == NULL) { - status = _cairo_drm_surface_finish (&surface->base); + surface->drm.stride = + cairo_format_stride_for_width (surface->drm.format, width); + surface->drm.bo = &intel_bo_create (to_intel_device (&device->base), + surface->drm.stride * height, + TRUE)->base; + if (surface->drm.bo == NULL) { + status = _cairo_drm_surface_finish (&surface->drm); free (surface); return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); } } - return &surface->base.base; -} - -static cairo_surface_t * -intel_surface_create (cairo_drm_device_t *device, - cairo_content_t content, - int width, int height) -{ - return intel_surface_create_internal (device, content, width, height); + return &surface->drm.base; } static cairo_surface_t * @@ -376,46 +370,41 @@ intel_surface_create_for_name (cairo_drm_device_t *device, if (unlikely (surface == NULL)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); - intel_surface_init (surface, content, device); + intel_surface_init (surface, &intel_surface_backend, device, content); if (width && height) { - surface->base.width = width; - surface->base.height = height; - surface->base.stride = stride; - - surface->base.bo = intel_bo_create_for_name (to_intel_device (device), - name); - if (unlikely 
(surface->base.bo == NULL)) { - status = _cairo_drm_surface_finish (&surface->base); + surface->drm.width = width; + surface->drm.height = height; + surface->drm.stride = stride; + + surface->drm.bo = &intel_bo_create_for_name (to_intel_device (&device->base), + name)->base; + if (unlikely (surface->drm.bo == NULL)) { + status = _cairo_drm_surface_finish (&surface->drm); free (surface); return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); } } - return &surface->base.base; + return &surface->drm.base; } static cairo_status_t intel_surface_enable_scan_out (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - cairo_status_t status; - if (unlikely (surface->base.bo == NULL)) + if (unlikely (surface->drm.bo == NULL)) return _cairo_error (CAIRO_STATUS_INVALID_SIZE); - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return status; - - if (to_intel_bo (surface->base.bo)->tiling == I915_TILING_Y) { - intel_bo_set_tiling (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo), - I915_TILING_X, surface->base.stride); + if (to_intel_bo (surface->drm.bo)->tiling == I915_TILING_Y) { + intel_bo_set_tiling (to_intel_device (surface->drm.base.device), + to_intel_bo (surface->drm.bo), + I915_TILING_X, surface->drm.stride); } - if (unlikely (to_intel_bo (surface->base.bo)->tiling == I915_TILING_Y)) + if (unlikely (to_intel_bo (surface->drm.bo)->tiling == I915_TILING_Y)) return _cairo_error (CAIRO_STATUS_INVALID_FORMAT); /* XXX */ return CAIRO_STATUS_SUCCESS; @@ -424,13 +413,7 @@ intel_surface_enable_scan_out (void *abstract_surface) static cairo_int_status_t intel_device_throttle (cairo_drm_device_t *device) { - cairo_status_t status; - - status = intel_batch_flush (to_intel_device (device)); - if (unlikely (status)) - return status; - - intel_throttle (to_intel_device (device)); + intel_throttle (to_intel_device (&device->base)); return CAIRO_STATUS_SUCCESS; } @@ -455,24 +438,28 @@ _cairo_drm_intel_device_create (int fd, dev_t dev, int vendor_id, int chip_id) device = malloc (sizeof (intel_device_t)); if (unlikely (device == NULL)) - return _cairo_drm_device_create_in_error (CAIRO_STATUS_NO_MEMORY); + return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); status = intel_device_init (device, fd); if (unlikely (status)) { free (device); - return _cairo_drm_device_create_in_error (status); + return (cairo_drm_device_t *) _cairo_device_create_in_error (status); } - device->base.bo.release = intel_bo_release; - device->base.surface.create = intel_surface_create; device->base.surface.create_for_name = intel_surface_create_for_name; device->base.surface.create_from_cacheable_image = NULL; device->base.surface.flink = _cairo_drm_surface_flink; device->base.surface.enable_scan_out = intel_surface_enable_scan_out; + device->base.surface.map_to_image = intel_surface_map_to_image; + + device->base.device.flush = NULL; device->base.device.throttle = intel_device_throttle; device->base.device.destroy = intel_device_destroy; - return _cairo_drm_device_init (&device->base, fd, dev, MAX_SIZE); + return _cairo_drm_device_init (&device->base, + fd, dev, + vendor_id, chip_id, + MAX_SIZE); } diff --git a/src/drm/cairo-drm-intel.c b/src/drm/cairo-drm-intel.c index 6c8a8fd2..7cbbb16c 100644 --- a/src/drm/cairo-drm-intel.c +++ b/src/drm/cairo-drm-intel.c @@ -32,6 +32,8 @@ #include "cairo-drm-private.h" #include "cairo-drm-ioctl-private.h" #include "cairo-drm-intel-private.h" +#include 
"cairo-drm-intel-ioctl-private.h" + #include "cairo-error-private.h" #include "cairo-freelist-private.h" @@ -39,300 +41,13 @@ #include <sys/mman.h> #include <errno.h> -#define DRM_I915_GEM_EXECBUFFER 0x14 -#define DRM_I915_GEM_BUSY 0x17 -#define DRM_I915_GEM_THROTTLE 0x18 -#define DRM_I915_GEM_CREATE 0x1b -#define DRM_I915_GEM_PREAD 0x1c -#define DRM_I915_GEM_PWRITE 0x1d -#define DRM_I915_GEM_MMAP 0x1e -#define DRM_I915_GEM_SET_DOMAIN 0x1f -#define DRM_I915_GEM_SET_TILING 0x21 -#define DRM_I915_GEM_GET_TILING 0x22 -#define DRM_I915_GEM_GET_APERTURE 0x23 -#define DRM_I915_GEM_MMAP_GTT 0x24 - -struct drm_i915_gem_create { - /** - * Requested size for the object. - * - * The (page-aligned) allocated size for the object will be returned. - */ - uint64_t size; - /** - * Returned handle for the object. - * - * Object handles are nonzero. - */ - uint32_t handle; - uint32_t pad; -}; - -struct drm_i915_gem_pread { - /** Handle for the object being read. */ - uint32_t handle; - uint32_t pad; - /** Offset into the object to read from */ - uint64_t offset; - /** Length of data to read */ - uint64_t size; - /** - * Pointer to write the data into. - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t data_ptr; -}; - -struct drm_i915_gem_pwrite { - /** Handle for the object being written to. */ - uint32_t handle; - uint32_t pad; - /** Offset into the object to write to */ - uint64_t offset; - /** Length of data to write */ - uint64_t size; - /** - * Pointer to read the data from. - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t data_ptr; -}; - -struct drm_i915_gem_mmap { - /** Handle for the object being mapped. */ - uint32_t handle; - uint32_t pad; - /** Offset in the object to map. */ - uint64_t offset; - /** - * Length of data to map. - * - * The value will be page-aligned. - */ - uint64_t size; - /** - * Returned pointer the data was mapped at. - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t addr_ptr; -}; - -struct drm_i915_gem_mmap_gtt { - /** Handle for the object being mapped. */ - uint32_t handle; - uint32_t pad; - /** - * Fake offset to use for subsequent mmap call - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t offset; -}; - -struct drm_i915_gem_set_domain { - /** Handle for the object */ - uint32_t handle; - - /** New read domains */ - uint32_t read_domains; - - /** New write domain */ - uint32_t write_domain; -}; - -struct drm_i915_gem_relocation_entry { - /** - * Handle of the buffer being pointed to by this relocation entry. - * - * It's appealing to make this be an index into the mm_validate_entry - * list to refer to the buffer, but this allows the driver to create - * a relocation list for state buffers and not re-write it per - * exec using the buffer. - */ - uint32_t target_handle; - - /** - * Value to be added to the offset of the target buffer to make up - * the relocation entry. - */ - uint32_t delta; - - /** Offset in the buffer the relocation entry will be written into */ - uint64_t offset; - - /** - * Offset value of the target buffer that the relocation entry was last - * written as. - * - * If the buffer has the same offset as last time, we can skip syncing - * and writing the relocation. This value is written back out by - * the execbuffer ioctl when the relocation is written. - */ - uint64_t presumed_offset; - - /** - * Target memory domains read by this operation. - */ - uint32_t read_domains; - - /** - * Target memory domains written by this operation. 
- * - * Note that only one domain may be written by the whole - * execbuffer operation, so that where there are conflicts, - * the application will get -EINVAL back. - */ - uint32_t write_domain; -}; - -struct drm_i915_gem_exec_object { - /** - * User's handle for a buffer to be bound into the GTT for this - * operation. - */ - uint32_t handle; - - /** Number of relocations to be performed on this buffer */ - uint32_t relocation_count; - /** - * Pointer to array of struct drm_i915_gem_relocation_entry containing - * the relocations to be performed in this buffer. - */ - uint64_t relocs_ptr; - - /** Required alignment in graphics aperture */ - uint64_t alignment; - - /** - * Returned value of the updated offset of the object, for future - * presumed_offset writes. - */ - uint64_t offset; -}; - -struct drm_i915_gem_execbuffer { - /** - * List of buffers to be validated with their relocations to be - * performend on them. - * - * This is a pointer to an array of struct drm_i915_gem_validate_entry. - * - * These buffers must be listed in an order such that all relocations - * a buffer is performing refer to buffers that have already appeared - * in the validate list. - */ - uint64_t buffers_ptr; - uint32_t buffer_count; - - /** Offset in the batchbuffer to start execution from. */ - uint32_t batch_start_offset; - /** Bytes used in batchbuffer from batch_start_offset */ - uint32_t batch_len; - uint32_t DR1; - uint32_t DR4; - uint32_t num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ - uint64_t cliprects_ptr; -}; - -struct drm_i915_gem_busy { - /** Handle of the buffer to check for busy */ - uint32_t handle; - - /** Return busy status (1 if busy, 0 if idle) */ - uint32_t busy; -}; - -struct drm_i915_gem_set_tiling { - /** Handle of the buffer to have its tiling state updated */ - uint32_t handle; - - /** - * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, - * I915_TILING_Y). - * - * This value is to be set on request, and will be updated by the - * kernel on successful return with the actual chosen tiling layout. - * - * The tiling mode may be demoted to I915_TILING_NONE when the system - * has bit 6 swizzling that can't be managed correctly by GEM. - * - * Buffer contents become undefined when changing tiling_mode. - */ - uint32_t tiling_mode; - - /** - * Stride in bytes for the object when in I915_TILING_X or - * I915_TILING_Y. - */ - uint32_t stride; - - /** - * Returned address bit 6 swizzling required for CPU access through - * mmap mapping. - */ - uint32_t swizzle_mode; -}; - -struct drm_i915_gem_get_tiling { - /** Handle of the buffer to get tiling state for. */ - uint32_t handle; - - /** - * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, - * I915_TILING_Y). - */ - uint32_t tiling_mode; - - /** - * Returned address bit 6 swizzling required for CPU access through - * mmap mapping. 
- */ - uint32_t swizzle_mode; -}; - -struct drm_i915_gem_get_aperture { - /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ - uint64_t aper_size; - - /** - * Available space in the aperture used by i915_gem_execbuffer, in - * bytes - */ - uint64_t aper_available_size; -}; - - -#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) -#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) -#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) -#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) -#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) -#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) -#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) -#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) -#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) -#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) -#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) -#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) - -/* XXX madvise */ -#ifndef DRM_I915_GEM_MADVISE -#define I915_MADV_WILLNEED 0 -#define I915_MADV_DONTNEED 1 - -struct drm_i915_gem_madvise { - uint32_t handle; - uint32_t madv; - uint32_t retained; -}; -#define DRM_I915_GEM_MADVISE 0x26 -#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) -#endif +#define GLYPH_CACHE_WIDTH 1024 +#define GLYPH_CACHE_HEIGHT 1024 +#define GLYPH_CACHE_MIN_SIZE 1 +#define GLYPH_CACHE_MAX_SIZE 128 +#define IMAGE_CACHE_WIDTH 1024 +#define IMAGE_CACHE_HEIGHT 1024 cairo_bool_t intel_info (int fd, uint64_t *gtt_size) @@ -453,12 +168,14 @@ intel_bo_map (const intel_device_t *device, intel_bo_t *bo) do { ret = ioctl (device->base.fd, - DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); } while (ret == -1 && errno == EINTR); if (ret != 0) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return NULL; + intel_bo_unmap (bo); + _cairo_error_throw (CAIRO_STATUS_DEVICE_ERROR); + return NULL; } return bo->virtual; @@ -471,7 +188,7 @@ intel_bo_unmap (intel_bo_t *bo) bo->virtual = NULL; } -static cairo_bool_t +cairo_bool_t intel_bo_is_inactive (const intel_device_t *device, const intel_bo_t *bo) { struct drm_i915_gem_busy busy; @@ -484,6 +201,21 @@ intel_bo_is_inactive (const intel_device_t *device, const intel_bo_t *bo) return ! 
busy.busy; } +void +intel_bo_wait (const intel_device_t *device, const intel_bo_t *bo) +{ + struct drm_i915_gem_set_domain set_domain; + int ret; + + set_domain.handle = bo->base.handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = 0; + + do { + ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + } while (ret == -1 && errno == EINTR); +} + static inline int pot (int v) { @@ -515,7 +247,7 @@ intel_bo_cache_remove (intel_device_t *device, _cairo_freepool_free (&device->bo_pool, bo); } -static cairo_bool_t +cairo_bool_t intel_bo_madvise (intel_device_t *device, intel_bo_t *bo, int advice) @@ -548,7 +280,7 @@ intel_bo_cache_purge (intel_device_t *device) } } -cairo_drm_bo_t * +intel_bo_t * intel_bo_create (intel_device_t *device, uint32_t size, cairo_bool_t gpu_target) @@ -584,18 +316,19 @@ intel_bo_create (intel_device_t *device, * cause serialisation... */ - if (! intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { - intel_bo_cache_remove (device, bo, bucket); - goto retry; - } - if (device->bo_cache[bucket].num_entries-- > - device->bo_cache[bucket].min_entries) + device->bo_cache[bucket].min_entries) { device->bo_cache_size -= bo->base.size; } cairo_list_del (&bo->cache_list); - CAIRO_MUTEX_UNLOCK (device->bo_mutex); + + if (! intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { + _cairo_drm_bo_close (&device->base, &bo->base); + _cairo_freepool_free (&device->bo_pool, bo); + goto retry; + } + goto DONE; } @@ -608,18 +341,19 @@ intel_bo_create (intel_device_t *device, bo = cairo_list_first_entry (&device->bo_cache[bucket].list, intel_bo_t, cache_list); if (intel_bo_is_inactive (device, bo)) { - if (! intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { - intel_bo_cache_remove (device, bo, bucket); - goto retry; - } - if (device->bo_cache[bucket].num_entries-- > device->bo_cache[bucket].min_entries) { device->bo_cache_size -= bo->base.size; } cairo_list_del (&bo->cache_list); - CAIRO_MUTEX_UNLOCK (device->bo_mutex); + + if (! 
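/* Sketch of the purgeable-cache protocol used by the code below: a cached
 * bo is marked DONTNEED on release and must be reclaimed with WILLNEED
 * before reuse; if the kernel already purged the pages, the buffer has to
 * be closed and recreated.  Assumes intel_bo_madvise() reports whether the
 * backing pages were retained. */
static cairo_bool_t
sketch_bo_reclaim (intel_device_t *device, intel_bo_t *bo)
{
    return intel_bo_madvise (device, bo, I915_MADV_WILLNEED);
}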
intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { + _cairo_drm_bo_close (&device->base, &bo->base); + _cairo_freepool_free (&device->bo_pool, bo); + goto retry; + } + goto DONE; } } @@ -646,10 +380,9 @@ intel_bo_create (intel_device_t *device, /* no cached buffer available, allocate fresh */ bo = _cairo_freepool_alloc (&device->bo_pool); - CAIRO_MUTEX_UNLOCK (device->bo_mutex); if (unlikely (bo == NULL)) { _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return NULL; + goto UNLOCK; } cairo_list_init (&bo->cache_list); @@ -663,29 +396,37 @@ intel_bo_create (intel_device_t *device, bo->tiling = I915_TILING_NONE; bo->stride = 0; bo->swizzle = I915_BIT_6_SWIZZLE_NONE; + bo->purgeable = 0; + + bo->opaque0 = 0; + bo->opaque1 = 0; - bo->in_batch = FALSE; - bo->read_domains = 0; - bo->write_domain = 0; + bo->exec = NULL; + bo->batch_read_domains = 0; + bo->batch_write_domain = 0; + cairo_list_init (&bo->link); create.size = size; create.handle = 0; ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_CREATE, &create); if (unlikely (ret != 0)) { _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - free (bo); - return NULL; + _cairo_freepool_free (&device->bo_pool, bo); + bo = NULL; + goto UNLOCK; } bo->base.handle = create.handle; DONE: CAIRO_REFERENCE_COUNT_INIT (&bo->base.ref_count, 1); +UNLOCK: + CAIRO_MUTEX_UNLOCK (device->bo_mutex); - return &bo->base; + return bo; } -cairo_drm_bo_t * +intel_bo_t * intel_bo_create_for_name (intel_device_t *device, uint32_t name) { struct drm_i915_gem_get_tiling get_tiling; @@ -702,40 +443,48 @@ intel_bo_create_for_name (intel_device_t *device, uint32_t name) } status = _cairo_drm_bo_open_for_name (&device->base, &bo->base, name); - if (unlikely (status)) { - _cairo_freepool_free (&device->bo_pool, bo); - return NULL; - } + if (unlikely (status)) + goto FAIL; CAIRO_REFERENCE_COUNT_INIT (&bo->base.ref_count, 1); cairo_list_init (&bo->cache_list); bo->offset = 0; bo->virtual = NULL; + bo->purgeable = 0; + + bo->opaque0 = 0; + bo->opaque1 = 0; - bo->in_batch = FALSE; - bo->read_domains = 0; - bo->write_domain = 0; + bo->exec = NULL; + bo->batch_read_domains = 0; + bo->batch_write_domain = 0; + cairo_list_init (&bo->link); memset (&get_tiling, 0, sizeof (get_tiling)); get_tiling.handle = bo->base.handle; ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); if (unlikely (ret != 0)) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); + _cairo_error_throw (CAIRO_STATUS_DEVICE_ERROR); _cairo_drm_bo_close (&device->base, &bo->base); - _cairo_freepool_free (&device->bo_pool, bo); - return NULL; + goto FAIL; } bo->tiling = get_tiling.tiling_mode; bo->swizzle = get_tiling.swizzle_mode; // bo->stride = get_tiling.stride; /* XXX not available from get_tiling */ - return &bo->base; + return bo; + +FAIL: + CAIRO_MUTEX_LOCK (device->bo_mutex); + _cairo_freepool_free (&device->bo_pool, bo); + CAIRO_MUTEX_UNLOCK (device->bo_mutex); + return NULL; } -void +static void intel_bo_release (void *_dev, void *_bo) { intel_device_t *device = _dev; @@ -747,7 +496,10 @@ intel_bo_release (void *_dev, void *_bo) bucket = ffs (bo->base.size / 4096) - 1; CAIRO_MUTEX_LOCK (device->bo_mutex); - if (bo->base.name == 0 && bucket < INTEL_BO_CACHE_BUCKETS) { + if (bo->base.name == 0 && + bucket < INTEL_BO_CACHE_BUCKETS && + intel_bo_madvise (device, bo, I915_MADV_DONTNEED)) + { if (++device->bo_cache[bucket].num_entries > device->bo_cache[bucket].min_entries) { @@ -755,8 +507,6 @@ intel_bo_release (void *_dev, void *_bo) } cairo_list_add_tail (&bo->cache_list, 
&device->bo_cache[bucket].list); - - intel_bo_madvise (device, bo, I915_MADV_DONTNEED); } else { @@ -767,7 +517,7 @@ intel_bo_release (void *_dev, void *_bo) } void -intel_bo_set_tiling (intel_device_t *device, +intel_bo_set_tiling (const intel_device_t *device, intel_bo_t *bo, uint32_t tiling, uint32_t stride) @@ -781,7 +531,7 @@ intel_bo_set_tiling (intel_device_t *device, return; } - assert (! bo->in_batch); + assert (bo->exec == NULL); if (bo->virtual) intel_bo_unmap (bo); @@ -790,7 +540,9 @@ intel_bo_set_tiling (intel_device_t *device, set_tiling.tiling_mode = tiling; set_tiling.stride = stride; - ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + do { + ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && errno == EINTR); if (ret == 0) { bo->tiling = set_tiling.tiling_mode; bo->swizzle = set_tiling.swizzle_mode; @@ -859,6 +611,148 @@ intel_bo_get_image (const intel_device_t *device, return &image->base; } +static cairo_status_t +_intel_bo_put_a1_image (intel_device_t *dev, + intel_bo_t *bo, int stride, + cairo_image_surface_t *src, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y) +{ + uint8_t buf[CAIRO_STACK_BUFFER_SIZE]; + uint8_t *a8 = buf; + uint8_t *data; + int x; + + data = src->data + src_y * src->stride; + + if (bo->tiling == I915_TILING_NONE && width == stride) { + uint8_t *p; + int size; + + size = stride * height; + if (size > (int) sizeof (buf)) { + a8 = _cairo_malloc_ab (stride, height); + if (a8 == NULL) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + p = a8; + while (height--) { + for (x = 0; x < width; x++) { + int i = src_x + x; + int byte = i / 8; + int bit = i % 8; + p[x] = data[byte] & (1 << bit) ? 0xff : 0x00; + } + + data += src->stride; + p += stride; + } + + intel_bo_write (dev, bo, + dst_y * stride + dst_x, /* XXX bo_offset */ + size, a8); + } else { + uint8_t *dst; + + if (width > (int) sizeof (buf)) { + a8 = malloc (width); + if (a8 == NULL) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + dst = intel_bo_map (dev, bo); + if (dst == NULL) { + if (a8 != buf) + free (a8); + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + } + + dst += dst_y * stride + dst_x; /* XXX bo_offset */ + while (height--) { + for (x = 0; x < width; x++) { + int i = src_x + x; + int byte = i / 8; + int bit = i % 8; + a8[x] = data[byte] & (1 << bit) ? 
0xff : 0x00; + } + + memcpy (dst, a8, width); + dst += stride; + data += src->stride; + } + intel_bo_unmap (bo); + } + + if (a8 != buf) + free (a8); + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +intel_bo_put_image (intel_device_t *dev, + intel_bo_t *bo, int stride, + cairo_image_surface_t *src, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y) +{ + uint8_t *data; + int size; + int offset; + + offset = dst_y * stride; + data = src->data + src_y * src->stride; + switch (src->format) { + default: + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + offset += 4 * dst_x; + data += 4 * src_x; + size = 4 * width; + break; + case CAIRO_FORMAT_A8: + offset += dst_x; + data += src_x; + size = width; + break; + case CAIRO_FORMAT_A1: + return _intel_bo_put_a1_image (dev, + bo, stride, src, + src_x, src_y, + width, height, + dst_x, dst_y); + } + + if (bo->tiling == I915_TILING_NONE) { + if (src->stride == stride) { + intel_bo_write (dev, bo, offset, stride * height, data); + } else while (height--) { + intel_bo_write (dev, bo, offset, size, data); + offset += stride; + data += src->stride; + } + } else { + uint8_t *dst; + + dst = intel_bo_map (dev, bo); + if (unlikely (dst == NULL)) + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + + dst += offset; + while (height--) { + memcpy (dst, data, size); + dst += stride; + data += src->stride; + } + intel_bo_unmap (bo); + } + + return CAIRO_STATUS_SUCCESS; +} + static void _intel_device_init_bo_cache (intel_device_t *device) { @@ -883,23 +777,69 @@ _intel_device_init_bo_cache (intel_device_t *device) } _cairo_freepool_init (&device->bo_pool, sizeof (intel_bo_t)); + + device->base.surface.flink = _cairo_drm_surface_flink; + device->base.surface.map_to_image = intel_surface_map_to_image; +} + +static cairo_bool_t +_intel_snapshot_cache_entry_can_remove (const void *closure) +{ + return TRUE; +} + +static void +_intel_snapshot_cache_entry_destroy (void *closure) +{ + intel_surface_t *surface = cairo_container_of (closure, + intel_surface_t, + snapshot_cache_entry); + + surface->snapshot_cache_entry.hash = 0; + cairo_surface_destroy (&surface->drm.base); } cairo_status_t intel_device_init (intel_device_t *device, int fd) { struct drm_i915_gem_get_aperture aperture; + cairo_status_t status; + size_t size; int ret; + int n; ret = ioctl (fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); if (ret != 0) - return _cairo_error (CAIRO_STATUS_NO_MEMORY); + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + + CAIRO_MUTEX_INIT (device->mutex); device->gtt_max_size = aperture.aper_size; device->gtt_avail_size = aperture.aper_available_size; + device->gtt_avail_size -= device->gtt_avail_size >> 5; _intel_device_init_bo_cache (device); + size = aperture.aper_size / 8; + device->snapshot_cache_max_size = size / 4; + status = _cairo_cache_init (&device->snapshot_cache, + NULL, + _intel_snapshot_cache_entry_can_remove, + _intel_snapshot_cache_entry_destroy, + size); + if (unlikely (status)) + return status; + + device->glyph_cache_mapped = FALSE; + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) { + device->glyph_cache[n].buffer.bo = NULL; + cairo_list_init (&device->glyph_cache[n].rtree.pinned); + } + + device->gradient_cache.size = 0; + + device->base.bo.release = intel_bo_release; + return CAIRO_STATUS_SUCCESS; } @@ -920,10 +860,42 @@ _intel_bo_cache_fini (intel_device_t *device) CAIRO_MUTEX_FINI (device->bo_mutex); } +static void +_intel_gradient_cache_fini (intel_device_t *device) +{ + unsigned int n; + + for (n = 0; n < 
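/* Sketch of the upload strategy implemented by intel_bo_put_image above,
 * assuming a linear (untiled) destination: matching strides collapse to a
 * single pwrite, while mismatched strides fall back to a per-row copy.
 * All parameters are illustrative. */
static void
sketch_upload_rows (intel_device_t *dev, intel_bo_t *bo,
		    const uint8_t *data, int src_stride,
		    int dst_stride, int row_bytes, int height)
{
    int offset = 0;

    if (src_stride == dst_stride) {
	intel_bo_write (dev, bo, 0, dst_stride * height, data);
	return;
    }

    while (height--) {
	intel_bo_write (dev, bo, offset, row_bytes, data);
	offset += dst_stride;
	data += src_stride;
    }
}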
device->gradient_cache.size; n++) { + _cairo_pattern_fini (&device->gradient_cache.cache[n].pattern.base); + if (device->gradient_cache.cache[n].buffer.bo != NULL) + cairo_drm_bo_destroy (&device->base.base, + &device->gradient_cache.cache[n].buffer.bo->base); + } +} + +static void +_intel_glyph_cache_fini (intel_device_t *device, intel_buffer_cache_t *cache) +{ + if (cache->buffer.bo == NULL) + return; + + intel_bo_destroy (device, cache->buffer.bo); + _cairo_rtree_fini (&cache->rtree); +} + void intel_device_fini (intel_device_t *device) { + int n; + + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) + _intel_glyph_cache_fini (device, &device->glyph_cache[n]); + + _cairo_cache_fini (&device->snapshot_cache); + + _intel_gradient_cache_fini (device); _intel_bo_cache_fini (device); + _cairo_drm_device_fini (&device->base); } @@ -932,3 +904,591 @@ intel_throttle (intel_device_t *device) { ioctl (device->base.fd, DRM_IOCTL_I915_GEM_THROTTLE); } + +void +intel_glyph_cache_unmap (intel_device_t *device) +{ + int n; + + if (likely (! device->glyph_cache_mapped)) + return; + + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) { + if (device->glyph_cache[n].buffer.bo != NULL && + device->glyph_cache[n].buffer.bo->virtual != NULL) + { + intel_bo_unmap (device->glyph_cache[n].buffer.bo); + } + } + + device->glyph_cache_mapped = FALSE; +} + +void +intel_glyph_cache_unpin (intel_device_t *device) +{ + int n; + + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) + _cairo_rtree_unpin (&device->glyph_cache[n].rtree); +} + +static cairo_status_t +intel_glyph_cache_add_glyph (intel_device_t *device, + intel_buffer_cache_t *cache, + cairo_scaled_glyph_t *scaled_glyph) +{ + cairo_image_surface_t *glyph_surface = scaled_glyph->surface; + intel_glyph_t *glyph; + cairo_rtree_node_t *node = NULL; + double sf_x, sf_y; + cairo_status_t status; + uint8_t *dst, *src; + int width, height; + + width = glyph_surface->width; + if (width < GLYPH_CACHE_MIN_SIZE) + width = GLYPH_CACHE_MIN_SIZE; + height = glyph_surface->height; + if (height < GLYPH_CACHE_MIN_SIZE) + height = GLYPH_CACHE_MIN_SIZE; + + /* search for an available slot */ + status = _cairo_rtree_insert (&cache->rtree, width, height, &node); + /* search for an unpinned slot */ + if (status == CAIRO_INT_STATUS_UNSUPPORTED) { + status = _cairo_rtree_evict_random (&cache->rtree, width, height, &node); + if (status == CAIRO_STATUS_SUCCESS) + status = _cairo_rtree_node_insert (&cache->rtree, node, width, height, &node); + } + if (unlikely (status)) + return status; + + height = glyph_surface->height; + src = glyph_surface->data; + dst = cache->buffer.bo->virtual; + if (dst == NULL) { + dst = intel_bo_map (device, cache->buffer.bo); + if (unlikely (dst == NULL)) + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + } + + dst += node->y * cache->buffer.stride; + switch (glyph_surface->format) { + case CAIRO_FORMAT_A1: { + uint8_t buf[CAIRO_STACK_BUFFER_SIZE]; + uint8_t *a8 = buf; + int x; + + if (width > (int) sizeof (buf)) { + a8 = malloc (width); + if (unlikely (a8 == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + dst += node->x; + width = glyph_surface->width; + while (height--) { + for (x = 0; x < width; x++) + a8[x] = src[x>>3] & (1 << (x&7)) ? 
0xff : 0x00; + + memcpy (dst, a8, width); + dst += cache->buffer.stride; + src += glyph_surface->stride; + } + + if (a8 != buf) + free (a8); + break; + } + + case CAIRO_FORMAT_A8: + dst += node->x; + width = glyph_surface->width; + while (height--) { + memcpy (dst, src, width); + dst += cache->buffer.stride; + src += glyph_surface->stride; + } + break; + + default: + ASSERT_NOT_REACHED; + case CAIRO_FORMAT_RGB24: + case CAIRO_FORMAT_ARGB32: + dst += 4*node->x; + width = 4*glyph_surface->width; + while (height--) { + memcpy (dst, src, width); + dst += cache->buffer.stride; + src += glyph_surface->stride; + } + break; + } + + /* leave mapped! */ + device->glyph_cache_mapped = TRUE; + + scaled_glyph->surface_private = node; + + glyph= (intel_glyph_t *) node; + glyph->node.owner = &scaled_glyph->surface_private; + glyph->cache = cache; + + /* compute tex coords: bottom-right, bottom-left, top-left */ + sf_x = 1. / cache->buffer.width; + sf_y = 1. / cache->buffer.height; + glyph->texcoord[0] = + texcoord_2d_16 (sf_x * (node->x + glyph_surface->width + NEAREST_BIAS), + sf_y * (node->y + glyph_surface->height + NEAREST_BIAS)); + glyph->texcoord[1] = + texcoord_2d_16 (sf_x * (node->x + NEAREST_BIAS), + sf_y * (node->y + glyph_surface->height + NEAREST_BIAS)); + glyph->texcoord[2] = + texcoord_2d_16 (sf_x * (node->x + NEAREST_BIAS), + sf_y * (node->y + NEAREST_BIAS)); + + return CAIRO_STATUS_SUCCESS; +} + +void +intel_scaled_glyph_fini (cairo_scaled_glyph_t *scaled_glyph, + cairo_scaled_font_t *scaled_font) +{ + intel_glyph_t *glyph; + + glyph = scaled_glyph->surface_private; + if (glyph != NULL) { + glyph->node.owner = NULL; + if (! glyph->node.pinned) { + intel_buffer_cache_t *cache; + + /* XXX thread-safety? Probably ok due to the frozen scaled-font. */ + cache = glyph->cache; + assert (cache != NULL); + + glyph->node.state = CAIRO_RTREE_NODE_AVAILABLE; + cairo_list_move (&glyph->node.link, + &cache->rtree.available); + + if (! glyph->node.parent->pinned) + _cairo_rtree_node_collapse (&cache->rtree, glyph->node.parent); + } + } +} + +void +intel_scaled_font_fini (cairo_scaled_font_t *scaled_font) +{ + intel_device_t *device; + + device = scaled_font->surface_private; + if (device != NULL) { + /* XXX decouple? 
*/ + } +} + +static cairo_status_t +intel_get_glyph_cache (intel_device_t *device, + cairo_format_t format, + intel_buffer_cache_t **out) +{ + intel_buffer_cache_t *cache; + cairo_status_t status; + + switch (format) { + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + cache = &device->glyph_cache[0]; + format = CAIRO_FORMAT_ARGB32; + break; + case CAIRO_FORMAT_A8: + case CAIRO_FORMAT_A1: + cache = &device->glyph_cache[1]; + format = CAIRO_FORMAT_A8; + break; + default: + ASSERT_NOT_REACHED; + } + + if (unlikely (cache->buffer.bo == NULL)) { + status = intel_buffer_cache_init (cache, device, format, + INTEL_GLYPH_CACHE_WIDTH, + INTEL_GLYPH_CACHE_HEIGHT); + if (unlikely (status)) + return status; + + _cairo_rtree_init (&cache->rtree, + INTEL_GLYPH_CACHE_WIDTH, + INTEL_GLYPH_CACHE_HEIGHT, + 0, sizeof (intel_glyph_t), NULL); + } + + *out = cache; + return CAIRO_STATUS_SUCCESS; +} + +cairo_int_status_t +intel_get_glyph (intel_device_t *device, + cairo_scaled_font_t *scaled_font, + cairo_scaled_glyph_t *scaled_glyph) +{ + cairo_bool_t own_surface = FALSE; + intel_buffer_cache_t *cache; + cairo_status_t status; + + if (scaled_glyph->surface == NULL) { + status = + scaled_font->backend->scaled_glyph_init (scaled_font, + scaled_glyph, + CAIRO_SCALED_GLYPH_INFO_SURFACE); + if (unlikely (status)) + return status; + + if (unlikely (scaled_glyph->surface == NULL)) + return CAIRO_INT_STATUS_UNSUPPORTED; + + own_surface = TRUE; + } + + if (unlikely (scaled_glyph->surface->width == 0 || + scaled_glyph->surface->height == 0)) + { + return CAIRO_INT_STATUS_NOTHING_TO_DO; + } + + if (unlikely (scaled_glyph->surface->width > GLYPH_CACHE_MAX_SIZE || + scaled_glyph->surface->height > GLYPH_CACHE_MAX_SIZE)) + { + return CAIRO_INT_STATUS_UNSUPPORTED; + } + + status = intel_get_glyph_cache (device, + scaled_glyph->surface->format, + &cache); + if (unlikely (status)) + return status; + + status = intel_glyph_cache_add_glyph (device, cache, scaled_glyph); + if (unlikely (_cairo_status_is_error (status))) + return status; + + if (unlikely (status == CAIRO_INT_STATUS_UNSUPPORTED)) { + /* no room, replace entire cache */ + + assert (cache->buffer.bo->exec != NULL); + + _cairo_rtree_reset (&cache->rtree); + intel_bo_destroy (device, cache->buffer.bo); + cache->buffer.bo = NULL; + + status = intel_buffer_cache_init (cache, device, + scaled_glyph->surface->format, + GLYPH_CACHE_WIDTH, + GLYPH_CACHE_HEIGHT); + if (unlikely (status)) + return status; + + status = intel_glyph_cache_add_glyph (device, cache, scaled_glyph); + if (unlikely (status)) + return status; + } + + if (own_surface) { + /* and release the copy of the image from system memory */ + cairo_surface_destroy (&scaled_glyph->surface->base); + scaled_glyph->surface = NULL; + } + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +intel_buffer_cache_init (intel_buffer_cache_t *cache, + intel_device_t *device, + cairo_format_t format, + int width, int height) +{ + const uint32_t tiling = I915_TILING_Y; + + assert ((width & 3) == 0); + assert ((height & 1) == 0); + cache->buffer.format = format; + cache->buffer.width = width; + cache->buffer.height = height; + + switch (format) { + case CAIRO_FORMAT_A1: + case CAIRO_FORMAT_RGB24: + ASSERT_NOT_REACHED; + case CAIRO_FORMAT_ARGB32: + cache->buffer.map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + cache->buffer.stride = width * 4; + break; + case CAIRO_FORMAT_A8: + cache->buffer.map0 = MAPSURF_8BIT | MT_8BIT_I8; + cache->buffer.stride = width; + break; + } + cache->buffer.map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | 
+ ((width - 1) << MS3_WIDTH_SHIFT); + cache->buffer.map1 = ((cache->buffer.stride / 4) - 1) << MS4_PITCH_SHIFT; + + cache->buffer.bo = intel_bo_create (device, + height * cache->buffer.stride, FALSE); + if (unlikely (cache->buffer.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + intel_bo_set_tiling (device, cache->buffer.bo, tiling, cache->buffer.stride); + + cache->buffer.map0 |= MS3_tiling (cache->buffer.bo->tiling); + + cache->ref_count = 0; + cairo_list_init (&cache->link); + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +intel_snapshot_cache_insert (intel_device_t *device, + intel_surface_t *surface) +{ + cairo_status_t status; + int bpp; + + bpp = 1; + if (surface->drm.format != CAIRO_FORMAT_A8) + bpp = 4; + + surface->snapshot_cache_entry.hash = (unsigned long) surface; + surface->snapshot_cache_entry.size = + surface->drm.width * surface->drm.height * bpp; + + if (surface->snapshot_cache_entry.size > + device->snapshot_cache_max_size) + { + return CAIRO_STATUS_SUCCESS; + } + + if (device->snapshot_cache.freeze_count == 0) + _cairo_cache_freeze (&device->snapshot_cache); + + status = _cairo_cache_insert (&device->snapshot_cache, + &surface->snapshot_cache_entry); + if (unlikely (status)) { + surface->snapshot_cache_entry.hash = 0; + return status; + } + + cairo_surface_reference (&surface->drm.base); + + return CAIRO_STATUS_SUCCESS; +} + +void +intel_surface_detach_snapshot (cairo_surface_t *abstract_surface) +{ + intel_surface_t *surface = (intel_surface_t *) abstract_surface; + + if (surface->snapshot_cache_entry.hash) { + intel_device_t *device; + + device = (intel_device_t *) surface->drm.base.device; + _cairo_cache_remove (&device->snapshot_cache, + &surface->snapshot_cache_entry); + surface->snapshot_cache_entry.hash = 0; + } +} + +void +intel_snapshot_cache_thaw (intel_device_t *device) +{ + if (device->snapshot_cache.freeze_count) + _cairo_cache_thaw (&device->snapshot_cache); +} + +static cairo_bool_t +_gradient_color_stops_equal (const cairo_gradient_pattern_t *a, + const cairo_gradient_pattern_t *b) +{ + unsigned int n; + + if (a->n_stops != b->n_stops) + return FALSE; + + for (n = 0; n < a->n_stops; n++) { + if (_cairo_fixed_from_double (a->stops[n].offset) != + _cairo_fixed_from_double (b->stops[n].offset)) + { + return FALSE; + } + + if (! 
_cairo_color_equal (&a->stops[n].color, &b->stops[n].color)) + return FALSE; + } + + return TRUE; +} + +static uint32_t +hars_petruska_f54_1_random (void) +{ +#define rol(x,k) ((x << k) | (x >> (32-k))) + static uint32_t x; + return x = (x ^ rol (x, 5) ^ rol (x, 24)) + 0x37798849; +#undef rol +} + +static int +intel_gradient_sample_width (const cairo_gradient_pattern_t *gradient) +{ + unsigned int n; + int width; + + width = 8; + for (n = 1; n < gradient->n_stops; n++) { + double dx = gradient->stops[n].offset - gradient->stops[n-1].offset; + double delta, max; + int ramp; + + if (dx == 0) + continue; + + max = gradient->stops[n].color.red - + gradient->stops[n-1].color.red; + + delta = gradient->stops[n].color.green - + gradient->stops[n-1].color.green; + if (delta > max) + max = delta; + + delta = gradient->stops[n].color.blue - + gradient->stops[n-1].color.blue; + if (delta > max) + max = delta; + + delta = gradient->stops[n].color.alpha - + gradient->stops[n-1].color.alpha; + if (delta > max) + max = delta; + + ramp = 128 * max / dx; + if (ramp > width) + width = ramp; + } + + width = (width + 7) & -8; + return MIN (width, 1024); +} + +cairo_status_t +intel_gradient_render (intel_device_t *device, + const cairo_gradient_pattern_t *pattern, + intel_buffer_t *buffer) +{ + pixman_image_t *gradient, *image; + pixman_gradient_stop_t pixman_stops_stack[32]; + pixman_gradient_stop_t *pixman_stops; + pixman_point_fixed_t p1, p2; + int width; + unsigned int i; + cairo_status_t status; + + for (i = 0; i < device->gradient_cache.size; i++) { + if (_gradient_color_stops_equal (pattern, + &device->gradient_cache.cache[i].pattern.gradient.base)) { + *buffer = device->gradient_cache.cache[i].buffer; + return CAIRO_STATUS_SUCCESS; + } + } + + pixman_stops = pixman_stops_stack; + if (unlikely (pattern->n_stops > ARRAY_LENGTH (pixman_stops_stack))) { + pixman_stops = _cairo_malloc_ab (pattern->n_stops, + sizeof (pixman_gradient_stop_t)); + if (unlikely (pixman_stops == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + for (i = 0; i < pattern->n_stops; i++) { + pixman_stops[i].x = _cairo_fixed_16_16_from_double (pattern->stops[i].offset); + pixman_stops[i].color.red = pattern->stops[i].color.red_short; + pixman_stops[i].color.green = pattern->stops[i].color.green_short; + pixman_stops[i].color.blue = pattern->stops[i].color.blue_short; + pixman_stops[i].color.alpha = pattern->stops[i].color.alpha_short; + } + + width = intel_gradient_sample_width (pattern); + + p1.x = 0; + p1.y = 0; + p2.x = width << 16; + p2.y = 0; + + gradient = pixman_image_create_linear_gradient (&p1, &p2, + pixman_stops, + pattern->n_stops); + if (pixman_stops != pixman_stops_stack) + free (pixman_stops); + + if (unlikely (gradient == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + pixman_image_set_filter (gradient, PIXMAN_FILTER_BILINEAR, NULL, 0); + + image = pixman_image_create_bits (PIXMAN_a8r8g8b8, width, 1, NULL, 0); + if (unlikely (image == NULL)) { + pixman_image_unref (gradient); + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + pixman_image_composite (PIXMAN_OP_SRC, + gradient, NULL, image, + 0, 0, + 0, 0, + 0, 0, + width, 1); + + pixman_image_unref (gradient); + + buffer->bo = intel_bo_create (device, 4*width, FALSE); + if (unlikely (buffer->bo == NULL)) { + pixman_image_unref (image); + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + intel_bo_write (device, buffer->bo, 0, 4*width, pixman_image_get_data (image)); + pixman_image_unref (image); + + buffer->offset = 0; + buffer->width = 
width; + buffer->height = 1; + buffer->stride = 4*width; + buffer->format = CAIRO_FORMAT_ARGB32; + buffer->map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + buffer->map0 |= MS3_tiling (buffer->bo->tiling); + buffer->map0 |= ((width - 1) << MS3_WIDTH_SHIFT); + buffer->map1 = (width - 1) << MS4_PITCH_SHIFT; + + if (device->gradient_cache.size < GRADIENT_CACHE_SIZE) { + i = device->gradient_cache.size++; + } else { + i = hars_petruska_f54_1_random () % GRADIENT_CACHE_SIZE; + _cairo_pattern_fini (&device->gradient_cache.cache[i].pattern.base); + intel_bo_destroy (device, device->gradient_cache.cache[i].buffer.bo); + } + + status = _cairo_pattern_init_copy (&device->gradient_cache.cache[i].pattern.base, + &pattern->base); + if (unlikely (status)) { + intel_bo_destroy (device, buffer->bo); + /* Ensure the cache is correctly initialised for i965_device_destroy */ + _cairo_pattern_init_solid (&device->gradient_cache.cache[i].pattern.solid, + CAIRO_COLOR_TRANSPARENT, + CAIRO_CONTENT_ALPHA); + return status; + } + + device->gradient_cache.cache[i].buffer = *buffer; + return CAIRO_STATUS_SUCCESS; +} diff --git a/src/drm/cairo-drm-private.h b/src/drm/cairo-drm-private.h index 0f1b735f..549832d2 100644 --- a/src/drm/cairo-drm-private.h +++ b/src/drm/cairo-drm-private.h @@ -38,11 +38,14 @@ #include "cairo-drm.h" -#include "cairo-surface-private.h" +#include "cairo-device-private.h" #include "cairo-reference-count-private.h" +#include "cairo-surface-private.h" #include <sys/types.h> /* dev_t */ +typedef struct _cairo_drm_device cairo_drm_device_t; + typedef cairo_drm_device_t * (*cairo_drm_device_create_func_t) (int fd, dev_t dev, @@ -50,6 +53,9 @@ typedef cairo_drm_device_t * int chip_id); typedef cairo_int_status_t +(*cairo_drm_device_flush_func_t) (cairo_drm_device_t *device); + +typedef cairo_int_status_t (*cairo_drm_device_throttle_func_t) (cairo_drm_device_t *device); typedef void @@ -76,11 +82,15 @@ typedef cairo_int_status_t typedef cairo_status_t (*cairo_drm_surface_enable_scan_out_func_t) (void *surface); +typedef cairo_surface_t * +(*cairo_drm_surface_map_to_image_func_t) (void *surface); + typedef struct _cairo_drm_bo_backend { void (*release) (void *device, void *bo); } cairo_drm_bo_backend_t; typedef struct _cairo_drm_device_backend { + cairo_drm_device_flush_func_t flush; cairo_drm_device_throttle_func_t throttle; cairo_drm_device_destroy_func_t destroy; } cairo_drm_device_backend_t; @@ -91,6 +101,7 @@ typedef struct _cairo_drm_surface_backend { cairo_drm_surface_create_from_cacheable_image_func_t create_from_cacheable_image; cairo_drm_surface_flink_func_t flink; cairo_drm_surface_enable_scan_out_func_t enable_scan_out; + cairo_drm_surface_map_to_image_func_t map_to_image; } cairo_drm_surface_backend_t; typedef struct _cairo_drm_bo { @@ -101,9 +112,10 @@ typedef struct _cairo_drm_bo { } cairo_drm_bo_t; struct _cairo_drm_device { - cairo_reference_count_t ref_count; - cairo_status_t status; + cairo_device_t base; + int vendor_id; + int chip_id; dev_t id; int fd; @@ -119,7 +131,6 @@ struct _cairo_drm_device { typedef struct _cairo_drm_surface { cairo_surface_t base; - cairo_drm_device_t *device; cairo_drm_bo_t *bo; cairo_format_t format; @@ -136,17 +147,16 @@ cairo_drm_bo_reference (cairo_drm_bo_t *bo) return bo; } -static inline void -cairo_drm_bo_destroy (cairo_drm_device_t *device, +static always_inline void +cairo_drm_bo_destroy (cairo_device_t *abstract_device, cairo_drm_bo_t *bo) { - if (_cairo_reference_count_dec_and_test (&bo->ref_count)) + if (_cairo_reference_count_dec_and_test 
(&bo->ref_count)) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; device->bo.release (device, bo); + } } -cairo_private cairo_drm_device_t * -_cairo_drm_device_create_in_error (cairo_status_t status); - cairo_private cairo_status_t _cairo_drm_bo_open_for_name (const cairo_drm_device_t *dev, cairo_drm_bo_t *bo, @@ -181,56 +191,20 @@ _cairo_drm_surface_get_extents (void *abstract_surface, cairo_rectangle_int_t *rectangle); cairo_private cairo_int_status_t -_cairo_drm_surface_paint (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_mask (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - const cairo_pattern_t *mask, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_stroke (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - const cairo_stroke_style_t *style, - const cairo_matrix_t *ctm, - const cairo_matrix_t *ctm_inverse, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_fill (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - cairo_fill_rule_t fill_rule, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_show_glyphs (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_glyph_t *glyphs, - int num_glyphs, - cairo_scaled_font_t *scaled_font, - cairo_clip_t *clip, - int *remaining_glyphs); - -cairo_private cairo_int_status_t _cairo_drm_surface_flink (void *abstract_surface); +static inline cairo_drm_device_t * +_cairo_drm_device_create_in_error (cairo_status_t status) +{ + return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); +} + cairo_private cairo_drm_device_t * _cairo_drm_device_init (cairo_drm_device_t *device, - int fd, dev_t id, + int fd, + dev_t devid, + int vendor_id, + int chip_id, int max_surface_size); cairo_private void @@ -242,6 +216,12 @@ cairo_private cairo_drm_device_t * _cairo_drm_intel_device_create (int fd, dev_t dev, int vendor_id, int chip_id); cairo_private cairo_drm_device_t * +_cairo_drm_i915_device_create (int fd, dev_t dev, int vendor_id, int chip_id); + +cairo_private cairo_drm_device_t * +_cairo_drm_i965_device_create (int fd, dev_t dev, int vendor_id, int chip_id); + +cairo_private cairo_drm_device_t * _cairo_drm_radeon_device_create (int fd, dev_t dev, int vendor_id, int chip_id); #if CAIRO_HAS_GALLIUM_SURFACE @@ -250,8 +230,13 @@ _cairo_drm_gallium_device_create (int fd, dev_t dev, int vendor_id, int chip_id) #endif slim_hidden_proto (cairo_drm_device_default); -slim_hidden_proto (cairo_drm_device_destroy); slim_hidden_proto (cairo_drm_device_get); -slim_hidden_proto_no_warn (cairo_drm_device_reference); +slim_hidden_proto (cairo_drm_device_get_for_fd); + +slim_hidden_proto (cairo_drm_surface_create_for_name); + +cairo_private cairo_bool_t +_cairo_drm_size_is_valid (cairo_device_t *abstract_device, + int width, int height); #endif /* CAIRO_DRM_PRIVATE_H */ diff --git a/src/drm/cairo-drm-radeon-private.h b/src/drm/cairo-drm-radeon-private.h index 9c0c3b46..e63bb04d 100644 --- a/src/drm/cairo-drm-radeon-private.h +++ b/src/drm/cairo-drm-radeon-private.h @@ -99,9 +99,6 @@ radeon_bo_create (radeon_device_t *dev, cairo_private cairo_drm_bo_t 
* radeon_bo_create_for_name (radeon_device_t *dev, uint32_t name); -cairo_private void -radeon_bo_release (void *_dev, void *_bo); - cairo_private cairo_surface_t * radeon_bo_get_image (const radeon_device_t *device, radeon_bo_t *bo, diff --git a/src/drm/cairo-drm-radeon-surface.c b/src/drm/cairo-drm-radeon-surface.c index 858f353f..7521199a 100644 --- a/src/drm/cairo-drm-radeon-surface.c +++ b/src/drm/cairo-drm-radeon-surface.c @@ -31,6 +31,7 @@ #include "cairo-drm-private.h" #include "cairo-drm-radeon-private.h" + #include "cairo-error-private.h" /* Basic stub surface for radeon chipsets */ @@ -42,7 +43,7 @@ typedef struct _radeon_surface { } radeon_surface_t; static inline radeon_device_t * -to_radeon_device (cairo_drm_device_t *device) +to_radeon_device (cairo_device_t *device) { return (radeon_device_t *) device; } @@ -53,19 +54,14 @@ to_radeon_bo (cairo_drm_bo_t *bo) return (radeon_bo_t *) bo; } -static cairo_status_t -radeon_batch_flush (radeon_device_t *device) -{ - return CAIRO_STATUS_SUCCESS; -} - -static cairo_status_t -radeon_surface_batch_flush (radeon_surface_t *surface) +static cairo_surface_t * +radeon_surface_create_similar (void *abstract_surface, + cairo_content_t content, + int width, + int height) { - if (to_radeon_bo (surface->base.bo)->write_domain) - return radeon_batch_flush (to_radeon_device (surface->base.device)); - - return CAIRO_STATUS_SUCCESS; + return cairo_image_surface_create (_cairo_format_from_content (content), + width, height); } static cairo_status_t @@ -85,6 +81,8 @@ radeon_surface_acquire_source_image (void *abstract_surface, cairo_surface_t *image; cairo_status_t status; + /* XXX batch flush */ + if (surface->base.fallback != NULL) { image = surface->base.fallback; goto DONE; @@ -95,13 +93,15 @@ radeon_surface_acquire_source_image (void *abstract_surface, if (image != NULL) goto DONE; - status = radeon_surface_batch_flush (surface); - if (unlikely (status)) - return status; + if (surface->base.base.backend->flush != NULL) { + status = surface->base.base.backend->flush (surface); + if (unlikely (status)) + return status; + } - image = radeon_bo_get_image (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo), - &surface->base); + image = radeon_bo_get_image (to_radeon_device (surface->base.base.device), + to_radeon_bo (surface->base.bo), + &surface->base); status = image->status; if (unlikely (status)) return status; @@ -121,94 +121,46 @@ DONE: } static void -radeon_surface_release_source_image (void *abstract_surface, - cairo_image_surface_t *image, - void *image_extra) +radeon_surface_release_source_image (void *abstract_surface, + cairo_image_surface_t *image, + void *image_extra) { cairo_surface_destroy (&image->base); } static cairo_surface_t * -radeon_surface_snapshot (void *abstract_surface) -{ - radeon_surface_t *surface = abstract_surface; - cairo_status_t status; - - if (surface->base.fallback != NULL) - return NULL; - - status = radeon_surface_batch_flush (surface); - if (unlikely (status)) - return _cairo_surface_create_in_error (status); - - return radeon_bo_get_image (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo), - &surface->base); -} - -static cairo_status_t -radeon_surface_acquire_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t **image_out, - cairo_rectangle_int_t *image_rect_out, - void **image_extra) +radeon_surface_map_to_image (radeon_surface_t *surface) { - radeon_surface_t *surface = abstract_surface; - cairo_surface_t *image; 
- cairo_status_t status; - void *ptr; - - assert (surface->base.fallback == NULL); - - status = radeon_surface_batch_flush (surface); - if (unlikely (status)) - return status; - - /* Force a read barrier, as well as flushing writes above */ - radeon_bo_wait (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo)); + if (surface->base.fallback == NULL) { + cairo_surface_t *image; + cairo_status_t status; + void *ptr; + + if (surface->base.base.backend->flush != NULL) { + status = surface->base.base.backend->flush (surface); + if (unlikely (status)) + return _cairo_surface_create_in_error (status); + } - ptr = radeon_bo_map (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo)); - if (unlikely (ptr == NULL)) - return _cairo_error (CAIRO_STATUS_NO_MEMORY); + ptr = radeon_bo_map (to_radeon_device (surface->base.base.device), + to_radeon_bo (surface->base.bo)); + if (unlikely (ptr == NULL)) + return _cairo_surface_create_in_error (CAIRO_STATUS_NO_MEMORY); + + image = cairo_image_surface_create_for_data (ptr, + surface->base.format, + surface->base.width, + surface->base.height, + surface->base.stride); + if (unlikely (image->status)) { + radeon_bo_unmap (to_radeon_bo (surface->base.bo)); + return image; + } - image = cairo_image_surface_create_for_data (ptr, - surface->base.format, - surface->base.width, - surface->base.height, - surface->base.stride); - status = image->status; - if (unlikely (status)) { - radeon_bo_unmap (to_radeon_bo (surface->base.bo)); - return status; + surface->base.fallback = image; } - surface->base.fallback = cairo_surface_reference (image); - - *image_out = (cairo_image_surface_t *) image; - *image_extra = NULL; - - image_rect_out->x = 0; - image_rect_out->y = 0; - image_rect_out->width = surface->base.width; - image_rect_out->height = surface->base.height; - - return CAIRO_STATUS_SUCCESS; -} - -static void -radeon_surface_release_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t *image, - cairo_rectangle_int_t *image_rect, - void *image_extra) -{ - /* Keep the fallback until we flush, either explicitly or at the - * end of this context. The idea is to avoid excess migration of - * the buffer between GPU and CPU domains. 
- */ - cairo_surface_destroy (&image->base); + return surface->base.fallback; } static cairo_status_t @@ -218,7 +170,7 @@ radeon_surface_flush (void *abstract_surface) cairo_status_t status; if (surface->base.fallback == NULL) - return radeon_surface_batch_flush (surface); + return CAIRO_STATUS_SUCCESS; /* kill any outstanding maps */ cairo_surface_finish (surface->base.fallback); @@ -232,53 +184,117 @@ radeon_surface_flush (void *abstract_surface) return status; } +static cairo_int_status_t +radeon_surface_paint (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_clip_t *clip) +{ + return _cairo_surface_paint (radeon_surface_map_to_image (abstract_surface), + op, source, clip); +} + +static cairo_int_status_t +radeon_surface_mask (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + const cairo_pattern_t *mask, + cairo_clip_t *clip) +{ + return _cairo_surface_mask (radeon_surface_map_to_image (abstract_surface), + op, source, mask, clip); +} + +static cairo_int_status_t +radeon_surface_stroke (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_stroke (radeon_surface_map_to_image (abstract_surface), + op, source, path, stroke_style, ctm, ctm_inverse, + tolerance, antialias, clip); +} + +static cairo_int_status_t +radeon_surface_fill (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_fill (radeon_surface_map_to_image (abstract_surface), + op, source, path, fill_rule, + tolerance, antialias, clip); +} + +static cairo_int_status_t +radeon_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + *num_remaining = 0; + return _cairo_surface_show_text_glyphs (radeon_surface_map_to_image (abstract_surface), + op, source, + NULL, 0, + glyphs, num_glyphs, + NULL, 0, 0, + scaled_font, clip); +} + static const cairo_surface_backend_t radeon_surface_backend = { CAIRO_SURFACE_TYPE_DRM, - _cairo_drm_surface_create_similar, - radeon_surface_finish, + radeon_surface_create_similar, + radeon_surface_finish, radeon_surface_acquire_source_image, radeon_surface_release_source_image, - radeon_surface_acquire_dest_image, - radeon_surface_release_dest_image, - - NULL, //radeon_surface_clone_similar, - NULL, //radeon_surface_composite, - NULL, //radeon_surface_fill_rectangles, - NULL, //radeon_surface_composite_trapezoids, - NULL, //radeon_surface_create_span_renderer, - NULL, //radeon_surface_check_span_renderer, + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + NULL, /* copy_page */ NULL, /* show_page */ _cairo_drm_surface_get_extents, - NULL, /* old_show_glyphs */ + NULL, /* old-glyphs */ _cairo_drm_surface_get_font_options, - radeon_surface_flush, - NULL, /* mark_dirty_rectangle */ - NULL, //radeon_surface_scaled_font_fini, - NULL, //radeon_surface_scaled_glyph_fini, - - _cairo_drm_surface_paint, - _cairo_drm_surface_mask, - _cairo_drm_surface_stroke, 
- _cairo_drm_surface_fill, - _cairo_drm_surface_show_glyphs, - - radeon_surface_snapshot, - NULL, /* is_similar */ - - NULL, /* reset */ + radeon_surface_flush, + NULL, /* mark dirty */ + NULL, NULL, /* font/glyph fini */ + + radeon_surface_paint, + radeon_surface_mask, + radeon_surface_stroke, + radeon_surface_fill, + radeon_surface_glyphs, }; static void radeon_surface_init (radeon_surface_t *surface, - cairo_content_t content, - cairo_drm_device_t *device) + cairo_content_t content, + cairo_drm_device_t *device) { _cairo_surface_init (&surface->base.base, &radeon_surface_backend, - NULL, /* device */ + &device->base, content); _cairo_drm_surface_init (&surface->base, device); @@ -318,7 +334,7 @@ radeon_surface_create_internal (cairo_drm_device_t *device, surface->base.stride = cairo_format_stride_for_width (surface->base.format, width); - surface->base.bo = radeon_bo_create (to_radeon_device (device), + surface->base.bo = radeon_bo_create (to_radeon_device (&device->base), surface->base.stride * height, RADEON_GEM_DOMAIN_GTT); @@ -379,7 +395,7 @@ radeon_surface_create_for_name (cairo_drm_device_t *device, surface->base.height = height; surface->base.stride = stride; - surface->base.bo = radeon_bo_create_for_name (to_radeon_device (device), + surface->base.bo = radeon_bo_create_for_name (to_radeon_device (&device->base), name); if (unlikely (surface->base.bo == NULL)) { @@ -428,13 +444,12 @@ _cairo_drm_radeon_device_create (int fd, dev_t dev, int vendor_id, int chip_id) device->base.surface.flink = _cairo_drm_surface_flink; device->base.surface.enable_scan_out = NULL; + device->base.device.flush = NULL; device->base.device.throttle = NULL; device->base.device.destroy = radeon_device_destroy; - device->base.bo.release = radeon_bo_release; - device->vram_limit = vram_size; device->gart_limit = gart_size; - return _cairo_drm_device_init (&device->base, dev, fd, MAX_SIZE); + return _cairo_drm_device_init (&device->base, fd, dev, vendor_id, chip_id, MAX_SIZE); } diff --git a/src/drm/cairo-drm-radeon.c b/src/drm/cairo-drm-radeon.c index e435d705..a9683083 100644 --- a/src/drm/cairo-drm-radeon.c +++ b/src/drm/cairo-drm-radeon.c @@ -32,6 +32,7 @@ #include "cairo-drm-private.h" #include "cairo-drm-radeon-private.h" #include "cairo-drm-ioctl-private.h" + #include "cairo-error-private.h" #include <sys/ioctl.h> @@ -371,7 +372,7 @@ radeon_bo_create_for_name (radeon_device_t *device, return &bo->base; } -void +static void radeon_bo_release (void *_dev, void *_bo) { radeon_device_t *device = _dev; @@ -431,6 +432,8 @@ radeon_device_init (radeon_device_t *device, int fd) { _radeon_device_init_bo_cache (device); + device->base.bo.release = radeon_bo_release; + return CAIRO_STATUS_SUCCESS; } diff --git a/src/drm/cairo-drm-surface.c b/src/drm/cairo-drm-surface.c index 429b528d..e37d3e2f 100644 --- a/src/drm/cairo-drm-surface.c +++ b/src/drm/cairo-drm-surface.c @@ -33,8 +33,8 @@ #include "cairoint.h" #include "cairo-drm-private.h" + #include "cairo-error-private.h" -#include "cairo-surface-fallback-private.h" cairo_surface_t * _cairo_drm_surface_create_similar (void *abstract_surface, @@ -43,13 +43,8 @@ _cairo_drm_surface_create_similar (void *abstract_surface, int height) { cairo_drm_surface_t *surface = abstract_surface; - cairo_drm_device_t *device; - - if (surface->fallback != NULL) - return _cairo_image_surface_create_with_content (content, - width, height); + cairo_drm_device_t *device = (cairo_drm_device_t *) surface->base.device; - device = surface->device; if (width > device->max_surface_size || 
height > device->max_surface_size) return NULL; @@ -60,8 +55,6 @@ void _cairo_drm_surface_init (cairo_drm_surface_t *surface, cairo_drm_device_t *device) { - surface->device = cairo_drm_device_reference (device); - surface->bo = NULL; surface->width = 0; surface->height = 0; @@ -75,9 +68,7 @@ cairo_status_t _cairo_drm_surface_finish (cairo_drm_surface_t *surface) { if (surface->bo != NULL) - cairo_drm_bo_destroy (surface->device, surface->bo); - - cairo_drm_device_destroy (surface->device); + cairo_drm_bo_destroy (surface->base.device, surface->bo); return CAIRO_STATUS_SUCCESS; } @@ -105,147 +96,32 @@ _cairo_drm_surface_get_extents (void *abstract_surface, return TRUE; } -cairo_int_status_t -_cairo_drm_surface_paint (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) - return _cairo_surface_paint (surface->fallback, op, source, clip); - - return _cairo_surface_fallback_paint (&surface->base, op, source, clip); -} - -cairo_int_status_t -_cairo_drm_surface_mask (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - const cairo_pattern_t *mask, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - return _cairo_surface_mask (surface->fallback, - op, source, mask, - clip); - } - - return _cairo_surface_fallback_mask (&surface->base, - op, source, mask, clip); -} - -cairo_int_status_t -_cairo_drm_surface_stroke (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - const cairo_stroke_style_t *style, - const cairo_matrix_t *ctm, - const cairo_matrix_t *ctm_inverse, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - return _cairo_surface_stroke (surface->fallback, - op, source, - path, style, - ctm, ctm_inverse, - tolerance, antialias, - clip); - } - - return _cairo_surface_fallback_stroke (&surface->base, op, source, - path, style, - ctm, ctm_inverse, - tolerance, antialias, - clip); -} - -cairo_int_status_t -_cairo_drm_surface_fill (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - cairo_fill_rule_t fill_rule, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - return _cairo_surface_fill (surface->fallback, - op, source, - path, fill_rule, - tolerance, antialias, - clip); - } - - return _cairo_surface_fallback_fill (&surface->base, op, source, - path, fill_rule, - tolerance, antialias, - clip); -} - -cairo_int_status_t -_cairo_drm_surface_show_glyphs (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_glyph_t *glyphs, - int num_glyphs, - cairo_scaled_font_t *scaled_font, - cairo_clip_t *clip, - int *remaining_glyphs) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - *remaining_glyphs = 0; - return _cairo_surface_show_text_glyphs (surface->fallback, - op, source, - NULL, 0, - glyphs, num_glyphs, - NULL, 0, 0, - scaled_font, - clip); - } - - return _cairo_surface_fallback_show_glyphs (&surface->base, - op, source, - glyphs, num_glyphs, - scaled_font, - clip); -} - - cairo_surface_t * -cairo_drm_surface_create (cairo_drm_device_t *device, 
+cairo_drm_surface_create (cairo_device_t *abstract_device, cairo_content_t content, int width, int height) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; cairo_surface_t *surface; if (! CAIRO_CONTENT_VALID (content)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_CONTENT)); - if (device != NULL && device->status) + if (device != NULL && device->base.status) { - surface = _cairo_surface_create_in_error (device->status); + surface = _cairo_surface_create_in_error (device->base.status); } else if (device == NULL || device->surface.create == NULL || width == 0 || width > device->max_surface_size || height == 0 || height > device->max_surface_size) { - surface = _cairo_image_surface_create_with_content (content, - width, height); + surface = cairo_image_surface_create (_cairo_format_from_content (content), + width, height); + } + else if (device->base.finished) + { + surface = _cairo_surface_create_in_error (CAIRO_STATUS_SURFACE_FINISHED); } else { @@ -256,19 +132,20 @@ cairo_drm_surface_create (cairo_drm_device_t *device, } cairo_surface_t * -cairo_drm_surface_create_for_name (cairo_drm_device_t *device, +cairo_drm_surface_create_for_name (cairo_device_t *abstract_device, unsigned int name, cairo_format_t format, int width, int height, int stride) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; cairo_surface_t *surface; if (! CAIRO_FORMAT_VALID (format)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); - if (device != NULL && device->status) + if (device != NULL && device->base.status) { - surface = _cairo_surface_create_in_error (device->status); + surface = _cairo_surface_create_in_error (device->base.status); } else if (device == NULL || device->surface.create_for_name == NULL) { @@ -280,6 +157,10 @@ cairo_drm_surface_create_for_name (cairo_drm_device_t *device, { surface = _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_SIZE)); } + else if (device->base.finished) + { + surface = _cairo_surface_create_in_error (CAIRO_STATUS_SURFACE_FINISHED); + } else { surface = device->surface.create_for_name (device, @@ -289,20 +170,25 @@ cairo_drm_surface_create_for_name (cairo_drm_device_t *device, return surface; } +slim_hidden_def (cairo_drm_surface_create_for_name); cairo_surface_t * -cairo_drm_surface_create_from_cacheable_image (cairo_drm_device_t *dev, +cairo_drm_surface_create_from_cacheable_image (cairo_device_t *abstract_device, cairo_surface_t *surface) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; + if (surface->status) { surface = _cairo_surface_create_in_error (surface->status); - } else if (dev != NULL && dev->status) { - surface = _cairo_surface_create_in_error (dev->status); - } else if (dev == NULL || dev->surface.create_from_cacheable_image == NULL) { + } else if (device != NULL && device->base.status) { + surface = _cairo_surface_create_in_error (device->base.status); + } else if (device == NULL || device->surface.create_from_cacheable_image == NULL) { /* XXX invalid device! 
*/ surface = _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + } else if (device->base.finished) { + surface = _cairo_surface_create_in_error (CAIRO_STATUS_SURFACE_FINISHED); } else { - surface = dev->surface.create_from_cacheable_image (dev, surface); + surface = device->surface.create_from_cacheable_image (device, surface); } return surface; @@ -324,32 +210,22 @@ cairo_status_t cairo_drm_surface_enable_scan_out (cairo_surface_t *abstract_surface) { cairo_drm_surface_t *surface; + cairo_drm_device_t *device; surface = _cairo_surface_as_drm (abstract_surface); - if (surface == NULL) + if (unlikely (surface == NULL)) return _cairo_error (CAIRO_STATUS_SURFACE_TYPE_MISMATCH); + if (unlikely (surface->base.finished)) + return _cairo_error (CAIRO_STATUS_SURFACE_FINISHED); - if (surface->device->surface.enable_scan_out == NULL) + device = (cairo_drm_device_t *) surface->base.device; + if (device->surface.enable_scan_out == NULL) return CAIRO_STATUS_SUCCESS; - return surface->device->surface.enable_scan_out (abstract_surface); -} + if (unlikely (device->base.finished)) + return _cairo_error (CAIRO_STATUS_SURFACE_FINISHED); -cairo_drm_device_t * -cairo_drm_surface_get_device (cairo_surface_t *abstract_surface) -{ - cairo_drm_surface_t *surface; - - if (unlikely (abstract_surface->status)) - return _cairo_drm_device_create_in_error (abstract_surface->status); - - surface = _cairo_surface_as_drm (abstract_surface); - if (surface == NULL) { - _cairo_error_throw (CAIRO_STATUS_SURFACE_TYPE_MISMATCH); - return NULL; - } - - return surface->device; + return device->surface.enable_scan_out (abstract_surface); } unsigned int @@ -371,13 +247,15 @@ _cairo_drm_surface_flink (void *abstract_surface) { cairo_drm_surface_t *surface = abstract_surface; - return _cairo_drm_bo_flink (surface->device, surface->bo); + return _cairo_drm_bo_flink ((cairo_drm_device_t *) surface->base.device, + surface->bo); } unsigned int cairo_drm_surface_get_name (cairo_surface_t *abstract_surface) { cairo_drm_surface_t *surface; + cairo_drm_device_t *device; cairo_status_t status; surface = _cairo_surface_as_drm (abstract_surface); @@ -389,10 +267,11 @@ cairo_drm_surface_get_name (cairo_surface_t *abstract_surface) if (surface->bo->name) return surface->bo->name; - if (surface->device->surface.flink == NULL) + device = (cairo_drm_device_t *) surface->base.device; + if (device->surface.flink == NULL) return 0; - status = surface->device->surface.flink (abstract_surface); + status = device->surface.flink (abstract_surface); if (status) { if (_cairo_status_is_error (status)) status = _cairo_surface_set_error (abstract_surface, status); @@ -456,10 +335,8 @@ cairo_surface_t * cairo_drm_surface_map (cairo_surface_t *abstract_surface) { cairo_drm_surface_t *surface; - cairo_rectangle_int_t roi; - cairo_image_surface_t *image; + cairo_drm_device_t *device; cairo_status_t status; - void *image_extra; if (unlikely (abstract_surface->status)) return _cairo_surface_create_in_error (abstract_surface->status); @@ -474,23 +351,9 @@ cairo_drm_surface_map (cairo_surface_t *abstract_surface) return _cairo_surface_create_in_error (status); } - roi.x = roi.y = 0; - roi.width = surface->width; - roi.height = surface->height; - - status = _cairo_surface_acquire_dest_image (abstract_surface, - &roi, - &image, - &roi, - &image_extra); - if (unlikely (status)) - return _cairo_surface_create_in_error (status); - - assert (image_extra == NULL); - surface->map_count++; - - return &image->base; + device = (cairo_drm_device_t *) 
surface->base.device; + return cairo_surface_reference (device->surface.map_to_image (surface)); } void diff --git a/src/drm/cairo-drm.c b/src/drm/cairo-drm.c index a218fa52..9ccb10d5 100644 --- a/src/drm/cairo-drm.c +++ b/src/drm/cairo-drm.c @@ -33,6 +33,8 @@ #include "cairoint.h" #include "cairo-drm-private.h" + +#include "cairo-device-private.h" #include "cairo-error-private.h" #define LIBUDEV_I_KNOW_THE_API_IS_SUBJECT_TO_CHANGE @@ -43,29 +45,6 @@ static cairo_drm_device_t *_cairo_drm_known_devices; static cairo_drm_device_t *_cairo_drm_default_device; -static const cairo_drm_device_t _nil_device = { - CAIRO_REFERENCE_COUNT_INVALID, - CAIRO_STATUS_NO_MEMORY -}; - -static const cairo_drm_device_t _invalid_device = { - CAIRO_REFERENCE_COUNT_INVALID, - CAIRO_STATUS_INVALID_CONTENT -}; - -cairo_drm_device_t * -_cairo_drm_device_create_in_error (cairo_status_t status) -{ - switch ((int) status) { - default: - ASSERT_NOT_REACHED; - case CAIRO_STATUS_NO_MEMORY: - return (cairo_drm_device_t *) &_nil_device; - case CAIRO_STATUS_INVALID_CONTENT: - return (cairo_drm_device_t *) &_invalid_device; - } -} - static const char * get_udev_property(struct udev_device *device, const char *name) { @@ -81,16 +60,69 @@ get_udev_property(struct udev_device *device, const char *name) return NULL; } +static void +_device_flush (void *abstract_device) +{ + cairo_drm_device_t *device = abstract_device; + + device->device.flush (device); +} + +static void +_device_finish (void *abstract_device) +{ + cairo_drm_device_t *device = abstract_device; + + CAIRO_MUTEX_LOCK (_cairo_drm_device_mutex); + if (device->prev != NULL) + device->prev->next = device->next; + else + _cairo_drm_known_devices = device->next; + if (device->next != NULL) + device->next->prev = device->prev; + + CAIRO_MUTEX_UNLOCK (_cairo_drm_device_mutex); + + if (_cairo_atomic_ptr_cmpxchg (&_cairo_drm_default_device, + device, NULL) == device) + { + cairo_device_destroy (&device->base); + } +} + +static void +_device_destroy (void *abstract_device) +{ + cairo_drm_device_t *device = abstract_device; + + device->device.destroy (device); +} + +static const cairo_device_backend_t _cairo_drm_device_backend = { + CAIRO_DEVICE_TYPE_DRM, + + NULL, NULL, /* lock, unlock */ + + _device_flush, + _device_finish, + _device_destroy, +}; + cairo_drm_device_t * _cairo_drm_device_init (cairo_drm_device_t *dev, int fd, dev_t devid, + int vendor_id, + int chip_id, int max_surface_size) { - CAIRO_REFERENCE_COUNT_INIT (&dev->ref_count, 1); - dev->status = CAIRO_STATUS_SUCCESS; + assert (CAIRO_MUTEX_IS_LOCKED (_cairo_drm_device_mutex)); + + _cairo_device_init (&dev->base, &_cairo_drm_device_backend); dev->id = devid; + dev->vendor_id = vendor_id; + dev->chip_id = chip_id; dev->fd = fd; dev->max_surface_size = max_surface_size; @@ -102,12 +134,12 @@ _cairo_drm_device_init (cairo_drm_device_t *dev, _cairo_drm_known_devices = dev; if (_cairo_drm_default_device == NULL) - _cairo_drm_default_device = cairo_drm_device_reference (dev); + _cairo_drm_default_device = (cairo_drm_device_t *) cairo_device_reference (&dev->base); return dev; } -cairo_drm_device_t * +cairo_device_t * cairo_drm_device_get (struct udev_device *device) { static const struct dri_driver_entry { @@ -115,7 +147,34 @@ cairo_drm_device_get (struct udev_device *device) uint32_t chip_id; cairo_drm_device_create_func_t create_func; } driver_map[] = { + { 0x8086, 0x29a2, _cairo_drm_i965_device_create }, /* I965_G */ + { 0x8086, 0x2982, _cairo_drm_i965_device_create }, /* G35_G */ + { 0x8086, 0x2992, 
_cairo_drm_i965_device_create }, /* I965_Q */ + { 0x8086, 0x2972, _cairo_drm_i965_device_create }, /* I946_GZ */ + { 0x8086, 0x2a02, _cairo_drm_i965_device_create }, /* I965_GM */ + { 0x8086, 0x2a12, _cairo_drm_i965_device_create }, /* I965_GME */ + { 0x8086, 0x2e02, _cairo_drm_i965_device_create }, /* IGD_E_G */ + { 0x8086, 0x2e22, _cairo_drm_i965_device_create }, /* G45_G */ + { 0x8086, 0x2e12, _cairo_drm_i965_device_create }, /* Q45_G */ + { 0x8086, 0x2e32, _cairo_drm_i965_device_create }, /* G41_G */ + { 0x8086, 0x2a42, _cairo_drm_i965_device_create }, /* GM45_GM */ + + { 0x8086, 0x2582, _cairo_drm_i915_device_create }, /* I915_G */ + { 0x8086, 0x2592, _cairo_drm_i915_device_create }, /* I915_GM */ + { 0x8086, 0x258a, _cairo_drm_i915_device_create }, /* E7221_G */ + { 0x8086, 0x2772, _cairo_drm_i915_device_create }, /* I945_G */ + { 0x8086, 0x27a2, _cairo_drm_i915_device_create }, /* I945_GM */ + { 0x8086, 0x27ae, _cairo_drm_i915_device_create }, /* I945_GME */ + { 0x8086, 0x29c2, _cairo_drm_i915_device_create }, /* G33_G */ + { 0x8086, 0x29b2, _cairo_drm_i915_device_create }, /* Q35_G */ + { 0x8086, 0x29d2, _cairo_drm_i915_device_create }, /* Q33_G */ + { 0x8086, 0xa011, _cairo_drm_i915_device_create }, /* IGD_GM */ + { 0x8086, 0xa001, _cairo_drm_i915_device_create }, /* IGD_G */ + + /* XXX i830 */ + { 0x8086, ~0, _cairo_drm_intel_device_create }, + { 0x1002, ~0, _cairo_drm_radeon_device_create }, #if CAIRO_HAS_GALLIUM_SURFACE { ~0, ~0, _cairo_drm_gallium_device_create }, @@ -135,16 +194,16 @@ cairo_drm_device_get (struct udev_device *device) CAIRO_MUTEX_LOCK (_cairo_drm_device_mutex); for (dev = _cairo_drm_known_devices; dev != NULL; dev = dev->next) { if (dev->id == devid) { - dev = cairo_drm_device_reference (dev); + dev = (cairo_drm_device_t *) cairo_device_reference (&dev->base); goto DONE; } } - dev = (cairo_drm_device_t *) &_nil_device; parent = udev_device_get_parent (device); pci_id = get_udev_property (parent, "PCI_ID"); if (sscanf (pci_id, "%x:%x", &vendor_id, &chip_id) != 2) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); + dev = (cairo_drm_device_t *) + _cairo_device_create_in_error (CAIRO_STATUS_DEVICE_ERROR); goto DONE; } @@ -166,8 +225,8 @@ cairo_drm_device_get (struct udev_device *device) } if (i == ARRAY_LENGTH (driver_map)) { - /* XXX should be no driver or something*/ - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); + dev = (cairo_drm_device_t *) + _cairo_device_create_in_error (CAIRO_STATUS_DEVICE_ERROR); goto DONE; } } @@ -190,22 +249,21 @@ cairo_drm_device_get (struct udev_device *device) DONE: CAIRO_MUTEX_UNLOCK (_cairo_drm_device_mutex); - return dev; + return &dev->base; } slim_hidden_def (cairo_drm_device_get); -cairo_drm_device_t * +cairo_device_t * cairo_drm_device_get_for_fd (int fd) { struct stat st; struct udev *udev; struct udev_device *device; - cairo_drm_device_t *dev = NULL; + cairo_device_t *dev = NULL; if (fstat (fd, &st) < 0 || ! 
S_ISCHR (st.st_mode)) { //_cairo_error_throw (CAIRO_STATUS_INVALID_DEVICE); - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return (cairo_drm_device_t *) &_nil_device; + return _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); } udev = udev_new (); @@ -220,25 +278,24 @@ cairo_drm_device_get_for_fd (int fd) return dev; } +slim_hidden_def (cairo_drm_device_get_for_fd); -cairo_drm_device_t * +cairo_device_t * cairo_drm_device_default (void) { struct udev *udev; struct udev_enumerate *e; struct udev_list_entry *entry; - cairo_drm_device_t *dev; + cairo_device_t *dev; /* optimistic atomic pointer read */ - dev = _cairo_drm_default_device; + dev = &_cairo_drm_default_device->base; if (dev != NULL) return dev; udev = udev_new(); - if (udev == NULL) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return (cairo_drm_device_t *) &_nil_device; - } + if (udev == NULL) + return _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); e = udev_enumerate_new (udev); udev_enumerate_add_match_subsystem (e, "drm"); @@ -255,8 +312,9 @@ cairo_drm_device_default (void) udev_device_unref (device); if (dev != NULL) { - if (dev->fd == -1) { /* try again, we may find a usable card */ - cairo_drm_device_destroy (dev); + if (((cairo_drm_device_t *) dev)->fd == -1) { + /* try again, we may find a usable card */ + cairo_device_destroy (dev); dev = NULL; } else break; @@ -265,7 +323,7 @@ cairo_drm_device_default (void) udev_enumerate_unref (e); udev_unref (udev); - cairo_drm_device_destroy (dev); /* owned by _cairo_drm_default_device */ + cairo_device_destroy (dev); /* owned by _cairo_drm_default_device */ return dev; } slim_hidden_def (cairo_drm_device_default); @@ -274,90 +332,56 @@ void _cairo_drm_device_reset_static_data (void) { if (_cairo_drm_default_device != NULL) { - cairo_drm_device_destroy (_cairo_drm_default_device); + cairo_device_t *device = &_cairo_drm_default_device->base; _cairo_drm_default_device = NULL; + cairo_device_destroy (device); } } -cairo_drm_device_t * -cairo_drm_device_reference (cairo_drm_device_t *device) -{ - if (device == NULL || - CAIRO_REFERENCE_COUNT_IS_INVALID (&device->ref_count)) - { - return device; - } - - assert (CAIRO_REFERENCE_COUNT_HAS_REFERENCE (&device->ref_count)); - _cairo_reference_count_inc (&device->ref_count); - - return device; -} -slim_hidden_def (cairo_drm_device_reference); - int -cairo_drm_device_get_fd (cairo_drm_device_t *device) +cairo_drm_device_get_fd (cairo_device_t *abstract_device) { - if (device->status) + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; + + if (device->base.status) return -1; return device->fd; } -cairo_status_t -cairo_drm_device_status (cairo_drm_device_t *device) -{ - if (device == NULL) - return CAIRO_STATUS_NULL_POINTER; - - return device->status; -} - void _cairo_drm_device_fini (cairo_drm_device_t *device) { - CAIRO_MUTEX_LOCK (_cairo_drm_device_mutex); - if (device->prev != NULL) - device->prev->next = device->next; - else - _cairo_drm_known_devices = device->next; - if (device->next != NULL) - device->next->prev = device->prev; - CAIRO_MUTEX_UNLOCK (_cairo_drm_device_mutex); - if (device->fd != -1) close (device->fd); } void -cairo_drm_device_destroy (cairo_drm_device_t *device) +cairo_drm_device_throttle (cairo_device_t *abstract_device) { - if (device == NULL || - CAIRO_REFERENCE_COUNT_IS_INVALID (&device->ref_count)) - { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; + cairo_status_t status; + + if (unlikely (device->base.status)) return; - } - assert 
(CAIRO_REFERENCE_COUNT_HAS_REFERENCE (&device->ref_count)); - if (! _cairo_reference_count_dec_and_test (&device->ref_count)) + if (device->device.throttle == NULL) return; - device->device.destroy (device); + status = device->device.throttle (device); + if (unlikely (status)) + _cairo_status_set_error (&device->base.status, status); } -slim_hidden_def (cairo_drm_device_destroy); -void -cairo_drm_device_throttle (cairo_drm_device_t *dev) +cairo_bool_t +_cairo_drm_size_is_valid (cairo_device_t *abstract_device, + int width, int height) { - cairo_status_t status; + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; - if (unlikely (dev->status)) - return; + if (unlikely (device->base.status)) + return FALSE; - if (dev->device.throttle == NULL) - return; - - status = dev->device.throttle (dev); - if (unlikely (status)) - _cairo_status_set_error (&dev->status, status); + return width <= device->max_surface_size && + height <= device->max_surface_size; }
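
The two A1 upload paths above (_intel_bo_put_a1_image and the CAIRO_FORMAT_A1 case in intel_glyph_cache_add_glyph) expand 1-bit alpha to A8 on the fly, since the glyph atlas only comes in ARGB32 and A8 flavours (intel_get_glyph_cache maps A1 onto the A8 cache). A minimal standalone sketch of that expansion, with the helper name invented for illustration:

#include <stdint.h>
#include <stdio.h>

/* Expand one row of 1bpp alpha (least-significant bit first, as in
 * CAIRO_FORMAT_A1 on little-endian) to 8bpp: set bits become 0xff,
 * clear bits 0x00 -- the same expression used in the diff above. */
static void
a1_row_to_a8 (const uint8_t *src, uint8_t *a8, int width)
{
    int x;

    for (x = 0; x < width; x++)
        a8[x] = src[x>>3] & (1 << (x&7)) ? 0xff : 0x00;
}

int
main (void)
{
    const uint8_t a1[2] = { 0xb1, 0x02 };	/* pixels 0,4,5,7 and 9 set */
    uint8_t a8[16];
    int x;

    a1_row_to_a8 (a1, a8, 16);
    for (x = 0; x < 16; x++)
	putchar (a8[x] ? '#' : '.');
    putchar ('\n');				/* prints #...##.#.#...... */
    return 0;
}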
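
intel_bo_put_image picks its upload strategy from the tiling mode: an untiled bo is filled with intel_bo_write (a pwrite-style copy), collapsing to a single write when the image stride matches the bo stride and falling back to one write per row otherwise, while a tiled bo is mapped with intel_bo_map and copied row by row through the linear view the mapping provides. A standalone model of the stride decision, using plain memory in place of a bo; upload() and the buffers in main() are invented for the demo (the real function additionally offsets by dst_x/dst_y):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Copy height rows of row_bytes bytes from src (stride src_stride) to
 * dst (stride dst_stride): one copy when the rows are contiguous in
 * both buffers, otherwise one copy per row. */
static void
upload (uint8_t *dst, int dst_stride,
	const uint8_t *src, int src_stride,
	int row_bytes, int height)
{
    assert (row_bytes <= dst_stride && row_bytes <= src_stride);

    if (src_stride == dst_stride && src_stride == row_bytes) {
	memcpy (dst, src, (size_t) row_bytes * height);
    } else while (height--) {
	memcpy (dst, src, row_bytes);
	dst += dst_stride;
	src += src_stride;
    }
}

int
main (void)
{
    uint8_t src[4 * 8] = { 0 }, bo[16 * 8] = { 0 };

    upload (bo, 16, src, 4, 4, 8);	/* strides differ: row-by-row */
    upload (bo, 4, src, 4, 4, 8);	/* strides match: single copy */
    return 0;
}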
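
intel_glyph_cache_add_glyph and its caller intel_get_glyph implement a three-step cascade for finding atlas space: ask the rtree for a free slot, failing that evict a random unpinned slot and retry, and if even that reports CAIRO_INT_STATUS_UNSUPPORTED (every slot pinned into the current batch) destroy the whole atlas bo and rebuild from scratch. A toy model with a flat slot array standing in for the rtree; all types and names here are invented, and the reset is inlined rather than split between two functions as in the diff:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define N_SLOTS 4

/* Toy stand-in for the rtree-backed glyph atlas: pinned slots are
 * referenced by the batch currently being built and must not be
 * reused until it is submitted. */
struct cache {
    struct { int used, pinned; } slot[N_SLOTS];
    int generation;		/* bumped when the atlas is rebuilt */
};

static int
cache_add (struct cache *c)
{
    int n, tries;

    /* 1: look for a free slot (_cairo_rtree_insert) */
    for (n = 0; n < N_SLOTS; n++) {
	if (! c->slot[n].used) {
	    c->slot[n].used = 1;
	    return n;
	}
    }

    /* 2: evict a random unpinned slot (_cairo_rtree_evict_random) */
    for (tries = 0; tries < 4 * N_SLOTS; tries++) {
	n = rand () % N_SLOTS;
	if (! c->slot[n].pinned)
	    return n;
    }

    /* 3: everything is pinned -- throw the whole atlas away */
    memset (c->slot, 0, sizeof (c->slot));
    c->generation++;
    c->slot[0].used = 1;
    return 0;
}

int
main (void)
{
    static struct cache c;
    int i, n;

    for (i = 0; i < N_SLOTS; i++)
	printf ("glyph %d -> slot %d (generation %d)\n",
		i, cache_add (&c), c.generation);

    for (n = 0; n < N_SLOTS; n++)
	c.slot[n].pinned = 1;	/* pretend the whole atlas is in the batch */

    /* forces step 3: slot 0 of a fresh atlas, generation 1 */
    printf ("glyph %d -> slot %d (generation %d)\n",
	    i, cache_add (&c), c.generation);
    return 0;
}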
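
intel_gradient_render realises a gradient pattern as a width x 1 ARGB32 strip for the sampler. intel_gradient_sample_width sizes that strip so the steepest pair of adjacent stops gets roughly 128 texels per unit of offset, rounded up to a multiple of 8 and clamped to [8, 1024]: a plain two-stop black-to-white gradient gets 128 texels, while two stops only 0.1 apart would ask for 1280 and are clamped. The program below repeats the pixman rendering sequence from the diff with a hard-coded width of 64 and omits the bo upload and caching; it builds against pixman-1:

/* cc demo.c $(pkg-config --cflags --libs pixman-1) */
#include <pixman.h>
#include <stdio.h>

int
main (void)
{
    pixman_gradient_stop_t stops[2] = {
	{ pixman_int_to_fixed (0), { 0x0000, 0x0000, 0x0000, 0xffff } },
	{ pixman_int_to_fixed (1), { 0xffff, 0xffff, 0xffff, 0xffff } },
    };
    pixman_point_fixed_t p1 = { 0, 0 };
    pixman_point_fixed_t p2 = { pixman_int_to_fixed (64), 0 };
    pixman_image_t *gradient, *image;
    uint32_t *data;
    int x;

    /* Sample the gradient along x into a 64 x 1 a8r8g8b8 strip, the
     * same composite intel_gradient_render performs before uploading
     * the scanline into a bo. */
    gradient = pixman_image_create_linear_gradient (&p1, &p2, stops, 2);
    pixman_image_set_filter (gradient, PIXMAN_FILTER_BILINEAR, NULL, 0);

    image = pixman_image_create_bits (PIXMAN_a8r8g8b8, 64, 1, NULL, 0);
    pixman_image_composite (PIXMAN_OP_SRC,
			    gradient, NULL, image,
			    0, 0,
			    0, 0,
			    0, 0,
			    64, 1);
    pixman_image_unref (gradient);

    data = pixman_image_get_data (image);
    for (x = 0; x < 64; x += 16)
	printf ("texel %2d: %08x\n", x, data[x]);

    pixman_image_unref (image);
    return 0;
}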
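
When the gradient cache is full, the victim slot is picked by hars_petruska_f54_1_random () % GRADIENT_CACHE_SIZE rather than by LRU, so no per-entry usage data needs maintaining; the generator's state is a single static word starting at zero, making the eviction sequence deterministic within a process. A standalone run of the generator, which is copied verbatim from the diff (GRADIENT_CACHE_SIZE lives in the intel private header, not visible in this hunk, so 16 is assumed for the demo):

#include <stdint.h>
#include <stdio.h>

/* Same generator as in the diff: a rotate-xor update plus an additive
 * constant; cheap, and stateful only through one static word. */
static uint32_t
hars_petruska_f54_1_random (void)
{
#define rol(x,k) ((x << k) | (x >> (32-k)))
    static uint32_t x;
    return x = (x ^ rol (x, 5) ^ rol (x, 24)) + 0x37798849;
#undef rol
}

int
main (void)
{
    int n;

    /* Victim slots for the first few evictions, assuming 16 entries. */
    for (n = 0; n < 8; n++)
	printf ("evict slot %u\n", hars_petruska_f54_1_random () % 16);
    return 0;
}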
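
Backend selection in cairo_drm_device_get is a first-match walk over the (vendor, chip) table above; the trailing { 0x8086, ~0, ... } and { 0x1002, ~0, ... } rows read as per-vendor wildcards catching any Intel or Radeon part not explicitly listed, which is why the specific i915/i965 ids must come first. A standalone sketch of that lookup with the match loop written out (the loop itself is elided from this hunk, so its exact form here is an assumption) and toy create functions in place of the real constructors:

#include <stdint.h>
#include <stdio.h>

typedef const char * (*create_func_t) (void);

static const char * create_i965 (void) { return "i965"; }
static const char * create_i915 (void) { return "i915"; }
static const char * create_intel (void) { return "intel (generic)"; }
static const char * create_radeon (void) { return "radeon"; }

static const struct {
    uint32_t vendor_id;
    uint32_t chip_id;
    create_func_t create;
} driver_map[] = {
    { 0x8086, 0x2a42, create_i965 },	/* GM45_GM */
    { 0x8086, 0x2772, create_i915 },	/* I945_G */
    { 0x8086, ~0u, create_intel },	/* any other Intel part */
    { 0x1002, ~0u, create_radeon },	/* any Radeon */
};

#define ARRAY_LENGTH(a) (sizeof (a) / sizeof ((a)[0]))

/* First match wins: explicit chip ids shadow the wildcard entries. */
static create_func_t
lookup (uint32_t vendor_id, uint32_t chip_id)
{
    unsigned i;

    for (i = 0; i < ARRAY_LENGTH (driver_map); i++) {
	if (driver_map[i].vendor_id == vendor_id &&
	    (driver_map[i].chip_id == chip_id ||
	     driver_map[i].chip_id == ~0u))
	{
	    return driver_map[i].create;
	}
    }
    return NULL;	/* no driver: the real code returns a device in error */
}

int
main (void)
{
    printf ("%s\n", lookup (0x8086, 0x2a42) ());	/* i965 */
    printf ("%s\n", lookup (0x8086, 0x3577) ());	/* intel (generic) */
    return 0;
}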