Diffstat (limited to 'src')
34 files changed, 22674 insertions, 1115 deletions
diff --git a/src/Makefile.sources b/src/Makefile.sources index 8d688608..f6d4fe30 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -310,12 +310,29 @@ cairo_directfb_sources = cairo-directfb-surface.c cairo_drm_headers = cairo-drm.h cairo_drm_private = drm/cairo-drm-private.h \ drm/cairo-drm-intel-private.h \ + drm/cairo-drm-i915-private.h \ + drm/cairo-drm-i965-private.h \ + drm/cairo-drm-intel-brw-defines.h \ + drm/cairo-drm-intel-brw-structs.h \ + drm/cairo-drm-intel-brw-eu.h \ drm/cairo-drm-radeon-private.h cairo_drm_sources = drm/cairo-drm.c \ drm/cairo-drm-bo.c \ drm/cairo-drm-surface.c \ drm/cairo-drm-intel.c \ + drm/cairo-drm-intel-debug.c \ drm/cairo-drm-intel-surface.c \ + drm/cairo-drm-i915-surface.c \ + drm/cairo-drm-i915-glyphs.c \ + drm/cairo-drm-i915-shader.c \ + drm/cairo-drm-i915-spans.c \ + drm/cairo-drm-i965-surface.c \ + drm/cairo-drm-i965-glyphs.c \ + drm/cairo-drm-i965-shader.c \ + drm/cairo-drm-i965-spans.c \ + drm/cairo-drm-intel-brw-eu.c \ + drm/cairo-drm-intel-brw-eu-emit.c \ + drm/cairo-drm-intel-brw-eu-util.c \ drm/cairo-drm-radeon.c \ drm/cairo-drm-radeon-surface.c cairo_gallium_sources = drm/cairo-drm-gallium-surface.c diff --git a/src/cairo-drm.h b/src/cairo-drm.h index 1b50b1bd..52b9de21 100644 --- a/src/cairo-drm.h +++ b/src/cairo-drm.h @@ -39,56 +39,41 @@ CAIRO_BEGIN_DECLS -typedef struct _cairo_drm_device cairo_drm_device_t; - struct udev_device; -cairo_public cairo_drm_device_t * +cairo_public cairo_device_t * cairo_drm_device_get (struct udev_device *device); -cairo_public cairo_drm_device_t * +cairo_public cairo_device_t * cairo_drm_device_get_for_fd (int fd); -cairo_public cairo_drm_device_t * +cairo_public cairo_device_t * cairo_drm_device_default (void); -cairo_public cairo_drm_device_t * -cairo_drm_device_reference (cairo_drm_device_t *device); - -cairo_public cairo_status_t -cairo_drm_device_status (cairo_drm_device_t *device); - cairo_public int -cairo_drm_device_get_fd (cairo_drm_device_t *device); +cairo_drm_device_get_fd (cairo_device_t *device); cairo_public void -cairo_drm_device_throttle (cairo_drm_device_t *device); - -cairo_public void -cairo_drm_device_destroy (cairo_drm_device_t *device); - +cairo_drm_device_throttle (cairo_device_t *device); cairo_public cairo_surface_t * -cairo_drm_surface_create (cairo_drm_device_t *device, +cairo_drm_surface_create (cairo_device_t *device, cairo_content_t content, int width, int height); cairo_public cairo_surface_t * -cairo_drm_surface_create_for_name (cairo_drm_device_t *device, +cairo_drm_surface_create_for_name (cairo_device_t *device, unsigned int name, cairo_format_t format, int width, int height, int stride); cairo_public cairo_surface_t * -cairo_drm_surface_create_from_cacheable_image (cairo_drm_device_t *device, +cairo_drm_surface_create_from_cacheable_image (cairo_device_t *device, cairo_surface_t *surface); cairo_public cairo_status_t cairo_drm_surface_enable_scan_out (cairo_surface_t *surface); -cairo_public cairo_drm_device_t * -cairo_drm_surface_get_device (cairo_surface_t *abstract_surface); - cairo_public unsigned int cairo_drm_surface_get_handle (cairo_surface_t *surface); diff --git a/src/cairo-misc.c b/src/cairo-misc.c index cd1032a4..56409afc 100644 --- a/src/cairo-misc.c +++ b/src/cairo-misc.c @@ -674,6 +674,63 @@ _cairo_lround (double d) #undef LSW } +/* Convert a 32-bit IEEE single precision floating point number to a + * 'half' representation (s10.5) + */ +uint16_t +_cairo_half_from_float (float f) +{ + union { + uint32_t ui; + float f; + } u; + int s, e, m; + + 
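	/* For reference: the binary16 ("half") layout produced below is
	 * 1 sign bit, 5 exponent bits with bias 15, and 10 mantissa bits.
	 * A few worked encodings:
	 *
	 *     1.0f (0x3f800000): e = 127 - (127-15) = 15, m = 0
	 *                        -> (15 << 10)           = 0x3c00
	 *     0.5f (0x3f000000): e = 126 - 112 = 14      -> 0x3800
	 *    -2.0f (0xc0000000): sign = 0x8000, e = 16   -> 0xc000
	 *    65536.0f:           e = 31 > 30, overflows  -> 0x7c00 (+inf)
	 */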
u.f = f; + s = (u.ui >> 16) & 0x00008000; + e = ((u.ui >> 23) & 0x000000ff) - (127 - 15); + m = u.ui & 0x007fffff; + if (e <= 0) { + if (e < -10) { + /* underflow */ + return 0; + } + + m = (m | 0x00800000) >> (1 - e); + + /* round to nearest, round 0.5 up. */ + if (m & 0x00001000) + m += 0x00002000; + return s | (m >> 13); + } else if (e == 0xff - (127 - 15)) { + if (m == 0) { + /* infinity */ + return s | 0x7c00; + } else { + /* nan */ + m >>= 13; + return s | 0x7c00 | m | (m == 0); + } + } else { + /* round to nearest, round 0.5 up. */ + if (m & 0x00001000) { + m += 0x00002000; + + if (m & 0x00800000) { + m = 0; + e += 1; + } + } + + if (e > 30) { + /* overflow -> infinity */ + return s | 0x7c00; + } + + return s | (e << 10) | (m >> 13); + } +} + #ifdef _WIN32 diff --git a/src/cairoint.h b/src/cairoint.h index 6e059c4c..0892bbbd 100644 --- a/src/cairoint.h +++ b/src/cairoint.h @@ -949,6 +949,8 @@ _cairo_round (double r) cairo_private int _cairo_lround (double d) cairo_const; +cairo_private uint16_t +_cairo_half_from_float (float f) cairo_const; /* cairo-gstate.c */ cairo_private cairo_status_t diff --git a/src/drm/cairo-drm-bo.c b/src/drm/cairo-drm-bo.c index 980484a2..cb74bacc 100644 --- a/src/drm/cairo-drm-bo.c +++ b/src/drm/cairo-drm-bo.c @@ -31,11 +31,14 @@ #include "cairo-drm-private.h" #include "cairo-drm-ioctl-private.h" + #include "cairo-error-private.h" #include <sys/ioctl.h> #include <errno.h> +#define ERR_DEBUG(x) x + struct drm_gem_close { /** Handle of the object to be closed. */ uint32_t handle; @@ -79,8 +82,11 @@ _cairo_drm_bo_open_for_name (const cairo_drm_device_t *dev, do { ret = ioctl (dev->fd, DRM_IOCTL_GEM_OPEN, &open); } while (ret == -1 && errno == EINTR); - if (ret == -1) + if (ret == -1) { + ERR_DEBUG((fprintf (stderr, "Failed to open bo for name %d: %s\n", + name, strerror (errno)))); return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } bo->name = name; bo->size = open.size; @@ -99,8 +105,11 @@ _cairo_drm_bo_flink (const cairo_drm_device_t *dev, memset (&flink, 0, sizeof (flink)); flink.handle = bo->handle; ret = ioctl (dev->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret == -1) + if (ret == -1) { + ERR_DEBUG((fprintf (stderr, "Failed to flink bo: %s\n", + strerror (errno)))); return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } bo->name = flink.name; diff --git a/src/drm/cairo-drm-gallium-surface.c b/src/drm/cairo-drm-gallium-surface.c index c2f331bc..73d75478 100644 --- a/src/drm/cairo-drm-gallium-surface.c +++ b/src/drm/cairo-drm-gallium-surface.c @@ -444,7 +444,6 @@ gallium_surface_create_internal (gallium_device_t *device, _cairo_surface_init (&surface->base.base, &gallium_surface_backend, - NULL, /* device */ content); _cairo_drm_surface_init (&surface->base, &device->base); @@ -551,7 +550,6 @@ gallium_surface_create_for_name (cairo_drm_device_t *base_dev, content = _cairo_content_from_format (format); _cairo_surface_init (&surface->base.base, &gallium_surface_backend, - NULL, /* device */ content); _cairo_drm_surface_init (&surface->base, base_dev); @@ -659,6 +657,7 @@ _cairo_drm_gallium_device_create (int fd, dev_t dev, int vendor_id, int chip_id) device->base.surface.enable_scan_out = NULL; device->base.surface.flink = gallium_surface_flink; + device->base.device.flush = NULL; device->base.device.throttle = NULL; device->base.device.destroy = gallium_device_destroy; diff --git a/src/drm/cairo-drm-i915-glyphs.c b/src/drm/cairo-drm-i915-glyphs.c new file mode 100644 index 00000000..babd59e6 --- /dev/null +++ b/src/drm/cairo-drm-i915-glyphs.c @@ -0,0 +1,534 @@ +/* 
cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Red Hat, Inc. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-composite-rectangles-private.h" +#include "cairo-drm-i915-private.h" +#include "cairo-error-private.h" +#include "cairo-rtree-private.h" + +static void +i915_emit_glyph_rectangle_constant (i915_device_t *device, + i915_shader_t *shader, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph) +{ + float *v; + + /* Each vertex is: + * 2 vertex coordinates + * 2 glyph texture coordinates + */ + + v = i915_add_rectangle (device); + + /* bottom right */ + *v++ = x2; *v++ = y2; + *v++ = glyph->texcoord[0]; + + /* bottom left */ + *v++ = x1; *v++ = y2; + *v++ = glyph->texcoord[1]; + + /* top left */ + *v++ = x1; *v++ = y1; + *v++ = glyph->texcoord[2]; +} + +static void +i915_emit_glyph_rectangle_general (i915_device_t *device, + i915_shader_t *shader, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph) +{ + double s, t; + float *v; + + /* Each vertex is: + * 2 vertex coordinates + * [0-2] source texture coordinates + * 2 glyph texture coordinates + */ + + v = i915_add_rectangle (device); + + /* bottom right */ + *v++ = x2; *v++ = y2; + s = x2, t = y2; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, s, t); + break; + case VS_RADIAL: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + *v++ = glyph->texcoord[0]; + + /* bottom left */ + *v++ = x1; *v++ = y2; + s = x1, t = y2; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, s, t); + break; + case VS_RADIAL: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; 
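	/* In the VS_TEXTURE_16 cases, texcoord_2d_16() is assumed to pack
	 * the transformed (s, t) pair as two binary16 values, e.g.
	 *     bits = _cairo_half_from_float (s) |
	 *            (uint32_t) _cairo_half_from_float (t) << 16;
	 * reinterpreted as a single float vertex element. This matches the
	 * TEXCOORDFMT_2D_16 layout and halves the per-vertex texcoord
	 * bandwidth relative to TEXCOORDFMT_2D. */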
 + break; + case VS_TEXTURE: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + *v++ = glyph->texcoord[1]; + + /* top left */ + *v++ = x1; *v++ = y1; + s = x1, t = y1; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, s, t); + break; + case VS_RADIAL: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + *v++ = glyph->texcoord[2]; +} + +typedef void +(*i915_emit_glyph_rectangle_func_t) (i915_device_t *device, + i915_shader_t *shader, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph); + +static cairo_status_t +i915_surface_mask_internal (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *source, + i915_surface_t *mask, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + i915_device_t *device; + i915_shader_t shader; + cairo_region_t *clip_region = NULL; + cairo_status_t status; + + i915_shader_init (&shader, dst, op); + + status = i915_shader_acquire_pattern (&shader, &shader.source, + source, &extents->bounded); + if (unlikely (status)) + return status; + + shader.mask.type.vertex = VS_TEXTURE_16; + shader.mask.type.fragment = FS_TEXTURE; + shader.mask.base.content = mask->intel.drm.base.content; + shader.mask.base.texfmt = TEXCOORDFMT_2D_16; + shader.mask.base.n_samplers = 1; + shader.mask.base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_NEAREST); + shader.mask.base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (CAIRO_EXTEND_NONE); + + cairo_matrix_init_translate (&shader.mask.base.matrix, + -extents->bounded.x + NEAREST_BIAS, + -extents->bounded.y + NEAREST_BIAS); + cairo_matrix_scale (&shader.mask.base.matrix, + 1. / mask->intel.drm.width, + 1. 
/ mask->intel.drm.height); + + shader.mask.base.bo = to_intel_bo (mask->intel.drm.bo); + shader.mask.base.offset[0] = 0; + shader.mask.base.map[0] = mask->map0; + shader.mask.base.map[1] = mask->map1; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i915_shader_set_clip (&shader, clip); + } + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SHADER; + + device = i915_device (dst); + + status = i915_shader_commit (&shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + if (clip_region != NULL) { + unsigned int n, num_rectangles; + + num_rectangles = cairo_region_num_rectangles (clip_region); + for (n = 0; n < num_rectangles; n++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, n, &rect); + + shader.add_rectangle (&shader, + rect.x, rect.y, + rect.x + rect.width, rect.y + rect.height); + } + } else { + shader.add_rectangle (&shader, + extents->bounded.x, extents->bounded.y, + extents->bounded.x + extents->bounded.width, + extents->bounded.y + extents->bounded.height); + } + + if ((extents->is_bounded & CAIRO_OPERATOR_BOUND_BY_MASK) == 0) + status = i915_fixup_unbounded (dst, extents, clip); + +CLEANUP_DEVICE: + cairo_device_release (&device->intel.base.base); +CLEANUP_SHADER: + i915_shader_fini (&shader); + return status; +} + +cairo_int_status_t +i915_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + i915_surface_t *surface = abstract_surface; + i915_surface_t *mask = NULL; + i915_device_t *device; + i915_shader_t shader; + cairo_composite_rectangles_t extents; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_bool_t overlap; + cairo_region_t *clip_region = NULL; + intel_bo_t *last_bo = NULL; + i915_emit_glyph_rectangle_func_t emit_func; + cairo_scaled_glyph_t *glyph_cache[64]; + cairo_status_t status; + int mask_x = 0, mask_y = 0; + int i = 0; + + *num_remaining = 0; + status = _cairo_composite_rectangles_init_for_glyphs (&extents, + surface->intel.drm.width, + surface->intel.drm.height, + op, source, + scaled_font, + glyphs, num_glyphs, + clip, + &overlap); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL && extents.is_bounded) { + clip = _cairo_clip_init_copy (&local_clip, clip); + status = _cairo_clip_rectangle (clip, &extents.bounded); + if (unlikely (status)) + return status; + + have_clip = TRUE; + } + + if (overlap || (extents.is_bounded & CAIRO_OPERATOR_BOUND_BY_MASK) == 0) { + cairo_content_t content; + + content = CAIRO_CONTENT_ALPHA; + if (scaled_font->options.antialias == CAIRO_ANTIALIAS_SUBPIXEL) + content |= CAIRO_CONTENT_COLOR; + + mask = (i915_surface_t *) + i915_surface_create_internal (&i915_device (surface)->intel.base, + CAIRO_CONTENT_ALPHA, + extents.bounded.width, + extents.bounded.height, + I915_TILING_DEFAULT, + TRUE); + if (unlikely (mask->intel.drm.base.status)) + return mask->intel.drm.base.status; + + status = _cairo_surface_paint (&mask->intel.drm.base, + CAIRO_OPERATOR_CLEAR, + &_cairo_pattern_clear.base, + NULL); + if (unlikely 
(status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + i915_shader_init (&shader, mask, CAIRO_OPERATOR_ADD); + + status = i915_shader_acquire_pattern (&shader, &shader.source, + &_cairo_pattern_white.base, + &extents.bounded); + if (unlikely (status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + mask_x = -extents.bounded.x; + mask_y = -extents.bounded.y; + } else { + i915_shader_init (&shader, surface, op); + + status = i915_shader_acquire_pattern (&shader, &shader.source, + source, &extents.bounded); + if (unlikely (status)) + return status; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i915_shader_set_clip (&shader, clip); + } + } + + shader.mask.type.fragment = FS_TEXTURE; + shader.mask.base.content = CAIRO_CONTENT_ALPHA; /* XXX */ + shader.mask.base.texfmt = TEXCOORDFMT_2D_16; + shader.mask.base.n_samplers = 1; + shader.mask.base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_NEAREST); + shader.mask.base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (CAIRO_EXTEND_NONE); + + switch (shader.source.type.vertex) { + case VS_CONSTANT: + emit_func = i915_emit_glyph_rectangle_constant; + break; + default: + case VS_LINEAR: + case VS_RADIAL: + case VS_TEXTURE: + case VS_TEXTURE_16: + emit_func = i915_emit_glyph_rectangle_general; + break; + } + + status = cairo_device_acquire (surface->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SHADER; + + device = i915_device (surface); + + _cairo_scaled_font_freeze_cache (scaled_font); + if (scaled_font->surface_private == NULL) { + /* XXX couple into list to remove on context destruction */ + scaled_font->surface_private = device; + scaled_font->surface_backend = surface->intel.drm.base.backend; + } + + memset (glyph_cache, 0, sizeof (glyph_cache)); + + for (i = 0; i < num_glyphs; i++) { + cairo_scaled_glyph_t *scaled_glyph; + int x, y, x1, x2, y1, y2; + int cache_index = glyphs[i].index % ARRAY_LENGTH (glyph_cache); + intel_glyph_t *glyph; + + scaled_glyph = glyph_cache[cache_index]; + if (scaled_glyph == NULL || + _cairo_scaled_glyph_index (scaled_glyph) != glyphs[i].index) + { + status = _cairo_scaled_glyph_lookup (scaled_font, + glyphs[i].index, + CAIRO_SCALED_GLYPH_INFO_METRICS, + &scaled_glyph); + if (unlikely (status)) + goto FINISH; + + glyph_cache[cache_index] = scaled_glyph; + } + + if (unlikely (scaled_glyph->metrics.width == 0 || + scaled_glyph->metrics.height == 0)) + { + continue; + } + + /* XXX glyph images are snapped to pixel locations */ + x = _cairo_lround (glyphs[i].x); + y = _cairo_lround (glyphs[i].y); + + x1 = x + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.x); + y1 = y + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.y); + x2 = x + _cairo_fixed_integer_ceil (scaled_glyph->bbox.p2.x); + y2 = y + _cairo_fixed_integer_ceil (scaled_glyph->bbox.p2.y); + + if (x2 < extents.bounded.x || + y2 < extents.bounded.y || + x1 > extents.bounded.x + extents.bounded.width || + y1 > extents.bounded.y + extents.bounded.height) + { + continue; + } + + if (scaled_glyph->surface_private == NULL) { + status = intel_get_glyph (&device->intel, scaled_font, scaled_glyph); + if (unlikely (status == CAIRO_INT_STATUS_NOTHING_TO_DO)) { + status = 
CAIRO_STATUS_SUCCESS; + continue; + } + if (unlikely (status)) + goto FINISH; + } + glyph = intel_glyph_pin (scaled_glyph->surface_private); + + if (glyph->cache->buffer.bo != last_bo) { + intel_buffer_cache_t *cache = glyph->cache; + + shader.mask.base.bo = cache->buffer.bo; + shader.mask.base.offset[0] = cache->buffer.offset; + shader.mask.base.map[0] = cache->buffer.map0; + shader.mask.base.map[1] = cache->buffer.map1; + shader.mask.base.content = CAIRO_CONTENT_ALPHA; /* XXX */ + + status = i915_shader_commit (&shader, device); + if (unlikely (status)) + goto FINISH; + + last_bo = cache->buffer.bo; + } + + x1 += mask_x; x2 += mask_x; + y1 += mask_y; y2 += mask_y; + + /* XXX clip glyph */ + emit_func (device, &shader, x1, y1, x2, y2, glyph); + } + + status = CAIRO_STATUS_SUCCESS; + FINISH: + _cairo_scaled_font_thaw_cache (scaled_font); + cairo_device_release (surface->intel.drm.base.device); + CLEANUP_SHADER: + i915_shader_fini (&shader); + + if (unlikely (status == CAIRO_INT_STATUS_UNSUPPORTED)) { + cairo_path_fixed_t path; + + _cairo_path_fixed_init (&path); + status = _cairo_scaled_font_glyph_path (scaled_font, + glyphs + i, num_glyphs - i, + &path); + if (mask_x | mask_y) { + _cairo_path_fixed_translate (&path, + _cairo_fixed_from_int (mask_x), + _cairo_fixed_from_int (mask_y)); + } + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = surface->intel.drm.base.backend->fill (shader.target, + shader.op, + mask != NULL ? &_cairo_pattern_white.base : source, + &path, + CAIRO_FILL_RULE_WINDING, + 0, + scaled_font->options.antialias, + clip); + } + _cairo_path_fixed_fini (&path); + } + + if (mask != NULL) { + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = i915_surface_mask_internal (surface, op, source, mask, + clip, &extents); + } + cairo_surface_finish (&mask->intel.drm.base); + cairo_surface_destroy (&mask->intel.drm.base); + } + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} diff --git a/src/drm/cairo-drm-i915-private.h b/src/drm/cairo-drm-i915-private.h new file mode 100644 index 00000000..060c21c8 --- /dev/null +++ b/src/drm/cairo-drm-i915-private.h @@ -0,0 +1,1169 @@ +/* + * Copyright © 2006, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#ifndef CAIRO_DRM_I915_PRIVATE_H +#define CAIRO_DRM_I915_PRIVATE_H + +#include "cairo-types-private.h" + +#include "cairo-drm-private.h" +#include "cairo-drm-intel-private.h" +#include "cairo-drm-intel-command-private.h" +#include "cairo-drm-intel-ioctl-private.h" +#include "cairo-freelist-private.h" + +#define I915_VERBOSE 1 + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_TYPE_SHIFT 4 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 
1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. 
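 * As with every fragment-shader instruction, a declaration occupies
 * three dwords, with only the first carrying payload here; for
 * example, declaring texture-coordinate register FS_T0 with all
 * channels yields
 *   D0_DCL | (REG_TYPE_T << D0_TYPE_SHIFT) | (0 << D0_NR_SHIFT)
 *          | D0_CHANNEL_ALL
 *   = 0x19000000 | 0x00080000 | 0x00003c00 = 0x19083c00
 * followed by two zero dwords (see i915_fs_dcl() below).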
*/ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + +/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic + * operations + */ +#define MASK_X 0x1 +#define MASK_Y 0x2 +#define MASK_Z 0x4 +#define MASK_W 0x8 +#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z) +#define MASK_XYZW (MASK_XYZ | MASK_W) +#define MASK_SATURATE 0x10 + +/* Temporary, undeclared regs. Preserved between phases */ +#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0) +#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1) +#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2) +#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3) + +/* Texture coordinate regs. Must be declared. */ +#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0) +#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1) +#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2) +#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3) +#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4) +#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5) +#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6) +#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7) +#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8) +#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9) +#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10) + +/* Constant values */ +#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0) +#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1) +#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2) +#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3) +#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4) +#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5) +#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6) +#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7) + +/* Sampler regs */ +#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0) +#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1) +#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2) +#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3) + +/* Output color */ +#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0) + +/* Output depth */ +#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0) + +/* Unpreserved temporary regs */ +#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0) +#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1) +#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2) +#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3) + +#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3) +#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 3) +#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 3) +#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 3) + +#define REG_CHANNEL_MASK 0x7 + +#define REG_NR(reg) ((reg) & REG_NR_MASK) +#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK) +#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Y(reg) (((reg) >> 
Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK) + +enum i915_fs_channel { + X_CHANNEL_VAL = 1, + Y_CHANNEL_VAL, + Z_CHANNEL_VAL, + W_CHANNEL_VAL, + ZERO_CHANNEL_VAL, + ONE_CHANNEL_VAL +}; + +#define i915_fs_operand(reg, x, y, z, w) \ + (reg) | \ + (x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \ + (y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \ + (z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \ + (w##_CHANNEL_VAL << W_CHANNEL_SHIFT) + +/** + * Construct an operand description for using a register with no swizzling + */ +#define i915_fs_operand_reg(reg) \ + i915_fs_operand(reg, X, Y, Z, W) + +#define i915_fs_operand_reg_negate(reg) \ + i915_fs_operand(reg, -X, -Y, -Z, -W) + +/** + * Returns an operand containing (0.0, 0.0, 0.0, 0.0). + */ +#define i915_fs_operand_zero() i915_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO) + +/** + * Returns an unused operand + */ +#define i915_fs_operand_none() i915_fs_operand_zero() + +/** + * Returns an operand containing (1.0, 1.0, 1.0, 1.0). + */ +#define i915_fs_operand_one() i915_fs_operand(FS_R0, ONE, ONE, ONE, ONE) + +#define i915_get_hardware_channel_val(val, shift, negate) \ + (((int) val < 0) ? (~val << shift) | negate : (val - 1) << shift) + +/** + * Outputs a fragment shader command to declare a sampler or texture register. + */ +#define i915_fs_dcl(reg) \ +do { \ + OUT_DWORD (D0_DCL | \ + (REG_TYPE(reg) << D0_TYPE_SHIFT) | \ + (REG_NR(reg) << D0_NR_SHIFT) | \ + ((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \ + OUT_DWORD (0); \ + OUT_DWORD (0); \ +} while (0) + +#define i915_fs_texld(dest_reg, sampler_reg, address_reg) \ +do { \ + OUT_DWORD (T0_TEXLD | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_DWORD((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_DWORD (0); \ +} while (0) + +#define i915_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \ + _i915_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2) + +#define i915_fs_arith(op, dest_reg, operand0, operand1, operand2) \ + _i915_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2) + +#define _i915_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \ +do { \ + /* Set up destination register and write mask */ \ + OUT_DWORD (cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \ + (((dest_mask) & MASK_SATURATE) ? 
A0_DEST_SATURATE : 0) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ +} while (0) + +#define _i915_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\ + /* Set up destination register and write mask */ \ + OUT_DWORD (cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (A0_DEST_CHANNEL_ALL) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_DWORD (i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + 
i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ +} while (0) + +#define i915_fs_mov(dest_reg, operand0) \ + i915_fs_arith(MOV, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +#define i915_fs_mov_masked(dest_reg, dest_mask, operand0) \ + i915_fs_arith_masked (MOV, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + + +#define i915_fs_frc(dest_reg, operand0) \ + i915_fs_arith (FRC, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +/** Add operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_add(dest_reg, operand0, operand1) \ + i915_fs_arith (ADD, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Multiply operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_mul(dest_reg, operand0, operand1) \ + i915_fs_arith (MUL, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */ +#define i915_fs_rsq(dest_reg, dest_mask, operand0) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (RSQ, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } else { \ + i915_fs_arith (RSQ, dest_reg, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } \ +} while (0) + +/** Puts the minimum of operand0 and operand1 in dest_reg */ +#define i915_fs_min(dest_reg, operand0, operand1) \ + i915_fs_arith (MIN, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Puts the maximum of operand0 and operand1 in dest_reg */ +#define i915_fs_max(dest_reg, operand0, operand1) \ + i915_fs_arith (MAX, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +#define i915_fs_cmp(dest_reg, operand0, operand1, operand2) \ + i915_fs_arith (CMP, dest_reg, operand0, operand1, operand2) + +/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */ +#define i915_fs_mad(dest_reg, dest_mask, op0, op1, op2) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (MAD, dest_reg, op0, op1, op2); \ + } \ +} while (0) + +#define i915_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \ + } \ +} while (0) + +/** + * Perform a 3-component dot-product of operand0 and operand1 and put the + * resulting scalar in the channels of dest_reg specified by the dest_mask. 
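 * For example, a typical normalization prologue using these helpers
 * accumulates the squared length of the coordinate in FS_T0 into the
 * w channel of FS_R0 and then takes its reciprocal square root
 * (a hypothetical snippet, not emitted anywhere in this patch):
 *
 *   i915_fs_dp3 (FS_R0, MASK_W,
 *                i915_fs_operand_reg (FS_T0),
 *                i915_fs_operand_reg (FS_T0));
 *   i915_fs_rsq (FS_R0, MASK_W, i915_fs_operand (FS_R0, W, W, W, W));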
+ */ +#define i915_fs_dp3(dest_reg, dest_mask, op0, op1) \ +do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP3, dest_reg, dest_mask, \ + op0, op1,\ + i915_fs_operand_none()); \ + } else { \ + i915_fs_arith (DP3, dest_reg, op0, op1,\ + i915_fs_operand_none()); \ + } \ +} while (0) + +static inline uint32_t cairo_const +i915_fs_operand_pure_alpha (int pure) +{ + if (pure & (1 << 3)) + return i915_fs_operand_one (); + else + return i915_fs_operand_zero (); +} + +#define I915_TILING_DEFAULT I915_TILING_Y +#define I915_BO_CACHE_BUCKETS 13 /* cache surfaces up to 16 MiB */ + +typedef struct i915_surface i915_surface_t; +typedef struct i915_device i915_device_t; +typedef struct i915_shader i915_shader_t; + +typedef void (*i915_add_rectangle_func_t) (const i915_shader_t *shader, + int x, int y, + int w, int h); + +#define IMAGE_CACHE_WIDTH 1024 +#define IMAGE_CACHE_HEIGHT 1024 + +typedef struct i915_image_private { + cairo_rtree_node_t node; + intel_buffer_cache_t *container; +} i915_image_private_t; + +#define I915_BATCH_SIZE (64*1024) +#define I915_VBO_SIZE (512*1024) +#define I915_MAX_RELOCS 2048 + +enum { + I915_DEBUG_EXEC = 0x1, + I915_DEBUG_SYNC = 0x2, + I915_DEBUG_BATCH = 0x4, + I915_DEBUG_BUFFER = 0x8, + I915_DEBUG_BUFFER_CACHE = 0x10, + I915_DEBUG_BUFFER_ALLOC = 0x20, + I915_DEBUG_GLYPHS = 0x40, + I915_DEBUG_MAP = 0x80, + I915_DEBUG_THROTTLE = 0x100, +}; + +struct i915_device { + intel_device_t intel; + + cairo_bool_t debug; + + struct i915_batch { + intel_bo_t *target_bo[I915_MAX_RELOCS]; + size_t gtt_size; + + struct drm_i915_gem_exec_object2 exec[I915_MAX_RELOCS]; + int exec_count; + + struct drm_i915_gem_relocation_entry reloc[I915_MAX_RELOCS]; + uint16_t reloc_count; + + uint16_t used; + } batch; + + uint32_t vbo; + uint32_t vbo_offset; + uint32_t vbo_used; + uint32_t vbo_max_index; + uint32_t vertex_index; + uint32_t vertex_count; + uint32_t floats_per_vertex; + uint32_t rectangle_size; + intel_bo_t *last_vbo; + uint32_t last_vbo_offset; + uint32_t last_vbo_space; + + i915_shader_t *current_shader; + + i915_surface_t *current_target; + uint32_t current_size; + uint32_t current_diffuse; + uint32_t current_colorbuf; + uint32_t *current_source; + uint32_t *current_mask; + uint32_t *current_clip; + uint32_t current_program; + uint32_t current_texcoords; + uint32_t current_blend; + uint32_t current_constants[8*4]; + uint32_t current_n_constants; + uint32_t current_samplers[2*(3+3*4)]; + uint32_t current_n_samplers; + uint32_t last_source_fragment; + + cairo_list_t image_caches[2]; + + uint32_t batch_header[18]; + uint32_t batch_base[I915_BATCH_SIZE / sizeof (uint32_t)]; + uint8_t vbo_base[I915_VBO_SIZE]; +}; + +enum { + CURRENT_SOURCE = 0x1, + CURRENT_MASK = 0x2, + CURRENT_CLIP = 0x4 +}; + +typedef enum { + VS_CONSTANT, + VS_LINEAR, + VS_RADIAL, + VS_TEXTURE, + VS_TEXTURE_16, +} i915_vertex_shader_t; + +typedef enum { + FS_ZERO, + FS_ONE, + FS_PURE, + FS_CONSTANT, + FS_DIFFUSE, + FS_LINEAR, + FS_RADIAL, + FS_TEXTURE, + FS_YUV, + FS_SPANS, +} i915_fragment_shader_t; + +#define FS_DETAILS_SHIFT 4 + +typedef enum { + PATTERN_BASE, + PATTERN_CONSTANT, + PATTERN_LINEAR, + PATTERN_RADIAL, + PATTERN_TEXTURE, +} i915_shader_channel_t; + +struct i915_surface { + intel_surface_t intel; + + uint32_t map0, map1; + uint32_t colorbuf; + + uint32_t offset; + uint32_t is_current_texture; + + i915_image_private_t *cache; + + intel_bo_t *stencil; + uint32_t stencil_stride; + uint32_t stencil_offset; +}; + +typedef enum { + NONE = 0, + YUV_I420, + /* XXX */ + YUV_YV12, + YUV_YUY2, + YUV_UYVY, +} 
i915_packed_pixel_t; + +/* read-only container */ +#define I915_PACKED_PIXEL_SURFACE_TYPE 0x1000 +typedef struct i915_packed_pixel_surface { + cairo_surface_t base; + + i915_packed_pixel_t pixel; + + i915_device_t *device; + intel_bo_t *bo; + uint32_t is_current_texture; + + uint32_t offset[4]; + uint32_t stride[4]; + uint32_t width[4]; + uint32_t height[4]; + uint32_t map0[4], map1[4]; +} i915_packed_pixel_surface_t; + +struct i915_shader { + i915_device_t *device; + i915_surface_t *target; + + cairo_operator_t op; + uint32_t blend; + cairo_content_t content; + + cairo_bool_t need_combine; + + i915_add_rectangle_func_t add_rectangle; + + union i915_shader_channel { + struct { + i915_vertex_shader_t vertex; + i915_fragment_shader_t fragment; + i915_shader_channel_t pattern; + } type; + struct i915_shader_base { + i915_vertex_shader_t vertex; + i915_fragment_shader_t fragment; + i915_shader_channel_t pattern; + uint32_t texfmt; + cairo_content_t content; + uint32_t mode; + intel_bo_t *bo; + uint32_t n_samplers; + uint32_t offset[4]; + uint32_t map[2*4]; + uint32_t sampler[2]; + cairo_matrix_t matrix; + } base; + struct i915_shader_solid { + struct i915_shader_base base; + cairo_color_t color; + int pure; + } solid; + struct i915_shader_linear { + struct i915_shader_base base; + struct { + float red, green, blue, alpha; + } color0, color1; + float dx, dy, offset; + } linear; + struct i915_shader_radial { + struct i915_shader_base base; + float constants[8]; + } radial; + struct i915_shader_surface { + struct i915_shader_base base; + i915_packed_pixel_t pixel; + } surface; + } source, mask, clip, dst; +}; + +enum i915_shader_linear_mode { + /* XXX REFLECT */ + LINEAR_TEXTURE, + LINEAR_NONE, + LINEAR_REPEAT, + LINEAR_PAD, +}; + +enum i915_shader_radial_mode { + RADIAL_ONE, + RADIAL_TWO +}; + +typedef cairo_status_t +(*i915_spans_func_t) (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents); + +cairo_private cairo_status_t +i915_clip_and_composite_spans (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + i915_spans_func_t draw_func, + void *draw_closure, + const cairo_composite_rectangles_t*extents, + cairo_clip_t *clip); + +cairo_private cairo_surface_t * +i915_surface_create_internal (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height, + uint32_t tiling, + cairo_bool_t gpu_target); + +cairo_private i915_surface_t * +i915_surface_create_from_cacheable_image_internal (i915_device_t *device, + cairo_image_surface_t *image); + +cairo_private void +i915_surface_scaled_font_fini (cairo_scaled_font_t *scaled_font); + +cairo_private cairo_int_status_t +i915_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining); + +static inline int cairo_const +i915_tiling_height (uint32_t tiling, int height) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return (height + 1) & -2; + case I915_TILING_X: return (height + 7) & -8; + case I915_TILING_Y: return (height + 31) & -32; + } +} + +static inline uint32_t cairo_const +i915_tiling_stride (int format, uint32_t stride) +{ + uint32_t tile_width; + + if (format == I915_TILING_NONE) + return (stride + 31) & -32; + + tile_width = 512; + /* XXX Currently the kernel enforces a tile_width of 512 for TILING_Y. 
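   As written below, the pitch is rounded up to a power of two of at
   least 512 bytes: e.g. a 1024-pixel-wide ARGB32 surface has a
   4096-byte stride, so tile_width doubles 512 -> 1024 -> 2048 -> 4096
   and i915_tiling_stride() returns 4096. i915_tiling_height() above
   likewise rounds the height up to the tile granularity: 2 rows
   untiled, 8 rows for X tiling, 32 rows for Y tiling.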
+ + <jbarnes> the docs are a bit confused on that front + <jbarnes> once we enable it on 915 we'll find out what the tile width size should be in the fence setup + <jbarnes> it could be that 915 has y tiling but that the minimum width is 512 or something + <jbarnes> yeah it's probably 128 on 915 also + <jbarnes> it's just that we haven't tested + <jbarnes> but I wasn't thinking that the tile widths were the same + <jbarnes> only that in order to fence y tiles on 915 you needed pitch to be a multiple of 4 y tiles (or something like that) + + tile_width = format == I915_TILING_Y ? 128 : 512; + */ + + /* needs a pot tile width */ + while (tile_width < stride) + tile_width <<= 1; + + return tile_width; +} + +static inline uint32_t cairo_const +i915_tiling_size (uint32_t tiling, uint32_t size) +{ + uint32_t fence; + + if (tiling == I915_TILING_NONE) + return (size + 4095) & -4096; + + fence = 1024 * 1024; /* 1 MiB */ + while (fence < size) + fence <<= 1; + + return fence; +} + +static inline cairo_bool_t cairo_pure +i915_texture_filter_is_nearest (cairo_filter_t filter) +{ + switch (filter) { + case CAIRO_FILTER_BEST: + case CAIRO_FILTER_GOOD: + case CAIRO_FILTER_BILINEAR: + case CAIRO_FILTER_GAUSSIAN: + return FALSE; + default: + case CAIRO_FILTER_FAST: + case CAIRO_FILTER_NEAREST: + return TRUE; + } +} + +static inline uint32_t cairo_pure +i915_texture_filter (cairo_filter_t filter) +{ + switch (filter) { + case CAIRO_FILTER_BEST: + case CAIRO_FILTER_GOOD: + case CAIRO_FILTER_BILINEAR: + case CAIRO_FILTER_GAUSSIAN: + return + (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT); + default: + case CAIRO_FILTER_FAST: + case CAIRO_FILTER_NEAREST: + return + (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) | + (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); + } +} + +static inline uint32_t cairo_pure +i915_texture_extend (cairo_extend_t extend) +{ + switch (extend) { + default: + case CAIRO_EXTEND_NONE: + return + (TEXCOORDMODE_CLAMP_BORDER << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_BORDER << SS3_TCY_ADDR_MODE_SHIFT); + case CAIRO_EXTEND_REPEAT: + return + (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT); + case CAIRO_EXTEND_PAD: + return + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT); + case CAIRO_EXTEND_REFLECT: + return + (TEXCOORDMODE_MIRROR << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_MIRROR << SS3_TCY_ADDR_MODE_SHIFT); + } +} + +static inline uint32_t cairo_pure +BUF_tiling (uint32_t tiling) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return 0; + case I915_TILING_X: return BUF_3D_TILED_SURFACE | BUF_3D_TILE_WALK_X; + case I915_TILING_Y: return BUF_3D_TILED_SURFACE | BUF_3D_TILE_WALK_Y; + } +} + +#define OUT_DWORD(dword) i915_batch_emit_dword (device, dword) +#define OUT_RELOC(surface, read, write) i915_batch_emit_reloc (device, to_intel_bo (surface->intel.drm.bo), surface->offset, read, write) + +#define FS_LOCALS \ + uint32_t *_shader_start + +#define FS_BEGIN() \ +do { \ + _shader_start = BATCH_PTR (device); \ + OUT_DWORD (_3DSTATE_PIXEL_SHADER_PROGRAM); \ +} while (0) + +#define FS_END() \ +do { \ + *_shader_start |= BATCH_PTR (device) - _shader_start - 2; \ +} while (0); + +static inline int32_t +i915_batch_space (i915_device_t *device) +{ + /* leave room for RECTLIST(4) + MI_BUFFER_END + MI_NOOP */ + return sizeof (device->batch_base) - (device->batch.used << 2) - 32; +} + +static inline cairo_bool_t +i915_check_aperture_size (const 
i915_device_t *device, int relocs, size_t size) +{ + return device->batch.reloc_count + relocs < I915_MAX_RELOCS && + device->batch.gtt_size + size <= device->intel.gtt_avail_size; +} + +static inline cairo_bool_t +i915_check_aperture (const i915_device_t *device, intel_bo_t **bo_array, int count) +{ + uint32_t relocs = 0, size = 0; + + while (count--) { + const intel_bo_t *bo = *bo_array++; + if (bo->exec == NULL) { + relocs++; + size += bo->base.size; + } + } + + return i915_check_aperture_size (device, relocs, size); +} + +#define BATCH_PTR(device) &(device)->batch_base[(device)->batch.used] +static inline void +i915_batch_emit_dword (i915_device_t *device, uint32_t dword) +{ + device->batch_base[device->batch.used++] = dword; +} + +cairo_private void +i915_batch_add_reloc (i915_device_t *device, uint32_t pos, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain); + +static inline void +i915_batch_fill_reloc (i915_device_t *device, uint32_t pos, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain) +{ + i915_batch_add_reloc (device, pos, + bo, offset, + read_domains, write_domain); + device->batch_base[pos] = bo->offset + offset; +} + +static inline void +i915_batch_emit_reloc (i915_device_t *device, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain) +{ + i915_batch_add_reloc (device, device->batch.used, + bo, offset, + read_domains, write_domain); + i915_batch_emit_dword (device, bo->offset + offset); +} + +cairo_private cairo_status_t +i915_vbo_flush (i915_device_t *device); + +cairo_private void +i915_vbo_finish (i915_device_t *device); + +cairo_private cairo_status_t +i915_batch_flush (i915_device_t *device); + +static inline float * +i915_add_rectangle (i915_device_t *device) +{ + float *vertices; + uint32_t size; + + assert (device->floats_per_vertex); + + size = device->rectangle_size; + if (unlikely (device->vbo_offset + size > I915_VBO_SIZE)) + i915_vbo_finish (device); + + vertices = (float *) (device->vbo_base + device->vbo_offset); + device->vbo_used = device->vbo_offset += size; + device->vertex_count += 3; + return vertices; +} + +static inline i915_device_t * +i915_device (i915_surface_t *surface) +{ + return (i915_device_t *) surface->intel.drm.base.device; +} + +cairo_private void +i915_shader_init (i915_shader_t *shader, + i915_surface_t *dst, + cairo_operator_t op); + +cairo_private cairo_status_t +i915_shader_acquire_pattern (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_pattern_t *pattern, + const cairo_rectangle_int_t *extents); + +cairo_private void +i915_shader_set_clip (i915_shader_t *shader, + cairo_clip_t *clip); + +cairo_private int +i915_shader_num_texcoords (const i915_shader_t *shader); + +static inline double cairo_const +i915_shader_linear_texcoord (const struct i915_shader_linear *l, + double src_x, double src_y) +{ + return l->dx * src_x + l->dy * src_y + l->offset; +} + +cairo_private cairo_status_t +i915_shader_commit (i915_shader_t *shader, + i915_device_t *device); + +cairo_private void +i915_shader_fini (i915_shader_t *shader); + +cairo_private cairo_status_t +i915_fixup_unbounded (i915_surface_t *dst, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip); + +#endif /* CAIRO_DRM_I915_PRIVATE_H */ diff --git a/src/drm/cairo-drm-i915-shader.c b/src/drm/cairo-drm-i915-shader.c new file mode 100644 index 00000000..afcbd42b --- /dev/null +++ b/src/drm/cairo-drm-i915-shader.c @@ -0,0 +1,2674 @@ +/* cairo - a 
vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-error-private.h" +#include "cairo-drm-i915-private.h" +#include "cairo-surface-subsurface-private.h" +#include "cairo-surface-snapshot-private.h" + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS +/* for DRI2/DRM interoperability */ +#include "cairo-xcb-private.h" +#endif + +#if 0 +static cairo_status_t +i915_packed_pixel_surface_finish (void *abstract_surface) +{ + i915_packed_pixel_surface_t *surface = abstract_surface; + i915_device_t *device; + + device = i915_device_acquire (&surface->device->intel.base); + + intel_bo_destroy (&device->intel, surface->bo); + + if (surface->is_current_texture) { + if (surface->is_current_texture & CURRENT_SOURCE) + device->current_source = NULL; + if (surface->is_current_texture & CURRENT_MASK) + device->current_mask = NULL; + device->current_n_samplers = 0; + } + + i915_device_release (device); + + return CAIRO_STATUS_SUCCESS; +} + +static const cairo_surface_backend_t i915_packed_pixel_surface_backend = { + I915_PACKED_PIXEL_SURFACE_TYPE, + i915_packed_pixel_surface_finish, +}; + +static cairo_surface_t * +i915_packed_pixel_surface_create (i915_device_t *device, + i915_packed_pixel_t pixel, + const uint8_t *data, + uint32_t length, + uint32_t width, uint32_t height) +{ + i915_packed_pixel_surface_t *surface; + cairo_content_t content; + uint32_t tiling, size; + uint32_t stride, half_stride; + uint32_t i; + + if (width > 2048 || height > 2048) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_SIZE)); + + surface = malloc (sizeof (i915_packed_pixel_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + tiling = I915_TILING_NONE; /* XXX */ + half_stride = stride = i915_tiling_stride (tiling, width/2); + if (stride < width) + stride *= 2 ; + height = i915_tiling_height (tiling, height); + + switch (surface->pixel = pixel) { + case YUV_I420: + content = CAIRO_CONTENT_COLOR; + + surface->offset[0] = 0; + surface->width[0] = width; + surface->height[0] = height; + 
surface->stride[0] = stride; + surface->map0[0] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling); + surface->map0[0] |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + surface->map1[0] = (stride / 4 - 1) << MS4_PITCH_SHIFT; + + surface->offset[1] = stride * height; + surface->width[1] = width / 2; + surface->height[1] = height / 2; + surface->stride[1] = half_stride; + surface->map0[1] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling); + surface->map0[1] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) | + ((width/2 - 1) << MS3_WIDTH_SHIFT); + surface->map1[1] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT; + + if (width < half_stride) { + surface->offset[2] = stride * height + half_stride / 2; + size = stride * height + half_stride * height / 2; + } else { + surface->offset[2] = stride * height + half_stride * height / 2; + size = stride * height + half_stride * height; + } + surface->width[2] = width / 2; + surface->height[2] = height / 2; + surface->stride[2] = half_stride; + surface->map0[2] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling); + surface->map0[2] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) | + ((width/2 - 1) << MS3_WIDTH_SHIFT); + surface->map1[2] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT; + break; + + case NONE: + case YUV_YV12: + case YUV_YUY2: + case YUV_UYVY: + ASSERT_NOT_REACHED; + break; + } + + _cairo_surface_init (&surface->base, + &i915_packed_pixel_surface_backend, + content); + + surface->bo = intel_bo_create (&device->intel, size, FALSE); + assert (tiling == I915_TILING_NONE); + if (unlikely (surface->bo == NULL)) { + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + if (tiling == I915_TILING_NONE) { + intel_bo_t *bo = surface->bo; + uint32_t dst; + int uv; + + dst = surface->offset[0]; + if (width == stride) { + size = stride * height; + intel_bo_write (&device->intel, bo, dst, size, data); + data += size; + } else { + for (i = 0; i < height; i++) { + intel_bo_write (&device->intel, bo, dst, width, data); + dst += stride; + data += width; + } + } + + for (uv = 1; uv <= 2; uv++) { + dst = surface->offset[uv]; + if (width / 2 == half_stride) { + size = half_stride * height / 2; + intel_bo_write (&device->intel, bo, dst, size, data); + data += size; + } else { + size = width / 2; + for (i = 0; i < height / 2; i++) { + intel_bo_write (&device->intel, bo, dst, size, data); + dst += half_stride; + data += size; + } + } + } + } else { + uint8_t *dst, *base; + + base = intel_bo_map (&device->intel, surface->bo); + + dst = base + surface->offset[0]; + if (width == stride) { + size = stride * height; + memcpy (dst, data, size); + data += size; + } else { + for (i = 0; i < height; i++) { + memcpy (dst, data, width); + dst += stride; + data += width; + } + } + + dst = base + surface->offset[1]; + if (width / 2 == half_stride) { + size = half_stride * height / 2; + memcpy (dst, data, size); + data += size; + } else { + size = width / 2; + for (i = 0; i < height / 2; i++) { + memcpy (dst, data, size); + dst += half_stride; + data += size; + } + } + + dst = base + surface->offset[2]; + if (width / 2 == half_stride) { + size = half_stride * height / 2; + memcpy (dst, data, size); + data += size; + } else { + size = width / 2; + for (i = 0; i < height / 2; i++) { + memcpy (dst, data, size); + dst += half_stride; + data += size; + } + } + + intel_bo_unmap (surface->bo); + } + + surface->device = device; + surface->is_current_texture = 0; + + return &surface->base; +} + +static cairo_int_status_t +i915_clone_yuv 
(i915_surface_t *surface, + cairo_surface_t *source, + int width, int height, + cairo_surface_t **clone_out) +{ + const uint8_t *mime_data = NULL; + unsigned int mime_data_length; + cairo_surface_t *clone; + + cairo_surface_get_mime_data (source, "video/x-raw-yuv/i420", + &mime_data, &mime_data_length); + if (mime_data == NULL) + return CAIRO_INT_STATUS_UNSUPPORTED; + + clone = + i915_packed_pixel_surface_create ((i915_device_t *) surface->base.device, + YUV_I420, + mime_data, mime_data_length, + width, height); + if (clone == NULL) + return CAIRO_INT_STATUS_UNSUPPORTED; + if (unlikely (clone->status)) + return clone->status; + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} +#endif + +/* Max instruction count: 4 */ +static void +i915_shader_linear_color (i915_device_t *device, + enum i915_shader_linear_mode mode, + int in, int c0, int c1, int out) +{ + int tmp = FS_U0; + + switch (mode) { + case LINEAR_TEXTURE: + ASSERT_NOT_REACHED; + case LINEAR_NONE: + tmp = in; + break; + + case LINEAR_REPEAT: + i915_fs_frc (tmp, i915_fs_operand (in, X, X, X, X)); + break; +#if 0 + case LINEAR_REFLECT: + /* XXX needs an extra constant: C2 [0.5, 2.0, x, x] */ + i915_fs_mul (tmp, in, 0.5); + i915_fs_frc (tmp, i915_fs_operand_reg (tmp)); + i915_fs_mul (tmp, tmp, 2.0); + i915_fs_add (tmp, i915_fs_operand_one (), + i915_fs_operand_reg_negate (tmp)); + i915_fs_cmp (tmp, + i915_fs_operand_reg (tmp), + i915_fs_operand_reg (tmp), + i915_fs_operand_reg_negate (tmp)); + i915_fs_add (tmp, i915_fs_operand_one (), + i915_fs_operand_reg_negate (tmp)); +#endif + case LINEAR_PAD: + i915_fs_max (tmp, + i915_fs_operand_zero (), + i915_fs_operand (in, X, X, X, X)); + i915_fs_min (tmp, + i915_fs_operand_one (), + i915_fs_operand_reg (tmp)); + break; + } + + /* interpolate */ + i915_fs_mad (out, 0, + i915_fs_operand (tmp, -X, -X, -X, -X), + i915_fs_operand_reg (c0), + i915_fs_operand_reg (c0)); + i915_fs_mad (out, 0, + i915_fs_operand (tmp, X, X, X, X), + i915_fs_operand_reg (c1), + i915_fs_operand_reg (out)); +} + +static void +i915_shader_radial_init (struct i915_shader_radial *r, + const cairo_radial_pattern_t *radial) +{ + double dx, dy, dr, r1; + + dx = _cairo_fixed_to_double (radial->c2.x - radial->c1.x); + dy = _cairo_fixed_to_double (radial->c2.y - radial->c1.y); + dr = _cairo_fixed_to_double (radial->r2 - radial->r1); + + r1 = _cairo_fixed_to_double (radial->r1); + + if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) { + /* XXX dr == 0, meaningless with anything other than PAD */ + r->constants[0] = _cairo_fixed_to_double (radial->c1.x) / dr; + r->constants[1] = _cairo_fixed_to_double (radial->c1.y) / dr; + r->constants[2] = 1. 
/ dr; + r->constants[3] = -r1 / dr; + + r->constants[4] = 0; + r->constants[5] = 0; + r->constants[6] = 0; + r->constants[7] = 0; + + r->base.mode = RADIAL_ONE; + } else { + r->constants[0] = -_cairo_fixed_to_double (radial->c1.x); + r->constants[1] = -_cairo_fixed_to_double (radial->c1.y); + r->constants[2] = r1; + r->constants[3] = -4 * (dx*dx + dy*dy - dr*dr); + + r->constants[4] = -2 * dx; + r->constants[5] = -2 * dy; + r->constants[6] = -2 * r1 * dr; + r->constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr)); + + r->base.mode = RADIAL_TWO; + } + + r->base.matrix = radial->base.base.matrix; +} + +/* Max instruction count: 10 */ +static void +i915_shader_radial_coord (i915_device_t *device, + enum i915_shader_radial_mode mode, + int in, int g0, int g1, int out) +{ + switch (mode) { + case RADIAL_ONE: + /* + pdx = (x - c1x) / dr, pdy = (y - c1y) / dr; + r² = pdx*pdx + pdy*pdy + t = r²/sqrt(r²) - r1/dr; + */ + i915_fs_mad (FS_U0, MASK_X | MASK_Y, + i915_fs_operand (in, X, Y, ZERO, ZERO), + i915_fs_operand (g0, Z, Z, ZERO, ZERO), + i915_fs_operand (g0, -X, -Y, ZERO, ZERO)); + i915_fs_dp2add (FS_U0, MASK_X, + i915_fs_operand (FS_U0, X, Y, ZERO, ZERO), + i915_fs_operand (FS_U0, X, Y, ZERO, ZERO), + i915_fs_operand_zero ()); + i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U0, X, X, X, X)); + i915_fs_mad (out, MASK_X, + i915_fs_operand (FS_U0, X, ZERO, ZERO, ZERO), + i915_fs_operand (out, X, ZERO, ZERO, ZERO), + i915_fs_operand (g0, W, ZERO, ZERO, ZERO)); + break; + + case RADIAL_TWO: + /* + pdx = x - c1x, pdy = y - c1y; + A = dx² + dy² - dr² + B = -2*(pdx*dx + pdy*dy + r1*dr); + C = pdx² + pdy² - r1²; + det = B*B - 4*A*C; + t = (-B + sqrt (det)) / (2 * A) + */ + + /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */ + i915_fs_add (FS_U0, + i915_fs_operand (in, X, Y, ZERO, ZERO), + i915_fs_operand (g0, X, Y, Z, ZERO)); + /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */ + i915_fs_dp3 (FS_U0, MASK_W, + i915_fs_operand (FS_U0, X, Y, ONE, ZERO), + i915_fs_operand (g1, X, Y, Z, ZERO)); + /* u1.x = pdx² + pdy² - r1²; [C] */ + i915_fs_dp3 (FS_U1, MASK_X, + i915_fs_operand (FS_U0, X, Y, Z, ZERO), + i915_fs_operand (FS_U0, X, Y, -Z, ZERO)); + /* u1.x = C, u1.y = B, u1.z=-4*A; */ + i915_fs_mov_masked (FS_U1, MASK_Y, i915_fs_operand (FS_U0, W, W, W, W)); + i915_fs_mov_masked (FS_U1, MASK_Z, i915_fs_operand (g0, W, W, W, W)); + /* u1.x = B² - 4*A*C */ + i915_fs_dp2add (FS_U1, MASK_X, + i915_fs_operand (FS_U1, X, Y, ZERO, ZERO), + i915_fs_operand (FS_U1, Z, Y, ZERO, ZERO), + i915_fs_operand_zero ()); + /* out.x = -B + sqrt (B² - 4*A*C), + * out.y = -B - sqrt (B² - 4*A*C), + */ + i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U1, X, X, X, X)); + i915_fs_mad (out, MASK_X | MASK_Y, + i915_fs_operand (out, X, X, ZERO, ZERO), + i915_fs_operand (FS_U1, X, -X, ZERO, ZERO), + i915_fs_operand (FS_U0, -W, -W, ZERO, ZERO)); + /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), + * out.y = (-B - sqrt (B² - 4*A*C)) / (2 * A) + */ + i915_fs_mul (out, + i915_fs_operand (out, X, Y, ZERO, ZERO), + i915_fs_operand (g1, W, W, ZERO, ZERO)); + /* if (A > 0) + * out = (-B + sqrt (B² - 4*A*C)) / (2 * A), + * else + * out = (-B - sqrt (B² - 4*A*C)) / (2 * A) + */ + i915_fs_cmp (out, + i915_fs_operand (g1, W, ZERO, ZERO, ZERO), + i915_fs_operand (out, X, ZERO, ZERO, ZERO), + i915_fs_operand (out, Y, ZERO, ZERO, ZERO)); + break; + } +} + +/* Max instruction count: 7 */ +static inline void +i915_shader_yuv_color (i915_device_t *device, + int y, int u, int v, + int c0, int c1, int c2, + int out) +{ + i915_fs_mov_masked (FS_U0, MASK_X, 
i915_fs_operand_reg (y)); + i915_fs_mov_masked (FS_U0, MASK_Y, i915_fs_operand_reg (u)); + i915_fs_mov_masked (FS_U0, MASK_Z, i915_fs_operand_reg (v)); + + i915_fs_add (FS_U0, + i915_fs_operand_reg (FS_U0), + i915_fs_operand_reg (c0)); + i915_fs_dp3 (out, MASK_X, + i915_fs_operand_reg (FS_U0), + i915_fs_operand (c1, X, ZERO, Y, ZERO)); + i915_fs_dp3 (out, MASK_Z, + i915_fs_operand_reg (FS_U0), + i915_fs_operand (c1, Z, W, ZERO, ZERO)); + i915_fs_dp3 (out, MASK_Y, + i915_fs_operand_reg (FS_U0), + i915_fs_operand_reg (c2)); +} + +static inline uint32_t +i915_shader_channel_key (const union i915_shader_channel *channel) +{ + return (channel->type.fragment & 0x0f) | (channel->base.mode << FS_DETAILS_SHIFT); +} + +static uint32_t +i915_shader_channel_get_num_tex_coords (const union i915_shader_channel *channel) +{ + switch (channel->type.fragment) { + default: + case FS_ZERO: + case FS_ONE: + case FS_CONSTANT: + case FS_PURE: + case FS_DIFFUSE: + return 0; + + case FS_LINEAR: + case FS_RADIAL: + case FS_TEXTURE: + case FS_SPANS: + case FS_YUV: + return 1; + } +} + +static uint32_t +i915_shader_get_num_tex_coords (const i915_shader_t *shader) +{ + uint32_t num_tex_coords; + + num_tex_coords = 0; + + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->source); + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->mask); + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->clip); + num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->dst); + + return num_tex_coords; +} + +#define i915_fs_operand_impure(reg, channel, pure) \ + (reg | \ + (((pure & (1 << 0)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \ + (((pure & (1 << 1)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \ + (((pure & (1 << 2)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \ + (((pure & (1 << 3)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT)) + +#define i915_fs_operand_pure(pure) \ + (FS_R0 | \ + (((pure & (1 << 0)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \ + (((pure & (1 << 1)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \ + (((pure & (1 << 2)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \ + (((pure & (1 << 3)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT)) + +#define i915_fs_operand_reg_pure(reg, pure) \ + (reg | \ + (((pure & (1 << 0)) ? X_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \ + (((pure & (1 << 1)) ? Y_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \ + (((pure & (1 << 2)) ? Z_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \ + (((pure & (1 << 3)) ? 
W_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT)) + +static void +i915_set_shader_program (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t num_tex_coords; + uint32_t num_samplers; + uint32_t n; + uint32_t texture_offset = 0; + uint32_t constant_offset = 0; + uint32_t sampler_offset = 0; + uint32_t source_reg; + uint32_t source_pure; + uint32_t mask_reg; + uint32_t out_reg; + uint32_t dest_reg; + FS_LOCALS; + + n = (i915_shader_channel_key (&shader->source) << 0) | + (i915_shader_channel_key (&shader->mask) << 8) | + (i915_shader_channel_key (&shader->clip) << 16) | + (shader->op << 24) | + (((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31); + if (n == device->current_program) + return; + device->current_program = n; + + FS_BEGIN (); + + if (shader->source.type.fragment == FS_ZERO) { + if (shader->clip.type.fragment == FS_TEXTURE) { + /* XXX need_combine */ + assert (shader->mask.type.fragment == (i915_fragment_shader_t) -1); + i915_fs_dcl (FS_T0); + i915_fs_texld (FS_U0, FS_S0, FS_T0); + if ((shader->content & CAIRO_CONTENT_COLOR) == 0) + i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, W, W, W, W)); + else + i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, ZERO, ZERO, ZERO, W)); + } else { + i915_fs_mov (FS_OC, i915_fs_operand_zero ()); + } + + FS_END (); + return; + } + + num_tex_coords = i915_shader_get_num_tex_coords (shader); + for (n = 0; n < num_tex_coords; n++) + i915_fs_dcl (FS_T0 + n); + + num_samplers = + shader->source.base.n_samplers + + shader->mask.base.n_samplers + + shader->clip.base.n_samplers + + shader->dst.base.n_samplers; + for (n = 0; n < num_samplers; n++) + i915_fs_dcl (FS_S0 + n); + + source_reg = ~0; + source_pure = 0; + out_reg = FS_R0; + if (! shader->need_combine && + shader->mask.type.fragment == (i915_fragment_shader_t) -1 && + shader->clip.type.fragment != FS_TEXTURE && + shader->content != CAIRO_CONTENT_ALPHA) + { + out_reg = FS_OC; + } + + switch (shader->source.type.fragment) { + default: + case FS_ZERO: + case FS_SPANS: + ASSERT_NOT_REACHED; + + case FS_PURE: + source_pure = shader->source.solid.pure; + case FS_ONE: + break; + + case FS_CONSTANT: + source_reg = FS_C0; + constant_offset += 1; + break; + + case FS_DIFFUSE: + i915_fs_dcl (FS_T8); + source_reg = FS_T8; + break; + + case FS_LINEAR: + i915_shader_linear_color (device, shader->source.base.mode, + FS_T0, /* input */ + FS_C0, FS_C1, /* colour ramp */ + FS_U3); /* unpremultiplied output */ + /* XXX can we defer premultiplication? */ + i915_fs_mul (out_reg, + i915_fs_operand_reg (FS_U3), + i915_fs_operand (FS_U3, W, W, W, W)); + + constant_offset += 2; + texture_offset += 1; + source_reg = out_reg; + break; + + case FS_RADIAL: + i915_shader_radial_coord (device, shader->source.base.mode, + FS_T0, /* input */ + FS_C0, FS_C1, /* gradient constants */ + FS_U3); /* coordinate */ + + i915_fs_texld (out_reg, FS_S0, FS_U3); + constant_offset += 2; + texture_offset += 1; + sampler_offset += 1; + source_reg = out_reg; + break; + + case FS_TEXTURE: + i915_fs_texld (out_reg, FS_S0, FS_T0); + texture_offset += 1; + sampler_offset += 1; + source_reg = out_reg; + break; + + case FS_YUV: + /* Load samplers to temporaries. 
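+	 * Each of the Y, U and V planes is bound to its own sampler
+	 * (FS_S0..FS_S2) but all three share one texcoord set, hence the
+	 * three texld ops below.  The combine in i915_shader_yuv_color()
+	 * is an offset followed by a matrix multiply; with the usual
+	 * BT.601 coefficients (an assumption here -- the real values are
+	 * whatever was packed into FS_C0..FS_C2 by the shader setup) the
+	 * intended mapping is roughly:
+	 *
+	 *   y' = Y - 16/255,  u' = U - 0.5,  v' = V - 0.5
+	 *   R  = 1.164 y'             + 1.596 v'
+	 *   G  = 1.164 y' - 0.392 u'  - 0.813 v'
+	 *   B  = 1.164 y' + 2.017 u'
+	 *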
	 */
+	i915_fs_texld (FS_R0, FS_S0, FS_T0);
+	i915_fs_texld (FS_R1, FS_S1, FS_T0);
+	i915_fs_texld (FS_R2, FS_S2, FS_T0);
+
+	i915_shader_yuv_color (device,
+			       FS_R0, FS_R1, FS_R2, /* y, u, v */
+			       FS_C0, FS_C1, FS_C2, /* coefficients */
+			       out_reg);
+
+	constant_offset += 3;
+	texture_offset += 1;
+	sampler_offset += 3;
+	source_reg = out_reg;
+	break;
+    }
+
+    mask_reg = ~0;
+    switch (shader->mask.type.fragment) {
+    case FS_PURE:
+    case FS_ZERO:
+    case FS_YUV:
+    case FS_DIFFUSE:
+	ASSERT_NOT_REACHED;
+    case FS_ONE:
+    default:
+	break;
+
+    case FS_SPANS:
+	mask_reg = FS_T0 + texture_offset;
+	texture_offset += 1;
+	break;
+
+    case FS_CONSTANT:
+	mask_reg = FS_C0 + constant_offset;
+	constant_offset += 1;
+	break;
+
+    case FS_LINEAR:
+	i915_shader_linear_color (device, shader->mask.base.mode,
+				  FS_T0 + texture_offset, /* input */
+				  FS_C0 + constant_offset,
+				  FS_C0 + constant_offset + 1, /* colour ramp */
+				  FS_U3); /* unpremultiplied output */
+	i915_fs_mul (FS_R1,
+		     i915_fs_operand_reg (FS_U3),
+		     i915_fs_operand (source_reg, W, W, W, W));
+
+	constant_offset += 2;
+	texture_offset += 1;
+	mask_reg = FS_R1;
+	break;
+
+    case FS_RADIAL:
+	i915_shader_radial_coord (device, shader->mask.base.mode,
+				  FS_T0 + texture_offset, /* input */
+				  FS_C0 + constant_offset,
+				  FS_C0 + constant_offset + 1, /* gradient constants */
+				  FS_U3); /* coordinate */
+
+	i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_U3);
+	constant_offset += 2;
+	texture_offset += 1;
+	sampler_offset += 1;
+	mask_reg = FS_R1;
+	break;
+
+    case FS_TEXTURE:
+	i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
+	texture_offset += 1;
+	sampler_offset += 1;
+	mask_reg = FS_R1;
+	break;
+    }
+
+    if (mask_reg != ~0U) {
+	if (! shader->need_combine &&
+	    shader->clip.type.fragment != FS_TEXTURE &&
+	    shader->content != CAIRO_CONTENT_ALPHA)
+	{
+	    out_reg = FS_OC;
+	}
+	if (source_reg == ~0U) {
+	    if (source_pure) {
+		if (shader->mask.type.fragment == FS_SPANS) {
+		    i915_fs_mov (out_reg,
+				 i915_fs_operand_impure (mask_reg, X, source_pure));
+		} else {
+		    /* XXX ComponentAlpha
+		    i915_fs_mov (out_reg,
+				 i915_fs_operand_pure (mask_reg,
+						       shader->source.solid.pure));
+		    */
+		    i915_fs_mov (out_reg,
+				 i915_fs_operand_impure (mask_reg, W, source_pure));
+		}
+		source_reg = out_reg;
+	    } else if (shader->mask.type.fragment == FS_SPANS) {
+		i915_fs_mov (out_reg,
+			     i915_fs_operand (mask_reg, X, X, X, X));
+		source_reg = out_reg;
+	    } else {
+		source_reg = mask_reg;
+	    }
+	} else {
+	    if (shader->mask.type.fragment == FS_SPANS) {
+		i915_fs_mul (out_reg,
+			     i915_fs_operand_reg (source_reg),
+			     i915_fs_operand (mask_reg, X, X, X, X));
+	    } else {
+		/* XXX ComponentAlpha
+		i915_fs_mul (FS_R0,
+			     i915_fs_operand_reg (source_reg),
+			     i915_fs_operand_reg (mask_reg));
+		*/
+		i915_fs_mul (out_reg,
+			     i915_fs_operand_reg (source_reg),
+			     i915_fs_operand (mask_reg, W, W, W, W));
+	    }
+
+	    source_reg = out_reg;
+	}
+    }
+
+    /* need to preserve order of src, mask, clip, dst */
+    mask_reg = ~0;
+    if (shader->clip.type.fragment == FS_TEXTURE) {
+	i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
+	texture_offset += 1;
+	sampler_offset += 1;
+	mask_reg = FS_R1;
+    }
+
+    if (shader->need_combine) {
+	assert (shader->dst.type.fragment == FS_TEXTURE);
+
+	i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset);
+	texture_offset += 1;
+	sampler_offset += 1;
+	dest_reg = FS_R2;
+
+	switch (shader->op) {
+	case CAIRO_OPERATOR_CLEAR:
+	case CAIRO_OPERATOR_SOURCE:
+	    ASSERT_NOT_REACHED;
+
+	case CAIRO_OPERATOR_OVER:
+	    if (source_reg == ~0U) {
+		/* 
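only reached when the source collapsed to a pure colour: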
XXX shader->source.type.fragment == FS_PURE */
+		dest_reg = FS_OC;
+	    } else {
+		i915_fs_add (FS_U0,
+			     i915_fs_operand (source_reg, -W, -W, -W, -W),
+			     i915_fs_operand_one ());
+		i915_fs_mul (FS_U0,
+			     i915_fs_operand_reg (FS_U0),
+			     i915_fs_operand_reg (dest_reg));
+		i915_fs_add (FS_R3,
+			     i915_fs_operand_reg (source_reg),
+			     i915_fs_operand_reg (FS_U0));
+		source_reg = FS_R3;
+	    }
+	    break;
+
+	case CAIRO_OPERATOR_IN:
+	    if (source_reg == ~0U) {
+		/* XXX shader->source.type.fragment == FS_PURE */
+		source_reg = dest_reg;
+	    } else {
+		i915_fs_mul (FS_R3,
+			     i915_fs_operand_reg (source_reg),
+			     i915_fs_operand_reg (dest_reg));
+		source_reg = FS_R3;
+	    }
+	    break;
+
+	case CAIRO_OPERATOR_OUT:
+	    if (source_reg == ~0U) {
+		/* XXX shader->source.type.fragment == FS_PURE */
+		i915_fs_mov (FS_R3, i915_fs_operand_zero ());
+		source_reg = FS_R3;
+	    } else {
+		i915_fs_add (FS_U0,
+			     i915_fs_operand (source_reg, -W, -W, -W, -W),
+			     i915_fs_operand_one ());
+		i915_fs_mul (FS_R3,
+			     i915_fs_operand_reg (FS_U0),
+			     i915_fs_operand_reg (dest_reg));
+		source_reg = FS_R3;
+	    }
+	    break;
+
+	case CAIRO_OPERATOR_ATOP:
+
+	case CAIRO_OPERATOR_DEST:
+	case CAIRO_OPERATOR_DEST_OVER:
+	case CAIRO_OPERATOR_DEST_IN:
+	case CAIRO_OPERATOR_DEST_OUT:
+	case CAIRO_OPERATOR_DEST_ATOP:
+
+	case CAIRO_OPERATOR_XOR:
+	case CAIRO_OPERATOR_ADD:
+	case CAIRO_OPERATOR_SATURATE:
+
+	case CAIRO_OPERATOR_MULTIPLY:
+	case CAIRO_OPERATOR_SCREEN:
+	case CAIRO_OPERATOR_OVERLAY:
+	case CAIRO_OPERATOR_DARKEN:
+	case CAIRO_OPERATOR_LIGHTEN:
+	case CAIRO_OPERATOR_COLOR_DODGE:
+	case CAIRO_OPERATOR_COLOR_BURN:
+	case CAIRO_OPERATOR_HARD_LIGHT:
+	case CAIRO_OPERATOR_SOFT_LIGHT:
+	case CAIRO_OPERATOR_DIFFERENCE:
+	case CAIRO_OPERATOR_EXCLUSION:
+	case CAIRO_OPERATOR_HSL_HUE:
+	case CAIRO_OPERATOR_HSL_SATURATION:
+	case CAIRO_OPERATOR_HSL_COLOR:
+	case CAIRO_OPERATOR_HSL_LUMINOSITY:
+	    ASSERT_NOT_REACHED;
+	    break;
+	}
+    }
+
+    if (shader->clip.type.fragment == FS_TEXTURE) {
+	assert (mask_reg != ~0U);
+
+	if (! 
shader->need_combine) { + /* (source IN clip) */ + if (source_reg == ~0U) { + if (source_pure == 0) { + source_reg = mask_reg; + } else { + out_reg = FS_OC; + if (shader->content == CAIRO_CONTENT_ALPHA) + out_reg = FS_U0; + i915_fs_mov (out_reg, + i915_fs_operand_reg_pure (mask_reg, source_pure)); + source_reg = out_reg; + } + } else if (mask_reg) { + out_reg = FS_OC; + if (shader->content == CAIRO_CONTENT_ALPHA) + out_reg = FS_U0; + i915_fs_mul (out_reg, + i915_fs_operand_reg (source_reg), + i915_fs_operand (mask_reg, W, W, W, W)); + + source_reg = out_reg; + } + } else { + /* (source OP dest) LERP_clip dest */ + if (source_reg == ~0U) { + if (source_pure == 0) { + i915_fs_mov (FS_U0, + i915_fs_operand (mask_reg, W, W, W, W)); + } else { + i915_fs_mov (FS_U0, + i915_fs_operand_impure (mask_reg, W, source_pure)); + } + } else { + i915_fs_mul (FS_U0, + i915_fs_operand_reg (source_reg), + i915_fs_operand (mask_reg, W, W, W, W)); + } + + i915_fs_add (mask_reg, + i915_fs_operand_one (), + i915_fs_operand (mask_reg, -W, -W, -W, -W)); + + if (dest_reg != FS_OC) { + if (dest_reg == ~0U) { + assert (shader->dst.type.fragment == FS_TEXTURE); + + i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset); + texture_offset += 1; + sampler_offset += 1; + dest_reg = FS_R2; + } + + i915_fs_mul (FS_U1, + i915_fs_operand_reg (dest_reg), + i915_fs_operand_reg (mask_reg)); + mask_reg = FS_U1; + } + + source_reg = FS_OC; + if (shader->content != CAIRO_CONTENT_COLOR_ALPHA) + source_reg = FS_U0; + i915_fs_add (source_reg, + i915_fs_operand_reg (FS_U0), + i915_fs_operand_reg (mask_reg)); + } + } + + if (source_reg != FS_OC) { + if (source_reg == ~0U) + if (source_pure) + i915_fs_mov (FS_OC, i915_fs_operand_pure (source_pure)); + else + i915_fs_mov (FS_OC, i915_fs_operand_one ()); + else if ((shader->content & CAIRO_CONTENT_COLOR) == 0) + i915_fs_mov (FS_OC, i915_fs_operand (source_reg, W, W, W, W)); + else + i915_fs_mov (FS_OC, i915_fs_operand_reg (source_reg)); + } else { + if ((shader->content & CAIRO_CONTENT_COLOR) == 0) + i915_fs_mov (FS_OC, i915_fs_operand (FS_OC, W, W, W, W)); + } + + FS_END (); +} + +static void +i915_shader_linear_init (struct i915_shader_linear *l, + const cairo_linear_pattern_t *linear) +{ + double x0, y0, sf; + double dx, dy, offset; + + dx = _cairo_fixed_to_double (linear->p2.x - linear->p1.x); + dy = _cairo_fixed_to_double (linear->p2.y - linear->p1.y); + sf = 1. / (dx * dx + dy * dy); + dx *= sf; + dy *= sf; + + x0 = _cairo_fixed_to_double (linear->p1.x); + y0 = _cairo_fixed_to_double (linear->p1.y); + offset = dx*x0 + dy*y0; + + if (_cairo_matrix_is_identity (&linear->base.base.matrix)) { + l->dx = dx; + l->dy = dy; + l->offset = -offset; + } else { + cairo_matrix_t m; + + cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0); + cairo_matrix_multiply (&m, &linear->base.base.matrix, &m); + l->dx = m.xx; + l->dy = m.xy; + l->offset = m.x0; + } +} + +static cairo_bool_t +i915_shader_linear_contains_rectangle (struct i915_shader_linear *l, + const cairo_rectangle_int_t *extents) +{ + double v; + + v = i915_shader_linear_texcoord (l, + extents->x, + extents->y); + if (v < 0.) + return FALSE; + if (v > 1.) + return FALSE; + + v = i915_shader_linear_texcoord (l, + extents->x + extents->width, + extents->y); + if (v < 0.) + return FALSE; + if (v > 1.) + return FALSE; + + v = i915_shader_linear_texcoord (l, + extents->x, + extents->y + extents->height); + if (v < 0.) + return FALSE; + if (v > 1.) 
+ return FALSE; + + v = i915_shader_linear_texcoord (l, + extents->x + extents->width, + extents->y + extents->height); + if (v < 0.) + return FALSE; + if (v > 1.) + return FALSE; + + return TRUE; +} + +#define is_pure(C,mask) (((mask) == 0) || (C) <= 0x00ff || (C) >= 0xff00) +#define is_one(C,mask) (((mask) != 0) && (C) >= 0xff00) +#define is_zero(C,mask) (((mask) != 0) && (C) <= 0x00ff) + +static cairo_status_t +i915_shader_acquire_solid (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_solid_pattern_t *solid, + const cairo_rectangle_int_t *extents) +{ + cairo_content_t content; + + content = solid->content; + src->solid.color = solid->color; + if (content == 0 || solid->color.alpha_short <= 0x00ff) + { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ZERO; + } + else if ((((content & CAIRO_CONTENT_COLOR) == 0) || + (solid->color.red_short >= 0xff00 && + solid->color.green_short >= 0xff00 && + solid->color.blue_short >= 0xff00)) && + ((content & CAIRO_CONTENT_ALPHA) == 0 || + solid->color.alpha_short >= 0xff00)) + { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ONE; + } + else if (is_pure (solid->color.red_short, content & CAIRO_CONTENT_COLOR) && + is_pure (solid->color.green_short, content & CAIRO_CONTENT_COLOR) && + is_pure (solid->color.blue_short, content & CAIRO_CONTENT_COLOR) && + is_pure (solid->color.alpha_short, content & CAIRO_CONTENT_ALPHA)) + { + src->solid.pure = 0; + src->solid.pure |= is_one (solid->color.red_short, content & CAIRO_CONTENT_COLOR) << 0; + src->solid.pure |= is_one (solid->color.green_short, content & CAIRO_CONTENT_COLOR) << 1; + src->solid.pure |= is_one (solid->color.blue_short, content & CAIRO_CONTENT_COLOR) << 2; + src->solid.pure |= (! is_zero (solid->color.alpha_short, content & CAIRO_CONTENT_ALPHA)) << 3; + + if (src->solid.pure == 0) { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ZERO; + } else if (src->solid.pure == 0x7) { + src->base.content = CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_ONE; + } else { + src->base.content = content; + src->type.fragment = FS_PURE; + src->base.mode = src->solid.pure; + } + } + else + { + src->base.content = content; + src->type.fragment = src == &shader->source ? 
FS_DIFFUSE : FS_CONSTANT; + } + src->type.vertex = VS_CONSTANT; + src->type.pattern = PATTERN_CONSTANT; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_linear (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_linear_pattern_t *linear, + const cairo_rectangle_int_t *extents) +{ + cairo_bool_t mode = LINEAR_TEXTURE; + cairo_status_t status; + + i915_shader_linear_init (&src->linear, linear); + if (linear->base.n_stops == 2 && + linear->base.stops[0].offset == 0.0 && + linear->base.stops[1].offset == 1.0) + { + if (i915_shader_linear_contains_rectangle (&src->linear, + extents)) + { + /* XXX can also lerp if contained within offset range */ + mode = LINEAR_NONE; + } + else switch (linear->base.base.extend) { + case CAIRO_EXTEND_REPEAT: + mode = LINEAR_REPEAT; + break; + case CAIRO_EXTEND_PAD: + mode = LINEAR_PAD; + break; + case CAIRO_EXTEND_NONE: + break; + case CAIRO_EXTEND_REFLECT: + break; + default: + ASSERT_NOT_REACHED; + break; + } + } + + src->type.vertex = VS_LINEAR; + src->type.pattern = PATTERN_LINEAR; + src->base.texfmt = TEXCOORDFMT_1D; + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.mode = mode; + if (mode == LINEAR_TEXTURE) { + intel_buffer_t buffer; + + status = intel_gradient_render ((intel_device_t *) shader->target->intel.drm.base.device, + &linear->base, &buffer); + if (unlikely (status)) + return status; + + src->type.fragment = FS_TEXTURE; + src->base.bo = intel_bo_reference (buffer.bo); + src->base.n_samplers = 1; + src->base.offset[0] = buffer.offset; + src->base.map[0] = buffer.map0; + src->base.map[1] = buffer.map1; + src->base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_BILINEAR); + src->base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (linear->base.base.extend); + } else { + src->type.fragment = FS_LINEAR; + src->linear.color0.red = linear->base.stops[0].color.red; + src->linear.color0.green = linear->base.stops[0].color.green; + src->linear.color0.blue = linear->base.stops[0].color.blue; + src->linear.color0.alpha = linear->base.stops[0].color.alpha; + + src->linear.color1.red = linear->base.stops[1].color.red; + src->linear.color1.green = linear->base.stops[1].color.green; + src->linear.color1.blue = linear->base.stops[1].color.blue; + src->linear.color1.alpha = linear->base.stops[1].color.alpha; + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_radial (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_radial_pattern_t *radial, + const cairo_rectangle_int_t *extents) +{ + intel_buffer_t buffer; + cairo_status_t status; + + status = intel_gradient_render ((intel_device_t *) shader->target->intel.drm.base.device, + &radial->base, &buffer); + if (unlikely (status)) + return status; + + i915_shader_radial_init (&src->radial, radial); + + src->type.vertex = VS_RADIAL; + src->type.fragment = FS_RADIAL; + src->type.pattern = PATTERN_RADIAL; + src->base.texfmt = TEXCOORDFMT_2D; + + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.bo = intel_bo_reference (buffer.bo); + src->base.n_samplers = 1; + src->base.offset[0] = buffer.offset; + src->base.map[0] = buffer.map0; + src->base.map[1] = buffer.map1; + src->base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_BILINEAR); + src->base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (radial->base.base.extend); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t 
+i915_surface_clone (i915_device_t *device, + cairo_image_surface_t *image, + i915_surface_t **clone_out) +{ + i915_surface_t *clone; + cairo_status_t status; + + clone = + i915_surface_create_from_cacheable_image_internal (device, image); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = _cairo_surface_attach_snapshot (&image->base, + &clone->intel.drm.base, + intel_surface_detach_snapshot); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = intel_snapshot_cache_insert (&device->intel, &clone->intel); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_surface_clone_subimage (i915_device_t *device, + cairo_image_surface_t *image, + const cairo_rectangle_int_t *extents, + i915_surface_t **clone_out) +{ + i915_surface_t *clone; + cairo_status_t status; + + clone = (i915_surface_t *) + i915_surface_create_internal (&device->intel.base, + image->base.content, + extents->width, + extents->height, + I915_TILING_DEFAULT, + FALSE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = intel_bo_put_image (to_intel_device (clone->intel.drm.base.device), + to_intel_bo (clone->intel.drm.bo), + clone->intel.drm.stride, + image, + extents->x, extents->y, + extents->width, extents->height, + 0, 0); + + if (unlikely (status)) + return status; + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_solid_surface (i915_shader_t *shader, + union i915_shader_channel *src, + cairo_surface_t *surface, + const cairo_rectangle_int_t *extents) +{ + cairo_surface_pattern_t pattern; + cairo_surface_t *pixel; + cairo_image_surface_t *image; + void *image_extra; + cairo_status_t status; + uint32_t argb; + + status = _cairo_surface_acquire_source_image (surface, &image, &image_extra); + if (unlikely (status)) + return status; + + /* extract the pixel as argb32 */ + pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1); + _cairo_pattern_init_for_surface (&pattern, &image->base); + cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y); + pattern.base.filter = CAIRO_FILTER_NEAREST; + status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL); + _cairo_pattern_fini (&pattern.base); + + _cairo_surface_release_source_image (surface, image, image_extra); + + if (unlikely (status)) { + cairo_surface_destroy (pixel); + return status; + } + + image = (cairo_image_surface_t *) pixel; + argb = *(uint32_t *) image->data; + cairo_surface_destroy (pixel); + + if (argb >> 24 == 0) { + _cairo_color_init_rgba (&src->solid.color, 0, 0, 0, 0); + } else { + uint8_t alpha = argb >> 24; + + _cairo_color_init_rgba (&src->solid.color, + ((((argb >> 16) & 0xff) * 255 + alpha / 2) / alpha) / 255., + ((((argb >> 8) & 0xff) * 255 + alpha / 2) / alpha) / 255., + ((((argb >> 0) & 0xff) * 255 + alpha / 2) / alpha) / 255., + alpha / 255.); + } + + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->type.fragment = FS_CONSTANT; + src->type.vertex = VS_CONSTANT; + src->type.pattern = PATTERN_CONSTANT; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_shader_acquire_surface (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_surface_pattern_t *pattern, + const cairo_rectangle_int_t *extents) +{ + int surface_width, surface_height; + cairo_surface_t *surface, *drm; + cairo_extend_t 
extend; + cairo_filter_t filter; + cairo_matrix_t m; + int src_x = 0, src_y = 0; + + assert (src->type.fragment == (i915_fragment_shader_t) -1); + drm = surface = pattern->surface; + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS + if (surface->type == CAIRO_SURFACE_TYPE_XCB) { + cairo_surface_t *xcb = surface; + + if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + xcb = ((cairo_surface_subsurface_t *) surface)->target; + } else if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + xcb = ((cairo_surface_snapshot_t *) surface)->target; + } + + /* XXX copy windows (IncludeInferiors) to a pixmap/drm surface + * xcb = _cairo_xcb_surface_to_drm (xcb) + */ + xcb = ((cairo_xcb_surface_t *) xcb)->drm; + if (xcb != NULL) + drm = xcb; + } +#endif + + if (surface->type == CAIRO_SURFACE_TYPE_DRM) { + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + drm = ((cairo_surface_subsurface_t *) surface)->target; + } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + drm = ((cairo_surface_snapshot_t *) surface)->target; + } + } + + if (drm->type == CAIRO_SURFACE_TYPE_DRM) { + i915_surface_t *s = (i915_surface_t *) drm; + + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + if (s->intel.drm.base.device == shader->target->intel.drm.base.device && + s != shader->target) + { + cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface; + int x; + + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) { + /* XXX pipelined flush of RENDER/TEXTURE cache */ + } + + src->type.fragment = FS_TEXTURE; + src->surface.pixel = NONE; + surface_width = sub->extents.width; + surface_height = sub->extents.height; + + src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo)); + src->base.n_samplers = 1; + + x = sub->extents.x; + if (s->intel.drm.format != CAIRO_FORMAT_A8) + x *= 4; + + /* XXX tiling restrictions upon offset? */ + src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x; + src->base.map[0] = s->map0; + src->base.map[0] &= ~((2047 << MS3_HEIGHT_SHIFT) | (2047 << MS3_WIDTH_SHIFT)); + src->base.map[0] |= + ((sub->extents.height - 1) << MS3_HEIGHT_SHIFT) | + ((sub->extents.width - 1) << MS3_WIDTH_SHIFT); + src->base.map[1] = (s->intel.drm.stride / 4 - 1) << MS4_PITCH_SHIFT; + } + } else { + /* XXX if s == shader->dst allow if FILTER_NEAREST, EXTEND_NONE? 
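+	     * Sampling the render target mid-draw is only coherent when
+	     * each fragment reads exactly the texel it is about to write
+	     * (nearest filtering, no repeat/reflect, identity transform);
+	     * anything wider may observe stale data through the texture
+	     * cache.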
	     */
+	    if (s->intel.drm.base.device == shader->target->intel.drm.base.device &&
+		s != shader->target)
+	    {
+		src->type.fragment = FS_TEXTURE;
+		src->surface.pixel = NONE;
+		surface_width = s->intel.drm.width;
+		surface_height = s->intel.drm.height;
+
+		src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
+		src->base.n_samplers = 1;
+		src->base.offset[0] = s->offset;
+		src->base.map[0] = s->map0;
+		src->base.map[1] = s->map1;
+	    }
+	}
+    }
+
+    if (src->type.fragment == (i915_fragment_shader_t) -1) {
+	i915_surface_t *s;
+
+	if (extents->width == 1 && extents->height == 1) {
+	    return i915_shader_acquire_solid_surface (shader, src,
+						      surface, extents);
+	}
+
+	s = (i915_surface_t *)
+	    _cairo_surface_has_snapshot (surface,
+					 shader->target->intel.drm.base.backend);
+	if (s == NULL) {
+	    cairo_image_surface_t *image;
+	    void *image_extra;
+	    cairo_status_t status;
+
+#if 0
+	    /* XXX hackity hack hack */
+	    status = i915_clone_yuv (surface, src,
+				     image->width, image->height,
+				     clone_out);
+#endif
+
+	    status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
+	    if (unlikely (status))
+		return status;
+
+	    if (image->width < 2048 && image->height < 2048) {
+		status = i915_surface_clone ((i915_device_t *) shader->target->intel.drm.base.device,
+					     image, &s);
+	    } else {
+		status = i915_surface_clone_subimage ((i915_device_t *) shader->target->intel.drm.base.device,
+						      image, extents, &s);
+		src_x = -extents->x;
+		src_y = -extents->y;
+	    }
+
+	    surface_width = image->width;
+	    surface_height = image->height;
+
+	    _cairo_surface_release_source_image (surface, image, image_extra);
+
+	    if (unlikely (status))
+		return status;
+	} else {
+	    surface_width = s->intel.drm.width;
+	    surface_height = s->intel.drm.height;
+	}
+
+	src->type.fragment = FS_TEXTURE;
+	src->surface.pixel = NONE;
+
+	src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
+	src->base.n_samplers = 1;
+	src->base.offset[0] = s->offset;
+	src->base.map[0] = s->map0;
+	src->base.map[1] = s->map1;
+
+	drm = &s->intel.drm.base;
+    }
+
+    /* XXX transform nx1 or 1xn surfaces to 1D */
+
+    src->type.pattern = PATTERN_TEXTURE;
+    extend = pattern->base.extend;
+    if (extend != CAIRO_EXTEND_NONE &&
+	extents->x >= 0 && extents->y >= 0 &&
+	extents->x + extents->width <= surface_width &&
+	extents->y + extents->height <= surface_height)
+    {
+	extend = CAIRO_EXTEND_NONE;
+    }
+    if (extend == CAIRO_EXTEND_NONE) {
+	src->type.vertex = VS_TEXTURE_16;
+	src->base.texfmt = TEXCOORDFMT_2D_16;
+    } else {
+	src->type.vertex = VS_TEXTURE;
+	src->base.texfmt = TEXCOORDFMT_2D;
+    }
+    src->base.content = drm->content;
+
+    filter = pattern->base.filter;
+    if (_cairo_matrix_is_pixel_exact (&pattern->base.matrix))
+	filter = CAIRO_FILTER_NEAREST;
+
+    src->base.sampler[0] =
+	(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
+	i915_texture_filter (filter);
+    src->base.sampler[1] =
+	SS3_NORMALIZED_COORDS |
+	i915_texture_extend (extend);
+
+    /* tweak the src matrix to map from dst to texture coordinates */
+    src->base.matrix = pattern->base.matrix;
+    if (src_x | src_y)
+	cairo_matrix_translate (&src->base.matrix, src_x, src_y);
+    if (i915_texture_filter_is_nearest (filter))
+	cairo_matrix_translate (&src->base.matrix, NEAREST_BIAS, NEAREST_BIAS);
+    cairo_matrix_init_scale (&m, 1. / surface_width, 1. 
/ surface_height); + cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m); + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +i915_shader_acquire_pattern (i915_shader_t *shader, + union i915_shader_channel *src, + const cairo_pattern_t *pattern, + const cairo_rectangle_int_t *extents) +{ + switch (pattern->type) { + case CAIRO_PATTERN_TYPE_SOLID: + return i915_shader_acquire_solid (shader, src, + (cairo_solid_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_LINEAR: + return i915_shader_acquire_linear (shader, src, + (cairo_linear_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_RADIAL: + return i915_shader_acquire_radial (shader, src, + (cairo_radial_pattern_t *) pattern, + extents); + + case CAIRO_PATTERN_TYPE_SURFACE: + return i915_shader_acquire_surface (shader, src, + (cairo_surface_pattern_t *) pattern, + extents); + + default: + ASSERT_NOT_REACHED; + return CAIRO_STATUS_SUCCESS; + } +} + +static uint32_t +i915_get_blend (cairo_operator_t op, + i915_surface_t *dst) +{ +#define SBLEND(X) ((BLENDFACT_##X) << S6_CBUF_SRC_BLEND_FACT_SHIFT) +#define DBLEND(X) ((BLENDFACT_##X) << S6_CBUF_DST_BLEND_FACT_SHIFT) + static const struct blendinfo { + cairo_bool_t dst_alpha; + uint32_t src_blend; + uint32_t dst_blend; + enum { + BOUNDED, + SIMPLE, + XRENDER, + } kind; + } i915_blend_op[] = { + {0, SBLEND (ZERO), DBLEND (ZERO), BOUNDED}, /* Clear */ + {0, SBLEND (ONE), DBLEND (ZERO), BOUNDED}, /* Src */ + + {0, SBLEND (ONE), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Over */ + {1, SBLEND (DST_ALPHA), DBLEND (ZERO), XRENDER}, /* In */ + {1, SBLEND (INV_DST_ALPHA), DBLEND (ZERO), XRENDER}, /* Out */ + {1, SBLEND (DST_ALPHA), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Atop */ + + {0, SBLEND (ZERO), DBLEND (ONE), SIMPLE}, /* Dst */ + {1, SBLEND (INV_DST_ALPHA), DBLEND (ONE), SIMPLE}, /* OverReverse */ + {0, SBLEND (ZERO), DBLEND (SRC_ALPHA), XRENDER}, /* InReverse */ + {0, SBLEND (ZERO), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* OutReverse */ + {1, SBLEND (INV_DST_ALPHA), DBLEND (SRC_ALPHA), XRENDER}, /* AtopReverse */ + + {1, SBLEND (INV_DST_ALPHA), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Xor */ + {0, SBLEND (ONE), DBLEND (ONE), SIMPLE}, /* Add */ + //{0, 0, SBLEND (SRC_ALPHA_SATURATE), DBLEND (ONE), SIMPLE}, /* XXX Saturate */ + }; + uint32_t sblend, dblend; + + if (op >= ARRAY_LENGTH (i915_blend_op)) + return 0; + + if (i915_blend_op[op].kind == BOUNDED) + return 0; + + sblend = i915_blend_op[op].src_blend; + dblend = i915_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if ((dst->intel.drm.base.content & CAIRO_CONTENT_ALPHA) == 0 && + i915_blend_op[op].dst_alpha) + { + if (sblend == SBLEND (DST_ALPHA)) + sblend = SBLEND (ONE); + else if (sblend == SBLEND (INV_DST_ALPHA)) + sblend = SBLEND (ZERO); + } + + /* i915 engine reads 8bit color buffer into green channel in cases + like color buffer blending etc., and also writes back green channel. + So with dst_alpha blend we should use color factor. 
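For example, OPERATOR_IN maps SBLEND (DST_ALPHA) to SBLEND (DST_COLR) below, so on an A8 target the blend factor is fetched from the colour (green) channel that actually carries the alpha data.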
See spec on + "8-bit rendering" */ + if (dst->intel.drm.format == CAIRO_FORMAT_A8 && i915_blend_op[op].dst_alpha) { + if (sblend == SBLEND (DST_ALPHA)) + sblend = SBLEND (DST_COLR); + else if (sblend == SBLEND (INV_DST_ALPHA)) + sblend = SBLEND (INV_DST_COLR); + } + + return sblend | dblend; +#undef SBLEND +#undef DBLEND +} + +static void +i915_shader_channel_init (union i915_shader_channel *channel) +{ + channel->type.vertex = (i915_vertex_shader_t) -1; + channel->type.fragment = (i915_fragment_shader_t) -1; + channel->type.pattern = (i915_shader_channel_t) -1; + channel->base.texfmt = TEXCOORDFMT_NOT_PRESENT; + channel->base.bo = NULL; + channel->base.n_samplers = 0; + channel->base.mode = 0; +} + +void +i915_shader_init (i915_shader_t *shader, + i915_surface_t *dst, + cairo_operator_t op) +{ + shader->device = i915_device (dst); + shader->target = dst; + shader->op = op; + + shader->blend = i915_get_blend (op, dst); + shader->need_combine = FALSE; + + shader->content = dst->intel.drm.base.content; + + i915_shader_channel_init (&shader->source); + i915_shader_channel_init (&shader->mask); + i915_shader_channel_init (&shader->clip); + i915_shader_channel_init (&shader->dst); +} + +static void +i915_set_shader_samplers (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t n_samplers, n; + uint32_t samplers[4 * (1+2+8)]; + uint32_t mask, tu; + + n_samplers = + shader->source.base.n_samplers + + shader->mask.base.n_samplers + + shader->clip.base.n_samplers + + shader->dst.base.n_samplers; + assert (n_samplers <= 4); + + if (n_samplers == 0) + return; + + mask = (1 << n_samplers) - 1; + + /* We check for repeated setting of sample state mainly to catch + * continuation of text strings across multiple show-glyphs. + */ + tu = 0; + if (shader->source.base.bo != NULL) { + samplers[tu++] = shader->source.base.bo->base.handle; + samplers[tu++] = shader->source.base.sampler[0]; + samplers[tu++] = shader->source.base.sampler[1]; + for (n = 0; n < shader->source.base.n_samplers; n++) { + samplers[tu++] = shader->source.base.offset[n]; + samplers[tu++] = shader->source.base.map[2*n+0]; + samplers[tu++] = shader->source.base.map[2*n+1]; + } + } + if (shader->mask.base.bo != NULL) { + samplers[tu++] = shader->mask.base.bo->base.handle; + samplers[tu++] = shader->mask.base.sampler[0]; + samplers[tu++] = shader->mask.base.sampler[1]; + for (n = 0; n < shader->mask.base.n_samplers; n++) { + samplers[tu++] = shader->mask.base.offset[n]; + samplers[tu++] = shader->mask.base.map[2*n+0]; + samplers[tu++] = shader->mask.base.map[2*n+1]; + } + } + if (shader->clip.base.bo != NULL) { + samplers[tu++] = shader->clip.base.bo->base.handle; + samplers[tu++] = shader->clip.base.sampler[0]; + samplers[tu++] = shader->clip.base.sampler[1]; + for (n = 0; n < shader->clip.base.n_samplers; n++) { + samplers[tu++] = shader->clip.base.offset[n]; + samplers[tu++] = shader->clip.base.map[2*n+0]; + samplers[tu++] = shader->clip.base.map[2*n+1]; + } + } + if (shader->dst.base.bo != NULL) { + samplers[tu++] = shader->dst.base.bo->base.handle; + samplers[tu++] = shader->dst.base.sampler[0]; + samplers[tu++] = shader->dst.base.sampler[1]; + for (n = 0; n < shader->dst.base.n_samplers; n++) { + samplers[tu++] = shader->dst.base.offset[n]; + samplers[tu++] = shader->dst.base.map[2*n+0]; + samplers[tu++] = shader->dst.base.map[2*n+1]; + } + } + + if (tu == device->current_n_samplers && + memcmp (device->current_samplers, + samplers, + tu * sizeof (uint32_t)) == 0) + { + return; + } + device->current_n_samplers = tu; + 
memcpy (device->current_samplers, samplers, tu * sizeof (uint32_t)); + + if (device->current_source != NULL) + *device->current_source = 0; + if (device->current_mask != NULL) + *device->current_mask = 0; + if (device->current_clip != NULL) + *device->current_clip = 0; + +#if 0 + if (shader->source.type.pattern == PATTERN_TEXTURE) { + switch ((int) shader->source.surface.surface->type) { + case CAIRO_SURFACE_TYPE_DRM: + { + i915_surface_t *surface = + (i915_surface_t *) shader->source.surface.surface; + device->current_source = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_SOURCE; + break; + } + + case I915_PACKED_PIXEL_SURFACE_TYPE: + { + i915_packed_pixel_surface_t *surface = + (i915_packed_pixel_surface_t *) shader->source.surface.surface; + device->current_source = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_SOURCE; + break; + } + + default: + device->current_source = NULL; + break; + } + } else + device->current_source = NULL; + + if (shader->mask.type.pattern == PATTERN_TEXTURE) { + switch ((int) shader->mask.surface.surface->type) { + case CAIRO_SURFACE_TYPE_DRM: + { + i915_surface_t *surface = + (i915_surface_t *) shader->mask.surface.surface; + device->current_mask = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_MASK; + break; + } + + case I915_PACKED_PIXEL_SURFACE_TYPE: + { + i915_packed_pixel_surface_t *surface = + (i915_packed_pixel_surface_t *) shader->mask.surface.surface; + device->current_mask = &surface->is_current_texture; + surface->is_current_texture |= CURRENT_MASK; + break; + } + + default: + device->current_mask = NULL; + break; + } + } else + device->current_mask = NULL; +#endif + + OUT_DWORD (_3DSTATE_MAP_STATE | (3 * n_samplers)); + OUT_DWORD (mask); + if (shader->source.base.bo != NULL) { + for (n = 0; n < shader->source.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->source.base.bo, + shader->source.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->source.base.map[2*n+0]); + OUT_DWORD (shader->source.base.map[2*n+1]); + } + } + if (shader->mask.base.bo != NULL) { + for (n = 0; n < shader->mask.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->mask.base.bo, + shader->mask.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->mask.base.map[2*n+0]); + OUT_DWORD (shader->mask.base.map[2*n+1]); + } + } + if (shader->clip.base.bo != NULL) { + for (n = 0; n < shader->clip.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->clip.base.bo, + shader->clip.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->clip.base.map[2*n+0]); + OUT_DWORD (shader->clip.base.map[2*n+1]); + } + } + if (shader->dst.base.bo != NULL) { + for (n = 0; n < shader->dst.base.n_samplers; n++) { + i915_batch_emit_reloc (device, shader->dst.base.bo, + shader->dst.base.offset[n], + I915_GEM_DOMAIN_SAMPLER, 0); + OUT_DWORD (shader->dst.base.map[2*n+0]); + OUT_DWORD (shader->dst.base.map[2*n+1]); + } + } + + OUT_DWORD (_3DSTATE_SAMPLER_STATE | (3 * n_samplers)); + OUT_DWORD (mask); + tu = 0; + if (shader->source.base.bo != NULL) { + for (n = 0; n < shader->source.base.n_samplers; n++) { + OUT_DWORD (shader->source.base.sampler[0]); + OUT_DWORD (shader->source.base.sampler[1] | + (tu << SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } + if (shader->mask.base.bo != NULL) { + for (n = 0; n < shader->mask.base.n_samplers; n++) { + OUT_DWORD (shader->mask.base.sampler[0]); + OUT_DWORD (shader->mask.base.sampler[1] | + (tu << 
SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } + if (shader->clip.base.bo != NULL) { + for (n = 0; n < shader->clip.base.n_samplers; n++) { + OUT_DWORD (shader->clip.base.sampler[0]); + OUT_DWORD (shader->clip.base.sampler[1] | + (tu << SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } + if (shader->dst.base.bo != NULL) { + for (n = 0; n < shader->dst.base.n_samplers; n++) { + OUT_DWORD (shader->dst.base.sampler[0]); + OUT_DWORD (shader->dst.base.sampler[1] | + (tu << SS3_TEXTUREMAP_INDEX_SHIFT)); + OUT_DWORD (0x0); + tu++; + } + } +} + +static uint32_t +i915_shader_get_texcoords (const i915_shader_t *shader) +{ + uint32_t texcoords; + uint32_t tu; + + texcoords = S2_TEXCOORD_NONE; + tu = 0; + if (shader->source.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->source.base.texfmt); + tu++; + } + if (shader->mask.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->mask.base.texfmt); + tu++; + } + if (shader->clip.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->clip.base.texfmt); + tu++; + } + if (shader->dst.base.texfmt != TEXCOORDFMT_NOT_PRESENT) { + texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK); + texcoords |= S2_TEXCOORD_FMT (tu, shader->dst.base.texfmt); + tu++; + } + + return texcoords; +} + +static void +i915_set_shader_mode (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t texcoords; + uint32_t mask, cnt; + + texcoords = i915_shader_get_texcoords (shader); + + mask = cnt = 0; + + if (device->current_texcoords != texcoords) + mask |= I1_LOAD_S (2), cnt++; + + if (device->current_blend != shader->blend) + mask |= I1_LOAD_S (6), cnt++; + + if (cnt == 0) + return; + + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | mask | (cnt-1)); + + if (device->current_texcoords != texcoords) { + OUT_DWORD (texcoords); + device->current_texcoords = texcoords; + } + + if (device->current_blend != shader->blend) { + if (shader->blend) { + OUT_DWORD (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | + shader->blend); + } else { + OUT_DWORD (S6_COLOR_WRITE_ENABLE); + } + + device->current_blend = shader->blend; + } +} + +static void +i915_set_constants (i915_device_t *device, + const uint32_t *constants, + uint32_t n_constants) +{ + uint32_t n; + + OUT_DWORD (_3DSTATE_PIXEL_SHADER_CONSTANTS | n_constants); + OUT_DWORD ((1 << (n_constants >> 2)) - 1); + + for (n = 0; n < n_constants; n++) + OUT_DWORD (constants[n]); + + device->current_n_constants = n_constants; + memcpy (device->current_constants, constants, n_constants*4); +} + +static inline uint32_t +pack_float (float f) +{ + union { + float f; + uint32_t ui; + } t; + t.f = f; + return t.ui; +} + +static uint32_t +pack_constants (const union i915_shader_channel *channel, + uint32_t *constants) +{ + uint32_t count = 0, n; + + switch (channel->type.fragment) { + case FS_ZERO: + case FS_ONE: + case FS_PURE: + case FS_DIFFUSE: + break; + + case FS_CONSTANT: + constants[count++] = pack_float (channel->solid.color.red); + constants[count++] = pack_float (channel->solid.color.green); + constants[count++] = pack_float (channel->solid.color.blue); + constants[count++] = pack_float (channel->solid.color.alpha); + break; + + case FS_LINEAR: + constants[count++] = pack_float 
(channel->linear.color0.red); + constants[count++] = pack_float (channel->linear.color0.green); + constants[count++] = pack_float (channel->linear.color0.blue); + constants[count++] = pack_float (channel->linear.color0.alpha); + + constants[count++] = pack_float (channel->linear.color1.red); + constants[count++] = pack_float (channel->linear.color1.green); + constants[count++] = pack_float (channel->linear.color1.blue); + constants[count++] = pack_float (channel->linear.color1.alpha); + break; + + case FS_RADIAL: + for (n = 0; n < ARRAY_LENGTH (channel->radial.constants); n++) + constants[count++] = pack_float (channel->radial.constants[n]); + break; + + case FS_TEXTURE: + case FS_YUV: + case FS_SPANS: + break; + } + + return count; +} + +static void +i915_set_shader_constants (i915_device_t *device, + const i915_shader_t *shader) +{ + uint32_t constants[4*4*3]; + unsigned n_constants; + + n_constants = 0; + if (shader->source.type.fragment == FS_DIFFUSE) { + uint32_t diffuse; + + diffuse = + ((shader->source.solid.color.alpha_short >> 8) << 24) | + ((shader->source.solid.color.red_short >> 8) << 16) | + ((shader->source.solid.color.green_short >> 8) << 8) | + ((shader->source.solid.color.blue_short >> 8) << 0); + + if (diffuse != device->current_diffuse) { + OUT_DWORD (_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_DWORD (diffuse); + device->current_diffuse = diffuse; + } + } else { + n_constants += pack_constants (&shader->source, constants + n_constants); + } + n_constants += pack_constants (&shader->mask, constants + n_constants); + + if (n_constants != 0 && + (device->current_n_constants != n_constants || + memcmp (device->current_constants, constants, n_constants*4))) + { + i915_set_constants (device, constants, n_constants); + } +} + +static cairo_bool_t +i915_shader_needs_update (const i915_shader_t *shader, + const i915_device_t *device) +{ + uint32_t count, n; + uint32_t buf[64]; + + if (device->current_target != shader->target) + return TRUE; + + count = + shader->source.base.n_samplers + + shader->mask.base.n_samplers + + shader->clip.base.n_samplers + + shader->dst.base.n_samplers; + if (count > 4) + return TRUE; + + if (count != 0) { + count *= 3; + if (shader->source.base.bo != NULL) + count += 3; + if (shader->mask.base.bo != NULL) + count += 3; + if (shader->clip.base.bo != NULL) + count += 3; + if (shader->dst.base.bo != NULL) + count += 3; + + if (count != device->current_n_samplers) + return TRUE; + + if (count != 0) { + count = 0; + if (shader->source.base.bo != NULL) { + buf[count++] = shader->source.base.bo->base.handle; + buf[count++] = shader->source.base.sampler[0]; + buf[count++] = shader->source.base.sampler[1]; + for (n = 0; n < shader->source.base.n_samplers; n++) { + buf[count++] = shader->source.base.offset[n]; + buf[count++] = shader->source.base.map[2*n+0]; + buf[count++] = shader->source.base.map[2*n+1]; + } + } + if (shader->mask.base.bo != NULL) { + buf[count++] = shader->mask.base.bo->base.handle; + buf[count++] = shader->mask.base.sampler[0]; + buf[count++] = shader->mask.base.sampler[1]; + for (n = 0; n < shader->mask.base.n_samplers; n++) { + buf[count++] = shader->mask.base.offset[n]; + buf[count++] = shader->mask.base.map[2*n+0]; + buf[count++] = shader->mask.base.map[2*n+1]; + } + } + if (shader->clip.base.bo != NULL) { + buf[count++] = shader->clip.base.bo->base.handle; + buf[count++] = shader->clip.base.sampler[0]; + buf[count++] = shader->clip.base.sampler[1]; + for (n = 0; n < shader->clip.base.n_samplers; n++) { + buf[count++] = 
shader->clip.base.offset[n]; + buf[count++] = shader->clip.base.map[2*n+0]; + buf[count++] = shader->clip.base.map[2*n+1]; + } + } + if (shader->dst.base.bo != NULL) { + buf[count++] = shader->dst.base.bo->base.handle; + buf[count++] = shader->dst.base.sampler[0]; + buf[count++] = shader->dst.base.sampler[1]; + for (n = 0; n < shader->dst.base.n_samplers; n++) { + buf[count++] = shader->dst.base.offset[n]; + buf[count++] = shader->dst.base.map[2*n+0]; + buf[count++] = shader->dst.base.map[2*n+1]; + } + } + + assert (count == device->current_n_samplers); + if (memcmp (device->current_samplers, buf, count * sizeof (uint32_t))) + return TRUE; + } + } + + if (i915_shader_get_texcoords (shader) != device->current_texcoords) + return TRUE; + if (device->current_blend != shader->blend) + return TRUE; + + count = 0; + if (shader->source.type.fragment == FS_DIFFUSE) { + uint32_t diffuse; + + diffuse = + ((shader->source.solid.color.alpha_short >> 8) << 24) | + ((shader->source.solid.color.red_short >> 8) << 16) | + ((shader->source.solid.color.green_short >> 8) << 8) | + ((shader->source.solid.color.blue_short >> 8) << 0); + + if (diffuse != device->current_diffuse) + return TRUE; + } else { + count += pack_constants (&shader->source, buf + count); + } + count += pack_constants (&shader->mask, buf + count); + + if (count && + (device->current_n_constants != count || + memcmp (device->current_constants, buf, count*4))) + { + return TRUE; + } + + n = (i915_shader_channel_key (&shader->source) << 0) | + (i915_shader_channel_key (&shader->mask) << 8) | + (i915_shader_channel_key (&shader->clip) << 16) | + (shader->op << 24) | + (((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31); + return n != device->current_program; +} + +static void +i915_set_shader_target (i915_device_t *device, + const i915_shader_t *shader) +{ + i915_surface_t *dst; + intel_bo_t *bo; + uint32_t size; + + dst = shader->target; + if (device->current_target == dst) + return; + + bo = to_intel_bo (dst->intel.drm.bo); + assert (bo != NULL); + + OUT_DWORD (_3DSTATE_BUF_INFO_CMD); + OUT_DWORD (BUF_3D_ID_COLOR_BACK | + BUF_tiling (bo->tiling) | + BUF_3D_PITCH (dst->intel.drm.stride)); + OUT_RELOC (dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + + if (dst->colorbuf != device->current_colorbuf) { + OUT_DWORD (_3DSTATE_DST_BUF_VARS_CMD); + OUT_DWORD (dst->colorbuf); + device->current_colorbuf = dst->colorbuf; + } + + size = DRAW_YMAX (dst->intel.drm.height) | DRAW_XMAX (dst->intel.drm.width); + if (size != device->current_size) { + OUT_DWORD (_3DSTATE_DRAW_RECT_CMD); + OUT_DWORD (0); /* dither */ + OUT_DWORD (0); /* top-left */ + OUT_DWORD (size); + OUT_DWORD (0); /* origin */ + device->current_size = size; + } + + device->current_target = dst; +} + +int +i915_shader_num_texcoords (const i915_shader_t *shader) +{ + int cnt = 0; + + switch (shader->source.base.texfmt) { + default: + ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + switch (shader->mask.base.texfmt) { + default: + ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + switch (shader->clip.base.texfmt) { + default: + 
ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + switch (shader->dst.base.texfmt) { + default: + ASSERT_NOT_REACHED; + case TEXCOORDFMT_NOT_PRESENT: break; + case TEXCOORDFMT_2D: cnt += 2; break; + case TEXCOORDFMT_3D: cnt += 3; break; + case TEXCOORDFMT_4D: cnt += 4; break; + case TEXCOORDFMT_1D: cnt += 1; break; + case TEXCOORDFMT_2D_16: cnt += 1; break; + } + + return cnt; +} + +void +i915_shader_fini (i915_shader_t *shader) +{ + i915_device_t *device = i915_device (shader->target); + + switch (shader->source.type.pattern) { + case PATTERN_TEXTURE: + case PATTERN_BASE: + case PATTERN_LINEAR: + case PATTERN_RADIAL: + if (shader->source.base.bo != NULL) + cairo_drm_bo_destroy (&device->intel.base.base, &shader->source.base.bo->base); + break; + + default: + case PATTERN_CONSTANT: + break; + } + + switch (shader->mask.type.pattern) { + case PATTERN_TEXTURE: + case PATTERN_BASE: + case PATTERN_LINEAR: + case PATTERN_RADIAL: + if (shader->mask.base.bo != NULL) + cairo_drm_bo_destroy (&device->intel.base.base, &shader->mask.base.bo->base); + break; + + default: + case PATTERN_CONSTANT: + break; + } + + switch (shader->clip.type.pattern) { + case PATTERN_TEXTURE: + case PATTERN_BASE: + case PATTERN_LINEAR: + case PATTERN_RADIAL: + if (shader->clip.base.bo != NULL) + cairo_drm_bo_destroy (&device->intel.base.base, &shader->clip.base.bo->base); + break; + + default: + case PATTERN_CONSTANT: + break; + } +} + +void +i915_shader_set_clip (i915_shader_t *shader, + cairo_clip_t *clip) +{ + cairo_surface_t *clip_surface; + const cairo_rectangle_int_t *clip_extents; + union i915_shader_channel *channel; + i915_surface_t *s; + + clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base); + assert (clip_surface->status == CAIRO_STATUS_SUCCESS); + assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM); + + channel = &shader->clip; + channel->type.pattern = PATTERN_TEXTURE; + channel->type.vertex = VS_TEXTURE_16; + channel->base.texfmt = TEXCOORDFMT_2D_16; + channel->base.content = CAIRO_CONTENT_ALPHA; + + channel->type.fragment = FS_TEXTURE; + channel->surface.pixel = NONE; + + s = (i915_surface_t *) clip_surface; + channel->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo)); + channel->base.n_samplers = 1; + channel->base.offset[0] = s->offset; + channel->base.map[0] = s->map0; + channel->base.map[1] = s->map1; + + channel->base.sampler[0] = + (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) | + i915_texture_filter (CAIRO_FILTER_NEAREST); + channel->base.sampler[1] = + SS3_NORMALIZED_COORDS | + i915_texture_extend (CAIRO_EXTEND_NONE); + + cairo_matrix_init_scale (&shader->clip.base.matrix, + 1. / s->intel.drm.width, + 1. 
/ s->intel.drm.height);
+
+    clip_extents = _cairo_clip_get_extents (clip);
+    cairo_matrix_translate (&shader->clip.base.matrix,
+                            NEAREST_BIAS + clip_extents->x,
+                            NEAREST_BIAS + clip_extents->y);
+}
+
+static cairo_status_t
+i915_shader_check_aperture (i915_shader_t *shader,
+                            i915_device_t *device)
+{
+    cairo_status_t status;
+    intel_bo_t *bo_array[4];
+    uint32_t n = 0;
+
+    if (shader->target != device->current_target)
+        bo_array[n++] = to_intel_bo (shader->target->intel.drm.bo);
+
+    if (shader->source.base.bo != NULL)
+        bo_array[n++] = shader->source.base.bo;
+
+    if (shader->mask.base.bo != NULL)
+        bo_array[n++] = shader->mask.base.bo;
+
+    if (shader->clip.base.bo != NULL)
+        bo_array[n++] = shader->clip.base.bo;
+
+    if (n == 0 || i915_check_aperture (device, bo_array, n))
+        return CAIRO_STATUS_SUCCESS;
+
+    status = i915_batch_flush (device);
+    if (unlikely (status))
+        return status;
+
+    assert (i915_check_aperture (device, bo_array, n));
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static void
+i915_shader_combine_mask (i915_shader_t *shader)
+{
+    if (shader->mask.type.fragment == (i915_fragment_shader_t) -1 ||
+        shader->mask.type.fragment == FS_CONSTANT)
+    {
+        return;
+    }
+
+    if (shader->mask.type.fragment == FS_PURE) {
+        if (shader->mask.solid.pure & (1<<3)) {
+            shader->mask.type.fragment = FS_ONE;
+        } else {
+            shader->mask.type.fragment = FS_ZERO;
+        }
+    }
+
+    if (shader->mask.type.fragment == FS_ONE ||
+        (shader->mask.base.content & CAIRO_CONTENT_ALPHA) == 0)
+    {
+        shader->mask.type.vertex = (i915_vertex_shader_t) -1;
+        shader->mask.type.fragment = (i915_fragment_shader_t) -1;
+        shader->mask.base.texfmt = TEXCOORDFMT_NOT_PRESENT;
+        shader->mask.base.mode = 0;
+    }
+
+    if (shader->mask.type.fragment == FS_ZERO) {
+        shader->source.type.fragment = FS_ZERO;
+        shader->source.type.vertex = VS_CONSTANT;
+        shader->source.base.texfmt = TEXCOORDFMT_NOT_PRESENT;
+        shader->source.base.mode = 0;
+    }
+
+    if (shader->source.type.fragment == FS_ZERO) {
+        shader->mask.type.vertex = (i915_vertex_shader_t) -1;
+        shader->mask.type.fragment = (i915_fragment_shader_t) -1;
+        shader->mask.base.texfmt = TEXCOORDFMT_NOT_PRESENT;
+        shader->mask.base.mode = 0;
+    }
+}
+
+static void
+i915_shader_setup_dst (i915_shader_t *shader)
+{
+    union i915_shader_channel *channel;
+    i915_surface_t *s;
+
+    /* We need manual blending if we have a clip surface and an unbounded op,
+     * or an extended blend mode.
+     */
+    if (shader->need_combine ||
+        (shader->op < CAIRO_OPERATOR_SATURATE &&
+         (shader->clip.type.fragment == (i915_fragment_shader_t) -1 ||
+          _cairo_operator_bounded_by_mask (shader->op))))
+    {
+        return;
+    }
+
+    shader->need_combine = TRUE;
+
+    channel = &shader->dst;
+    channel->type.pattern = PATTERN_TEXTURE;
+    channel->type.vertex = VS_TEXTURE_16;
+    channel->base.texfmt = TEXCOORDFMT_2D_16;
+    channel->base.content = shader->content;
+
+    channel->type.fragment = FS_TEXTURE;
+    channel->surface.pixel = NONE;
+
+    s = shader->target;
+    channel->base.bo = to_intel_bo (s->intel.drm.bo);
+    channel->base.n_samplers = 1;
+    channel->base.offset[0] = s->offset;
+    channel->base.map[0] = s->map0;
+    channel->base.map[1] = s->map1;
+
+    channel->base.sampler[0] =
+        (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
+        i915_texture_filter (CAIRO_FILTER_NEAREST);
+    channel->base.sampler[1] =
+        SS3_NORMALIZED_COORDS |
+        i915_texture_extend (CAIRO_EXTEND_NONE);
+
+    cairo_matrix_init_scale (&shader->dst.base.matrix,
+                             1. / s->intel.drm.width,
+                             1. 
/ s->intel.drm.height); + + cairo_matrix_translate (&shader->dst.base.matrix, + NEAREST_BIAS, + NEAREST_BIAS); +} + +static void +i915_shader_combine_source (i915_shader_t *shader, + i915_device_t *device) +{ + if (device->last_source_fragment == shader->source.type.fragment) + return; + + if (device->last_source_fragment == FS_DIFFUSE) { + switch (shader->source.type.fragment) { + case FS_ONE: + case FS_PURE: + case FS_CONSTANT: + case FS_DIFFUSE: + shader->source.type.fragment = FS_DIFFUSE; + shader->source.base.mode = 0; + break; + case FS_ZERO: + case FS_LINEAR: + case FS_RADIAL: + case FS_TEXTURE: + case FS_YUV: + case FS_SPANS: + default: + break; + } + } + + device->last_source_fragment = shader->source.type.fragment; +} + +static inline float * +i915_composite_vertex (float *v, + const i915_shader_t *shader, + double x, double y) +{ + double s, t; + + /* Each vertex is: + * 2 vertex coordinates + * [0-2] source texture coordinates + * [0-2] mask texture coordinates + */ + + *v++ = x; *v++ = y; + switch (shader->source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->source.linear, x, y); + break; + case VS_RADIAL: + case VS_TEXTURE: + s = x, t = y; + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + s = x, t = y; + cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + switch (shader->mask.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *v++ = i915_shader_linear_texcoord (&shader->mask.linear, x, y); + break; + case VS_RADIAL: + s = x, t = y; + cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE: + s = x, t = y; + cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t); + *v++ = s; *v++ = t; + break; + case VS_TEXTURE_16: + s = x, t = y; + cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t); + *v++ = texcoord_2d_16 (s, t); + break; + } + + return v; +} + +static inline void +i915_shader_add_rectangle_general (const i915_shader_t *shader, + int x, int y, + int w, int h) +{ + float *vertices; + + vertices = i915_add_rectangle (shader->device); + vertices = i915_composite_vertex (vertices, shader, x + w, y + h); + vertices = i915_composite_vertex (vertices, shader, x, y + h); + vertices = i915_composite_vertex (vertices, shader, x, y); + /* XXX overflow! 
*/
+}
+
+cairo_status_t
+i915_shader_commit (i915_shader_t *shader,
+                    i915_device_t *device)
+{
+    unsigned floats_per_vertex;
+    cairo_status_t status;
+
+    i915_shader_combine_source (shader, device);
+    i915_shader_combine_mask (shader);
+    i915_shader_setup_dst (shader);
+
+    if (i915_shader_needs_update (shader, device)) {
+        if (device->vertex_count) {
+            status = i915_vbo_flush (device);
+            if (unlikely (status))
+                return status;
+        }
+
+        status = i915_shader_check_aperture (shader, device);
+        if (unlikely (status))
+            return status;
+
+        i915_set_shader_target (device, shader);
+        i915_set_shader_mode (device, shader);
+        i915_set_shader_samplers (device, shader);
+        i915_set_shader_constants (device, shader);
+        i915_set_shader_program (device, shader);
+    }
+
+    device->current_shader = shader;
+    shader->add_rectangle = i915_shader_add_rectangle_general;
+
+    floats_per_vertex = 2 + i915_shader_num_texcoords (shader);
+    if (device->floats_per_vertex == floats_per_vertex)
+        return CAIRO_STATUS_SUCCESS;
+
+    if (device->vertex_count) {
+        status = i915_vbo_flush (device);
+        if (unlikely (status))
+            return status;
+    }
+
+    if (device->vbo) {
+        device->batch_base[device->vbo_max_index] |= device->vertex_index;
+        OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S (1) | 0);
+        device->vbo_max_index = device->batch.used;
+        OUT_DWORD ((floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
+                   (floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
+    }
+
+    device->floats_per_vertex = floats_per_vertex;
+    device->rectangle_size = floats_per_vertex * 3 * sizeof (float);
+    device->vertex_index =
+        (device->vbo_used + 4*floats_per_vertex - 1) / (4 * floats_per_vertex);
+    device->vbo_offset = 4 * device->vertex_index * floats_per_vertex;
+
+    return CAIRO_STATUS_SUCCESS;
+}
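The vertex accounting at the tail of i915_shader_commit () is terse, so here is a standalone sketch of the same arithmetic with hypothetical values in place of the i915_device_t fields (illustration only, not part of the patch): each RECTLIST rectangle is three vertices, and the next vertex index is the current vbo byte offset rounded up to a whole vertex stride of four bytes per float.

    #include <assert.h>
    #include <stdio.h>

    int
    main (void)
    {
        /* Assume a source channel with 2D texcoords and no mask: x,y + s,t. */
        unsigned floats_per_vertex = 2 + 2;
        unsigned rectangle_size = floats_per_vertex * 3 * sizeof (float);
        unsigned vbo_used = 100;                 /* hypothetical byte offset */
        unsigned stride = 4 * floats_per_vertex; /* bytes per vertex */

        /* First whole vertex at or after the current offset, as in
         * i915_shader_commit () above. */
        unsigned vertex_index = (vbo_used + stride - 1) / stride;
        unsigned vbo_offset = vertex_index * stride;

        assert (vbo_offset >= vbo_used && vbo_offset % stride == 0);
        printf ("vertex %u at byte %u, %u bytes per rectangle\n",
                vertex_index, vbo_offset, rectangle_size);
        return 0;
    }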
diff --git a/src/drm/cairo-drm-i915-spans.c b/src/drm/cairo-drm-i915-spans.c
new file mode 100644
index 00000000..0f1617d7
--- /dev/null
+++ b/src/drm/cairo-drm-i915-spans.c
@@ -0,0 +1,708 @@
+/* cairo - a vector graphics library with display and print output
+ *
+ * Copyright © 2009 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it either under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation
+ * (the "LGPL") or, at your option, under the terms of the Mozilla
+ * Public License Version 1.1 (the "MPL"). If you do not alter this
+ * notice, a recipient may use your version of this file under either
+ * the MPL or the LGPL.
+ *
+ * You should have received a copy of the LGPL along with this library
+ * in the file COPYING-LGPL-2.1; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * You should have received a copy of the MPL along with this library
+ * in the file COPYING-MPL-1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
+ * OF ANY KIND, either express or implied. See the LGPL or the MPL for
+ * the specific language governing rights and limitations.
+ *
+ * The Original Code is the cairo graphics library.
+ *
+ * The Initial Developer of the Original Code is Red Hat, Inc.
+ *
+ * Contributor(s):
+ *     Chris Wilson <chris@chris-wilson.co.uk>
+ */
+
+#include "cairoint.h"
+
+#include "cairo-composite-rectangles-private.h"
+#include "cairo-boxes-private.h"
+#include "cairo-error-private.h"
+#include "cairo-drm-i915-private.h"
+
+/* Operates in either immediate or retained mode.
+ * When given a clip region we record the sequence of vbo and then
+ * replay them for each clip rectangle, otherwise we simply emit
+ * the vbo straight into the command stream.
+ */
+
+typedef struct _i915_spans i915_spans_t;
+
+typedef float *
+(*i915_get_rectangle_func_t) (i915_spans_t *spans);
+
+typedef void
+(*i915_span_func_t) (i915_spans_t *spans,
+                     int x0, int x1, int y0, int y1,
+                     int alpha);
+
+struct _i915_spans {
+    cairo_span_renderer_t renderer;
+
+    i915_device_t *device;
+
+    int xmin, xmax;
+    cairo_bool_t is_bounded;
+    const cairo_rectangle_int_t *extents;
+
+    i915_get_rectangle_func_t get_rectangle;
+    i915_span_func_t span;
+    i915_shader_t shader;
+
+    cairo_region_t *clip_region;
+    cairo_bool_t need_clip_surface;
+
+    struct vbo {
+        struct vbo *next;
+        intel_bo_t *bo;
+        unsigned int count;
+    } head, *tail;
+
+    int rectangle_size;
+
+    unsigned int vbo_offset;
+    float *vbo_base;
+};
+
+static float *
+i915_emit_rectangle (i915_spans_t *spans)
+{
+    return i915_add_rectangle (spans->device);
+}
+
+static float *
+i915_accumulate_rectangle (i915_spans_t *spans)
+{
+    float *vertices;
+    uint32_t size;
+
+    size = spans->rectangle_size;
+    if (unlikely (spans->vbo_offset + size > I915_VBO_SIZE)) {
+        struct vbo *vbo;
+
+        intel_bo_unmap (spans->tail->bo);
+
+        vbo = malloc (sizeof (struct vbo));
+        if (unlikely (vbo == NULL)) {
+            /* throw error! */
+        }
+
+        spans->tail->next = vbo;
+        spans->tail = vbo;
+
+        vbo->next = NULL;
+        vbo->bo = intel_bo_create (&spans->device->intel, I915_VBO_SIZE, FALSE);
+        vbo->count = 0;
+
+        spans->vbo_offset = 0;
+        spans->vbo_base = intel_bo_map (&spans->device->intel, vbo->bo);
+    }
+
+    vertices = spans->vbo_base + spans->vbo_offset;
+    spans->vbo_offset += size;
+    spans->tail->count += 3;
+
+    return vertices;
+}
+
+static void
+i915_span_constant (i915_spans_t *spans,
+                    int x0, int x1, int y0, int y1,
+                    int alpha)
+{
+    float *vertices;
+    float a = alpha / 255.;
+
+    vertices = spans->get_rectangle (spans);
+
+    *vertices++ = x1;
+    *vertices++ = y1;
+    *vertices++ = a;
+
+    *vertices++ = x0;
+    *vertices++ = y1;
+    *vertices++ = a;
+
+    *vertices++ = x0;
+    *vertices++ = y0;
+    *vertices++ = a;
+}
+
+static void
+i915_span_linear (i915_spans_t *spans,
+                  int x0, int x1, int y0, int y1,
+                  int alpha)
+{
+    float *vertices;
+    float a = alpha / 255.;
+    double s, t;
+
+    vertices = spans->get_rectangle (spans);
+
+    *vertices++ = x1;
+    *vertices++ = y1;
+    s = x0, t = y0;
+    *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t);
+    *vertices++ = a;
+
+    *vertices++ = x0;
+    *vertices++ = y1;
+    s = x1, t = y0;
+    *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t);
+    *vertices++ = a;
+
+    *vertices++ = x0;
+    *vertices++ = y0;
+    s = x1, t = y1;
+    *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t);
+    *vertices++ = a;
+}
+
+static void
+i915_span_radial (i915_spans_t *spans,
+                  int x0, int x1, int y0, int y1,
+                  int alpha)
+{
+    float *vertices;
+    float a = alpha / 255.;
+    double s, t;
+
+    vertices = spans->get_rectangle (spans);
+
+    *vertices++ = x1;
+    *vertices++ = y1;
+    s = x0, t = y0;
+    cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t);
+    *vertices++ = s; *vertices++ = t;
+    *vertices++ 
= a; + + *vertices++ = x0; + *vertices++ = y1; + s = x1, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; +} + +static void +i915_span_texture (i915_spans_t *spans, + int x0, int x1, int y0, int y1, int alpha) +{ + float *vertices; + float a = alpha / 255.; + double s, t; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + s = x0, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + s = x1, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + *vertices++ = a; +} + +static void +i915_span_texture16 (i915_spans_t *spans, + int x0, int x1, int y0, int y1, int alpha) +{ + float *vertices; + float a = alpha / 255.; + double s, t; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + s = x0, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + s = x1, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + *vertices++ = a; +} + +static void +i915_span_generic (i915_spans_t *spans, + int x0, int x1, int y0, int y1, int alpha) +{ + double s, t; + float *vertices; + + /* Each vertex is: + * 2 vertex coordinates + * [0-2] source texture coordinates + * 1 alpha value. 
+ * [0,2] clip mask coordinates + */ + + vertices = spans->get_rectangle (spans); + + /* bottom right */ + *vertices++ = x1; *vertices++ = y1; + s = x1, t = y1; + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + break; + case VS_RADIAL: + case VS_TEXTURE: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + break; + } + *vertices++ = alpha; + if (spans->need_clip_surface) { + s = x1, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + } + + /* bottom left */ + *vertices++ = x0; *vertices++ = y1; + s = x0, t = y1; + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + break; + case VS_RADIAL: + case VS_TEXTURE: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + break; + } + *vertices++ = alpha; + if (spans->need_clip_surface) { + s = x0, t = y1; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + } + + /* top left */ + *vertices++ = x0; *vertices++ = y0; + s = x0, t = y0; + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + break; + case VS_LINEAR: + *vertices++ = i915_shader_linear_texcoord (&spans->shader.source.linear, s, t); + break; + case VS_RADIAL: + case VS_TEXTURE: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + break; + case VS_TEXTURE_16: + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = texcoord_2d_16 (s, t); + break; + } + *vertices++ = alpha; + if (spans->need_clip_surface) { + s = x0, t = y0; + cairo_matrix_transform_point (&spans->shader.source.base.matrix, &s, &t); + *vertices++ = s; *vertices++ = t; + } +} + +static cairo_status_t +i915_bounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage >= 128) { + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + 255); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_bounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage) { + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_unbounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + spans->span (spans, + spans->xmin, spans->xmax, + y, y + 
height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + spans->span (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + spans->span (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_unbounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i915_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + spans->span (spans, + spans->xmin, spans->xmax, + y, y + height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + spans->span (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + int alpha = 0; + if (half[0].coverage >= 128) + alpha = 255; + spans->span (spans, + half[0].x, half[1].x, + y, y + height, + alpha); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + spans->span (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i915_spans_init (i915_spans_t *spans, + i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + cairo_status_t status; + + spans->device = (i915_device_t *) dst->intel.drm.base.device; + + spans->is_bounded = extents->is_bounded; + if (extents->is_bounded) { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i915_bounded_spans_mono; + else + spans->renderer.render_rows = i915_bounded_spans; + + spans->extents = &extents->bounded; + } else { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i915_unbounded_spans_mono; + else + spans->renderer.render_rows = i915_unbounded_spans; + + spans->extents = &extents->unbounded; + } + spans->xmin = spans->extents->x; + spans->xmax = spans->extents->x + spans->extents->width; + + spans->clip_region = NULL; + spans->need_clip_surface = FALSE; + if (clip != NULL) { + cairo_region_t *clip_region = NULL; + + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + spans->clip_region = clip_region; + spans->need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + } + + spans->head.next = NULL; + spans->head.bo = NULL; + spans->head.count = 0; + spans->tail = &spans->head; + + if (spans->clip_region == NULL) { + spans->get_rectangle = i915_emit_rectangle; + } else { + assert (! extents->is_bounded); + spans->get_rectangle = i915_accumulate_rectangle; + spans->head.bo = intel_bo_create (&spans->device->intel, + I915_VBO_SIZE, FALSE); + if (unlikely (spans->head.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + spans->vbo_base = intel_bo_map (&spans->device->intel, spans->head.bo); + } + spans->vbo_offset = 0; + + i915_shader_init (&spans->shader, dst, op); + if (spans->need_clip_surface) + i915_shader_set_clip (&spans->shader, clip); + + status = i915_shader_acquire_pattern (&spans->shader, &spans->shader.source, + pattern, &extents->bounded); + if (unlikely (status)) + return status; + + if (! 
spans->need_clip_surface) { + switch (spans->shader.source.type.vertex) { + case VS_CONSTANT: + spans->span = i915_span_constant; + break; + case VS_LINEAR: + spans->span = i915_span_linear; + break; + case VS_RADIAL: + spans->span = i915_span_radial; + break; + case VS_TEXTURE: + spans->span = i915_span_texture; + break; + case VS_TEXTURE_16: + spans->span = i915_span_texture16; + break; + default: + spans->span = i915_span_generic; + break; + } + } else { + spans->span = i915_span_generic; + } + + spans->rectangle_size = 3 * (2 + i915_shader_num_texcoords (&spans->shader)); + return CAIRO_STATUS_SUCCESS; +} + +static void +i915_spans_fini (i915_spans_t *spans) +{ + i915_shader_fini (&spans->shader); + + if (spans->head.bo != NULL) { + struct vbo *vbo, *next; + + intel_bo_destroy (&spans->device->intel, spans->head.bo); + for (vbo = spans->head.next; vbo != NULL; vbo = next) { + next = vbo->next; + intel_bo_destroy (&spans->device->intel, vbo->bo); + free (vbo); + } + } +} + +cairo_status_t +i915_clip_and_composite_spans (i915_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + i915_spans_func_t draw_func, + void *draw_closure, + const cairo_composite_rectangles_t*extents, + cairo_clip_t *clip) +{ + i915_spans_t spans; + i915_device_t *device; + cairo_status_t status; + struct vbo *vbo; + + if (op == CAIRO_OPERATOR_CLEAR) { + pattern = &_cairo_pattern_white.base; + op = CAIRO_OPERATOR_DEST_OUT; + } + + status = i915_spans_init (&spans, dst, op, pattern, antialias, clip, extents); + if (unlikely (status)) + return status; + + spans.shader.mask.base.texfmt = TEXCOORDFMT_1D; + spans.shader.mask.base.content = CAIRO_CONTENT_ALPHA; + spans.shader.mask.type.fragment = FS_SPANS; + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SPANS; + + device = i915_device (dst); + status = i915_shader_commit (&spans.shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + status = draw_func (draw_closure, &spans.renderer, spans.extents); + if (spans.clip_region != NULL && status == CAIRO_STATUS_SUCCESS) { + intel_bo_unmap (spans.tail->bo); + + i915_vbo_finish (device); + + OUT_DWORD (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT); + for (vbo = &spans.head; vbo != NULL; vbo = vbo->next) { + int i, num_rectangles; + + /* XXX require_space & batch_flush */ + + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S (0) | I1_LOAD_S (1) | 1); + i915_batch_emit_reloc (device, vbo->bo, 0, + I915_GEM_DOMAIN_VERTEX, 0); + OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT) | + vbo->count); + + num_rectangles = cairo_region_num_rectangles (spans.clip_region); + for (i = 0; i < num_rectangles; i++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (spans.clip_region, i, &rect); + + OUT_DWORD (_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_DWORD (SCISSOR_RECT_0_XMIN (rect.x) | + SCISSOR_RECT_0_YMIN (rect.y)); + OUT_DWORD (SCISSOR_RECT_0_XMAX (rect.x + rect.width) | + SCISSOR_RECT_0_YMAX (rect.y + rect.height)); + + OUT_DWORD (PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | vbo->count); + OUT_DWORD (0); + } + } + OUT_DWORD (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + } + +CLEANUP_DEVICE: + cairo_device_release (dst->intel.drm.base.device); +CLEANUP_SPANS: + i915_spans_fini (&spans); + + return status; +} diff --git a/src/drm/cairo-drm-i915-surface.c b/src/drm/cairo-drm-i915-surface.c new file mode 100644 index 
00000000..2079ac96 --- /dev/null +++ b/src/drm/cairo-drm-i915-surface.c @@ -0,0 +1,1996 @@ +/* Cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Chris Wilson + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * ************************************************************************** + * This work was initially based upon xf86-video-intel/src/i915_render.c: + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * + * ************************************************************************** + * and also upon libdrm/intel/intel_bufmgr_gem.c: + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + * Dave Airlie <airlied@linux.ie> + */ + +/* XXX + * + * - Per thread context? Would it actually avoid many locks? + * + */ + +#include "cairoint.h" + +#include "cairo-drm-private.h" +#include "cairo-drm-ioctl-private.h" +#include "cairo-drm-intel-private.h" +#include "cairo-drm-intel-command-private.h" +#include "cairo-drm-intel-ioctl-private.h" +#include "cairo-drm-i915-private.h" + +#include "cairo-boxes-private.h" +#include "cairo-cache-private.h" +#include "cairo-composite-rectangles-private.h" +#include "cairo-error-private.h" +#include "cairo-freelist-private.h" +#include "cairo-list-private.h" +#include "cairo-path-fixed-private.h" +#include "cairo-region-private.h" +#include "cairo-surface-offset-private.h" + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <errno.h> + +static cairo_int_status_t +i915_surface_fill (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t*source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip); + +static const uint32_t i915_batch_setup[] = { + /* Disable line anti-aliasing */ + _3DSTATE_AA_CMD, + + /* Disable independent alpha blend */ + _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | + IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT) | + IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT), + + /* Disable texture crossbar */ + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB (0, 0) | + CSB_TCB (1, 1) | + CSB_TCB (2, 2) | + CSB_TCB (3, 3) | + CSB_TCB (4, 4) | + CSB_TCB (5, 5) | + CSB_TCB (6, 6) | + CSB_TCB (7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX (1) | + ENABLE_TRI_FAN_PROVOKE_VRTX | TRI_FAN_PROVOKE_VRTX (2) | + ENABLE_TEXKILL_3D_4D | TEXKILL_4D, + + _3DSTATE_MODES_4_CMD | ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC (LOGICOP_COPY), + + _3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S (2) | + I1_LOAD_S (3) | + I1_LOAD_S (4) | + I1_LOAD_S (5) | + I1_LOAD_S (6) | + 4, + S2_TEXCOORD_NONE, + 0, /* Disable texture coordinate wrap-shortest */ + (1 << S4_POINT_WIDTH_SHIFT) | + 
S4_LINE_WIDTH_ONE | + S4_FLATSHADE_ALPHA | + S4_FLATSHADE_FOG | + S4_FLATSHADE_SPECULAR | + S4_FLATSHADE_COLOR | + S4_CULLMODE_NONE | + S4_VFMT_XY, + 0, /* Disable stencil buffer */ + S6_COLOR_WRITE_ENABLE, + + _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT, + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state */ + _3DSTATE_LOAD_INDIRECT, + 0, + + _3DSTATE_STIPPLE, + 0, + + _3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE, +}; + +static const cairo_surface_backend_t i915_surface_backend; + +#define NEAREST_BIAS (-.375) + +static cairo_surface_t * +i915_surface_create_from_cacheable_image (cairo_drm_device_t *base_dev, + cairo_surface_t *source); + +static cairo_status_t +i915_bo_exec (i915_device_t *device, intel_bo_t *bo, uint32_t offset) +{ + struct drm_i915_gem_execbuffer2 execbuf; + int ret, cnt, i; + + /* Add the batch buffer to the validation list. */ + cnt = device->batch.exec_count; + if (cnt > 0 && bo->base.handle == device->batch.exec[cnt-1].handle) + i = cnt - 1; + else + i = device->batch.exec_count++; + device->batch.exec[i].handle = bo->base.handle; + device->batch.exec[i].relocation_count = device->batch.reloc_count; + device->batch.exec[i].relocs_ptr = (uintptr_t) device->batch.reloc; + device->batch.exec[i].alignment = 0; + device->batch.exec[i].offset = 0; + device->batch.exec[i].flags = 0; + device->batch.exec[i].rsvd1 = 0; + device->batch.exec[i].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) device->batch.exec; + execbuf.buffer_count = device->batch.exec_count; + execbuf.batch_start_offset = offset; + execbuf.batch_len = (device->batch.used << 2) + sizeof (device->batch_header); + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.num_cliprects = 0; + execbuf.cliprects_ptr = 0; + execbuf.flags = 0; + execbuf.rsvd1 = 0; + execbuf.rsvd2 = 0; + + if (I915_VERBOSE && device->debug & I915_DEBUG_EXEC) { + int n; + fprintf (stderr, + "Executing batch: %d+%d bytes, %d buffers, %d relocations\n", + execbuf.batch_start_offset, + execbuf.batch_len, + device->batch.exec_count, + device->batch.reloc_count); + for (n = 0; n < device->batch.exec_count; n++) { + fprintf (stderr, " exec[%d] = %d\n", n, + device->batch.exec[n].handle); + } + for (n = 0; n < device->batch.reloc_count; n++) { + fprintf (stderr, " reloc[%d] = %d @ %qx\n", n, + device->batch.reloc[n].target_handle, + (unsigned long long) device->batch.reloc[n].offset); + } + } + + do { + ret = ioctl (device->intel.base.fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); + } while (ret != 0 && errno == EINTR); + + if (I915_VERBOSE && ret) { + int n; + + fprintf (stderr, "Batch submission failed: %d\n", errno); + fprintf (stderr, " relocation entries: %d/%d\n", + device->batch.reloc_count, I915_MAX_RELOCS); + fprintf (stderr, " gtt size: %zd/%zd\n", + device->batch.gtt_size, device->intel.gtt_avail_size); + + fprintf (stderr, " buffers:\n"); + for (n = 0; n < cnt; n++) { + fprintf (stderr, " exec[%d] = %d\n", + n, device->batch.target_bo[n]->base.size); + } + + intel_dump_batchbuffer (device->batch_header, + device->batch.used, + device->intel.base.chip_id); + } + + VG (VALGRIND_MAKE_MEM_DEFINED (device->batch.exec, sizeof (device->batch.exec[0]) * i)); + + bo->offset = device->batch.exec[i].offset; + while (cnt--) { + device->batch.target_bo[cnt]->offset = device->batch.exec[cnt].offset; + device->batch.target_bo[cnt]->exec = NULL; + device->batch.target_bo[cnt]->batch_read_domains = 0; + device->batch.target_bo[cnt]->batch_write_domain = 0; + intel_bo_destroy (&device->intel, device->batch.target_bo[cnt]); 
+ } + + device->batch.exec_count = 0; + device->batch.reloc_count = 0; + + device->batch.gtt_size = I915_BATCH_SIZE; + + return ret == 0 ? CAIRO_STATUS_SUCCESS : _cairo_error (CAIRO_STATUS_NO_MEMORY); +} + +void +i915_batch_add_reloc (i915_device_t *device, + uint32_t pos, + intel_bo_t *bo, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain) +{ + int index; + + if (bo->exec == NULL) { + device->batch.gtt_size += bo->base.size; + + index = device->batch.exec_count++; + device->batch.exec[index].handle = bo->base.handle; + device->batch.exec[index].relocation_count = 0; + device->batch.exec[index].relocs_ptr = 0; + device->batch.exec[index].alignment = 0; + device->batch.exec[index].offset = 0; + device->batch.exec[index].flags = 0; + device->batch.exec[index].rsvd1 = 0; + device->batch.exec[index].rsvd2 = 0; + + device->batch.target_bo[index] = intel_bo_reference (bo); + + bo->exec = &device->batch.exec[index]; + } + + index = device->batch.reloc_count++; + device->batch.reloc[index].offset = (pos << 2) + sizeof (device->batch_header); + device->batch.reloc[index].delta = offset; + device->batch.reloc[index].target_handle = bo->base.handle; + device->batch.reloc[index].read_domains = read_domains; + device->batch.reloc[index].write_domain = write_domain; + device->batch.reloc[index].presumed_offset = bo->offset; + + assert (write_domain == 0 || bo->batch_write_domain == 0 || bo->batch_write_domain == write_domain); + bo->batch_read_domains |= read_domains; + bo->batch_write_domain |= write_domain; +} + +void +i915_vbo_finish (i915_device_t *device) +{ + if (device->vbo_used == 0) + return; + + if (device->vbo || i915_batch_space (device) < (int32_t) device->vbo_used) { + intel_bo_t *vbo; + + if (device->vertex_count) { + if (device->vbo == 0) { + /* XXX unchecked, must fit! */ + /* XXX batch_flush and i915_shader_commit (device, device->shader)); */ + assert (i915_check_aperture_size (device, 1, + (device->vbo_used + 4095) & -4096)); + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S (0) | + I1_LOAD_S (1) | + 1); + device->vbo = device->batch.used++; + device->vbo_max_index = device->batch.used; + OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + } + + OUT_DWORD (PRIM3D_RECTLIST | + PRIM3D_INDIRECT_SEQUENTIAL | + device->vertex_count); + OUT_DWORD (device->vertex_index); + } + + if (I915_VERBOSE && device->debug & I915_DEBUG_BUFFER) { + fprintf (stderr, "Creating vertex buffer: %d bytes\n", + device->vbo_used); + } + + if (device->last_vbo != NULL) + intel_bo_destroy (&device->intel, device->last_vbo); + + device->batch_base[device->vbo_max_index] |= device->vertex_index + device->vertex_count; + + /* will include a few bytes of inter-array padding */ + vbo = intel_bo_create (&device->intel, device->vbo_used, FALSE); + i915_batch_fill_reloc (device, device->vbo, + vbo, 0, + I915_GEM_DOMAIN_VERTEX, 0); + intel_bo_write (&device->intel, vbo, 0, device->vbo_used, device->vbo_base); + device->last_vbo = vbo; + device->last_vbo_offset = (device->vbo_used+7)&-8; + device->last_vbo_space = vbo->base.size - device->last_vbo_offset; + + device->vbo = 0; + } + else + { + /* Only a single rectlist in this batch, and no active vertex buffer. 
*/
+        OUT_DWORD (PRIM3D_RECTLIST | (device->vbo_used / 4 - 1));
+
+        memcpy (BATCH_PTR (device), device->vbo_base, device->vbo_used);
+        device->batch.used += device->vbo_used >> 2;
+    }
+
+    device->vbo_used = device->vbo_offset = 0;
+    device->vertex_index = device->vertex_count = 0;
+}
+
+/* XXX improve state tracker/difference and flush state on vertex emission */
+static void
+i915_device_reset (i915_device_t *device)
+{
+    if (device->current_source != NULL)
+        *device->current_source = 0;
+    if (device->current_mask != NULL)
+        *device->current_mask = 0;
+    if (device->current_clip != NULL)
+        *device->current_clip = 0;
+
+    device->current_target = NULL;
+    device->current_size = 0;
+    device->current_source = NULL;
+    device->current_mask = NULL;
+    device->current_clip = NULL;
+    device->current_shader = 0;
+    device->current_texcoords = ~0;
+    device->current_blend = 0;
+    device->current_n_constants = 0;
+    device->current_n_samplers = 0;
+    device->current_colorbuf = 0;
+    device->current_diffuse = 0;
+    device->current_program = ~0;
+
+    device->last_source_fragment = ~0;
+
+    device->floats_per_vertex = 0;
+}
+
+static void
+i915_batch_cleanup (i915_device_t *device)
+{
+    int i;
+
+    for (i = 0; i < device->batch.exec_count; i++)
+        intel_bo_destroy (&device->intel, device->batch.target_bo[i]);
+
+    device->batch.exec_count = 0;
+    device->batch.reloc_count = 0;
+}
+
+cairo_status_t
+i915_batch_flush (i915_device_t *device)
+{
+    intel_bo_t *batch;
+    cairo_status_t status;
+    uint32_t length, offset;
+    int n;
+
+    i915_vbo_finish (device);
+
+    if (device->batch.used == 0)
+        return CAIRO_STATUS_SUCCESS;
+
+    i915_batch_emit_dword (device, MI_BATCH_BUFFER_END);
+    if ((device->batch.used & 1) != (sizeof (device->batch_header) & 4))
+        i915_batch_emit_dword (device, MI_NOOP);
+
+    length = (device->batch.used << 2) + sizeof (device->batch_header);
+
+    if (I915_VERBOSE && device->debug & I915_DEBUG_BATCH)
+        intel_dump_batchbuffer (device->batch_header, length, device->intel.base.chip_id);
+
+    intel_glyph_cache_unmap (&device->intel);
+
+    /* NB: it is faster to copy the data than map/unmap the batch,
+     * presumably because we frequently only use a small part of the buffer.
+ */ + batch = NULL; + if (device->last_vbo) { + if (length <= device->last_vbo_space) { + if (I915_VERBOSE && device->debug & I915_DEBUG_BATCH) { + fprintf (stderr, "Packing batch buffer into last vbo: %d+%d bytes\n", length, device->last_vbo_offset); + } + batch = device->last_vbo; + offset = device->last_vbo_offset; + + /* fixup the relocations */ + for (n = 0; n < device->batch.reloc_count; n++) + device->batch.reloc[n].offset += offset; + } else + intel_bo_destroy (&device->intel, device->last_vbo); + device->last_vbo = NULL; + } + if (batch == NULL) { + if (I915_VERBOSE && device->debug & I915_DEBUG_BUFFER) { + fprintf (stderr, "Creating batch buffer: %d bytes\n", length); + } + batch = intel_bo_create (&device->intel, length, FALSE); + if (unlikely (batch == NULL)) { + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + i915_batch_cleanup (device); + goto BAIL; + } + + offset = 0; + } + intel_bo_write (&device->intel, batch, offset, length, device->batch_header); + status = i915_bo_exec (device, batch, offset); + + if (device->debug & I915_DEBUG_SYNC && status == CAIRO_STATUS_SUCCESS) + intel_bo_wait (&device->intel, batch); + + intel_bo_destroy (&device->intel, batch); + +BAIL: + device->batch.used = 0; + + intel_glyph_cache_unpin (&device->intel); + intel_snapshot_cache_thaw (&device->intel); + + i915_device_reset (device); + + return status; +} + +cairo_status_t +i915_vbo_flush (i915_device_t *device) +{ + assert (device->vertex_count); + + if (device->vbo == 0) { + assert (device->floats_per_vertex); + + if (i915_batch_space (device) < 9 || + ! i915_check_aperture_size (device, 1, I915_VBO_SIZE)) + { + return i915_batch_flush (device); + } + + OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S (0) | + I1_LOAD_S (1) | + 1); + device->vbo = device->batch.used++; + device->vbo_max_index = device->batch.used; + OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + } + + OUT_DWORD (PRIM3D_RECTLIST | + PRIM3D_INDIRECT_SEQUENTIAL | + device->vertex_count); + OUT_DWORD (device->vertex_index); + + device->vertex_index += device->vertex_count; + device->vertex_count = 0; + + return CAIRO_STATUS_SUCCESS; +} + +#if 0 +static float * +i915_add_rectangles (i915_device_t *device, int num_rects, int *count) +{ + float *vertices; + uint32_t size; + int cnt; + + assert (device->floats_per_vertex); + + size = device->rectangle_size; + if (unlikely (device->vbo_offset + size > I915_VBO_SIZE)) + i915_vbo_finish (device); + + vertices = (float *) (device->vbo_base + device->vbo_offset); + cnt = (I915_VBO_SIZE - device->vbo_offset) / size; + if (cnt > num_rects) + cnt = num_rects; + device->vbo_used = device->vbo_offset += size * cnt; + device->vertex_count += 3 * cnt; + *count = cnt; + return vertices; +} +#endif + +static cairo_status_t +i915_surface_finish (void *abstract_surface) +{ + i915_surface_t *surface = abstract_surface; + i915_device_t *device = i915_device (surface); + + if (surface->stencil != NULL) + intel_bo_destroy (&device->intel, surface->stencil); + + if (surface->is_current_texture) { + if (surface->is_current_texture & CURRENT_SOURCE) + device->current_source = NULL; + if (surface->is_current_texture & CURRENT_MASK) + device->current_mask = NULL; + if (surface->is_current_texture & CURRENT_CLIP) + device->current_clip = NULL; + device->current_n_samplers = 0; + } + + if (surface == device->current_target) + device->current_target = NULL; + + if (surface->cache != NULL) { + i915_image_private_t *node = 
surface->cache;
+        intel_buffer_cache_t *cache = node->container;
+
+        if (--cache->ref_count == 0) {
+            intel_bo_destroy (&device->intel, cache->buffer.bo);
+            _cairo_rtree_fini (&cache->rtree);
+            cairo_list_del (&cache->link);
+            free (cache);
+        } else {
+            node->node.state = CAIRO_RTREE_NODE_AVAILABLE;
+            cairo_list_move (&node->node.link, &cache->rtree.available);
+            _cairo_rtree_node_collapse (&cache->rtree, node->node.parent);
+        }
+    }
+
+    return _cairo_drm_surface_finish (&surface->intel.drm);
+}
+
+static cairo_status_t
+i915_surface_batch_flush (i915_surface_t *surface)
+{
+    cairo_status_t status;
+    intel_bo_t *bo;
+
+    assert (surface->intel.drm.fallback == NULL);
+
+    bo = to_intel_bo (surface->intel.drm.bo);
+    if (bo == NULL || bo->batch_write_domain == 0)
+        return CAIRO_STATUS_SUCCESS;
+
+    status = cairo_device_acquire (surface->intel.drm.base.device);
+    if (unlikely (status))
+        return status;
+
+    status = i915_batch_flush (i915_device (surface));
+    cairo_device_release (surface->intel.drm.base.device);
+
+    return status;
+}
+
+static cairo_status_t
+i915_surface_flush (void *abstract_surface)
+{
+    i915_surface_t *surface = abstract_surface;
+
+    if (surface->intel.drm.fallback == NULL) {
+        if (surface->intel.drm.base.finished) {
+            /* Forgo flushing on finish as the user cannot access the surface directly. */
+            return CAIRO_STATUS_SUCCESS;
+        }
+
+        return i915_surface_batch_flush (surface);
+    }
+
+    return intel_surface_flush (abstract_surface);
+}
+
+/* rasterisation */
+
+static cairo_status_t
+_composite_boxes_spans (void *closure,
+                        cairo_span_renderer_t *renderer,
+                        const cairo_rectangle_int_t *extents)
+{
+    cairo_boxes_t *boxes = closure;
+    cairo_rectangular_scan_converter_t converter;
+    struct _cairo_boxes_chunk *chunk;
+    cairo_status_t status;
+    int i;
+
+    _cairo_rectangular_scan_converter_init (&converter, extents);
+    for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) {
+        cairo_box_t *box = chunk->base;
+        for (i = 0; i < chunk->count; i++) {
+            status = _cairo_rectangular_scan_converter_add_box (&converter, &box[i], 1);
+            if (unlikely (status))
+                goto CLEANUP;
+        }
+    }
+
+    status = converter.base.generate (&converter.base, renderer);
+
+  CLEANUP:
+    converter.base.destroy (&converter.base);
+    return status;
+}
+
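A note on the renderer interface used above: the scan converter hands its output to a cairo_span_renderer_t whose render_rows callback (i915_bounded_spans and friends, earlier in this patch) receives half-open spans, where entry i covers [half[i].x, half[i+1].x) at coverage half[i].coverage and the final entry exists only to close the one before it. That is why those loops run while more than one entry remains. A minimal sketch of a conforming callback, using illustrative stand-in types rather than cairo's private ones:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for cairo's private half-open span type. */
    typedef struct { int32_t x; uint8_t coverage; } half_open_span_t;

    static void
    print_rows (int y, int height,
                const half_open_span_t *half, unsigned num_spans)
    {
        /* The last entry only terminates the span opened before it. */
        for (; num_spans > 1; half++, num_spans--) {
            if (half[0].coverage) {
                printf ("y=[%d,%d) x=[%d,%d) coverage=%u\n",
                        y, y + height, half[0].x, half[1].x, half[0].coverage);
            }
        }
    }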
+cairo_status_t
+i915_fixup_unbounded (i915_surface_t *dst,
+                      const cairo_composite_rectangles_t *extents,
+                      cairo_clip_t *clip)
+{
+    i915_shader_t shader;
+    cairo_status_t status;
+
+    i915_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR);
+
+    if (clip != NULL) {
+        cairo_region_t *clip_region = NULL;
+
+        status = _cairo_clip_get_region (clip, &clip_region);
+        assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED);
+        assert (clip_region == NULL);
+
+        if (status == CAIRO_INT_STATUS_UNSUPPORTED)
+            i915_shader_set_clip (&shader, clip);
+    } else {
+        if (extents->bounded.width == extents->unbounded.width &&
+            extents->bounded.height == extents->unbounded.height)
+        {
+            return CAIRO_STATUS_SUCCESS;
+        }
+    }
+
+    status = i915_shader_commit (&shader,
+                                 (i915_device_t *) dst->intel.drm.base.device);
+    if (unlikely (status))
+        return status;
+
+    /* top */
+    if (extents->bounded.y != extents->unbounded.y) {
+        shader.add_rectangle (&shader,
+                              extents->unbounded.x,
+                              extents->unbounded.y,
+                              extents->unbounded.width,
+                              extents->bounded.y - extents->unbounded.y);
+    }
+
+    /* left */
+    if (extents->bounded.x != extents->unbounded.x) {
+        shader.add_rectangle (&shader,
+                              extents->unbounded.x,
+                              extents->bounded.y,
+                              extents->bounded.x - extents->unbounded.x,
+                              extents->bounded.height);
+    }
+
+    /* right */
+    if (extents->bounded.x + extents->bounded.width != extents->unbounded.x + extents->unbounded.width) {
+        shader.add_rectangle (&shader,
+                              extents->bounded.x + extents->bounded.width,
+                              extents->bounded.y,
+                              extents->unbounded.x + extents->unbounded.width - (extents->bounded.x + extents->bounded.width),
+                              extents->bounded.height);
+    }
+
+    /* bottom */
+    if (extents->bounded.y + extents->bounded.height != extents->unbounded.y + extents->unbounded.height) {
+        shader.add_rectangle (&shader,
+                              extents->unbounded.x,
+                              extents->bounded.y + extents->bounded.height,
+                              extents->unbounded.width,
+                              extents->unbounded.y + extents->unbounded.height - (extents->bounded.y + extents->bounded.height));
+    }
+
+    i915_shader_fini (&shader);
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+i915_fixup_unbounded_boxes (i915_surface_t *dst,
+                            const cairo_composite_rectangles_t *extents,
+                            cairo_clip_t *clip,
+                            cairo_boxes_t *boxes)
+{
+    cairo_boxes_t clear;
+    cairo_box_t box;
+    cairo_region_t *clip_region = NULL;
+    cairo_status_t status;
+    struct _cairo_boxes_chunk *chunk;
+    i915_shader_t shader;
+    int i;
+
+    if (boxes->num_boxes <= 1)
+        return i915_fixup_unbounded (dst, extents, clip);
+
+    i915_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR);
+    _cairo_boxes_init (&clear);
+
+    box.p1.x = _cairo_fixed_from_int (extents->unbounded.x + extents->unbounded.width);
+    box.p1.y = _cairo_fixed_from_int (extents->unbounded.y);
+    box.p2.x = _cairo_fixed_from_int (extents->unbounded.x);
+    box.p2.y = _cairo_fixed_from_int (extents->unbounded.y + extents->unbounded.height);
+
+    if (clip != NULL) {
+        status = _cairo_clip_get_region (clip, &clip_region);
+        assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED);
+        if (status == CAIRO_INT_STATUS_UNSUPPORTED)
+            i915_shader_set_clip (&shader, clip);
+    }
+
+    if (clip_region == NULL) {
+        cairo_boxes_t tmp;
+
+        _cairo_boxes_init (&tmp);
+
+        status = _cairo_boxes_add (&tmp, &box);
+        assert (status == CAIRO_STATUS_SUCCESS);
+
+        tmp.chunks.next = &boxes->chunks;
+        tmp.num_boxes += boxes->num_boxes;
+
+        status = _cairo_bentley_ottmann_tessellate_boxes (&tmp,
+                                                          CAIRO_FILL_RULE_WINDING,
+                                                          &clear);
+
+        tmp.chunks.next = NULL;
+    } else {
+        pixman_box32_t *pbox;
+
+        pbox = pixman_region32_rectangles (&clip_region->rgn, &i);
+        _cairo_boxes_limit (&clear, (cairo_box_t *) pbox, i);
+
+        status = _cairo_boxes_add (&clear, &box);
+        assert (status == CAIRO_STATUS_SUCCESS);
+
+        for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) {
+            for (i = 0; i < chunk->count; i++) {
+                status = _cairo_boxes_add (&clear, &chunk->base[i]);
+                if (unlikely (status)) {
+                    _cairo_boxes_fini (&clear);
+                    return status;
+                }
+            }
+        }
+
+        status = _cairo_bentley_ottmann_tessellate_boxes (&clear,
+                                                          CAIRO_FILL_RULE_WINDING,
+                                                          &clear);
+    }
+
+    if (likely (status == CAIRO_STATUS_SUCCESS && clear.num_boxes)) {
+        status = i915_shader_commit (&shader,
+                                     (i915_device_t *) dst->intel.drm.base.device);
+        if (likely (status == CAIRO_STATUS_SUCCESS)) {
+            for (chunk = &clear.chunks; chunk != NULL; chunk = chunk->next) {
+                for (i = 0; i < chunk->count; i++) {
+                    int x1 = _cairo_fixed_integer_part (chunk->base[i].p1.x);
+                    int y1 = _cairo_fixed_integer_part (chunk->base[i].p1.y);
+                    int x2 = _cairo_fixed_integer_part (chunk->base[i].p2.x);
+                    int y2 = _cairo_fixed_integer_part (chunk->base[i].p2.y);
+
+                    shader.add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1);
+                }
+            }
+        }
+        i915_shader_fini (&shader);
+    }
+
+    _cairo_boxes_fini (&clear);
+
+    return status;
+}
+
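Both fixup paths above clear the difference between the unbounded and bounded extents. For the simple case this is a four-strip decomposition: the top and bottom strips span the full unbounded width, while the left and right strips are only as tall as the bounded area, so the emitted rectangles never overlap. A self-contained sketch of the same geometry, with emit_rect () as a hypothetical stand-in for shader.add_rectangle ():

    #include <stdio.h>

    typedef struct { int x, y, width, height; } rect_t;

    static void
    emit_rect (int x, int y, int w, int h)
    {
        printf ("clear %d,%d %dx%d\n", x, y, w, h);
    }

    static void
    fixup_unbounded (const rect_t *bounded, const rect_t *unbounded)
    {
        /* top strip: full unbounded width */
        if (bounded->y != unbounded->y)
            emit_rect (unbounded->x, unbounded->y,
                       unbounded->width, bounded->y - unbounded->y);

        /* left strip: only as tall as the bounded area */
        if (bounded->x != unbounded->x)
            emit_rect (unbounded->x, bounded->y,
                       bounded->x - unbounded->x, bounded->height);

        /* right strip */
        if (bounded->x + bounded->width != unbounded->x + unbounded->width)
            emit_rect (bounded->x + bounded->width, bounded->y,
                       unbounded->x + unbounded->width -
                       (bounded->x + bounded->width), bounded->height);

        /* bottom strip: full unbounded width */
        if (bounded->y + bounded->height != unbounded->y + unbounded->height)
            emit_rect (unbounded->x, bounded->y + bounded->height,
                       unbounded->width,
                       unbounded->y + unbounded->height -
                       (bounded->y + bounded->height));
    }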
+static cairo_status_t
+_composite_boxes (i915_surface_t *dst,
+		  cairo_operator_t op,
+		  const cairo_pattern_t *pattern,
+		  cairo_boxes_t *boxes,
+		  cairo_antialias_t antialias,
+		  cairo_clip_t *clip,
+		  const cairo_composite_rectangles_t *extents)
+{
+    cairo_bool_t need_clip_surface = FALSE;
+    cairo_region_t *clip_region = NULL;
+    const struct _cairo_boxes_chunk *chunk;
+    cairo_status_t status;
+    i915_shader_t shader;
+    int i;
+
+    /* If the boxes are not pixel-aligned, we will need to compute a real mask */
+    if (antialias != CAIRO_ANTIALIAS_NONE) {
+	if (! boxes->is_pixel_aligned)
+	    return CAIRO_INT_STATUS_UNSUPPORTED;
+    }
+
+    i915_shader_init (&shader, dst, op);
+
+    status = i915_shader_acquire_pattern (&shader,
+					  &shader.source,
+					  pattern,
+					  &extents->bounded);
+    if (unlikely (status))
+	return status;
+
+    if (clip != NULL) {
+	status = _cairo_clip_get_region (clip, &clip_region);
+	assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED);
+	need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED;
+	if (need_clip_surface)
+	    i915_shader_set_clip (&shader, clip);
+    }
+
+    status = i915_shader_commit (&shader,
+				 (i915_device_t *) dst->intel.drm.base.device);
+    if (likely (status == CAIRO_STATUS_SUCCESS)) {
+	for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) {
+	    cairo_box_t *box = chunk->base;
+	    for (i = 0; i < chunk->count; i++) {
+		int x1 = _cairo_fixed_integer_round (box[i].p1.x);
+		int y1 = _cairo_fixed_integer_round (box[i].p1.y);
+		int x2 = _cairo_fixed_integer_round (box[i].p2.x);
+		int y2 = _cairo_fixed_integer_round (box[i].p2.y);
+
+		if (x2 > x1 && y2 > y1)
+		    shader.add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1);
+	    }
+	}
+
+	if (! extents->is_bounded)
+	    status = i915_fixup_unbounded_boxes (dst, extents, clip, boxes);
+    }
+    i915_shader_fini (&shader);
+
+    return status;
+}
+
+static cairo_status_t
+_clip_and_composite_boxes (i915_surface_t *dst,
+			   cairo_operator_t op,
+			   const cairo_pattern_t *src,
+			   cairo_boxes_t *boxes,
+			   cairo_antialias_t antialias,
+			   const cairo_composite_rectangles_t *extents,
+			   cairo_clip_t *clip)
+{
+    cairo_status_t status;
+
+    if (boxes->num_boxes == 0) {
+	if (extents->is_bounded)
+	    return CAIRO_STATUS_SUCCESS;
+
+	return i915_fixup_unbounded (dst, extents, clip);
+    }
+
+    /* Use a fast path if the boxes are pixel aligned */
+    status = _composite_boxes (dst, op, src, boxes, antialias, clip, extents);
+    if (status != CAIRO_INT_STATUS_UNSUPPORTED)
+	return status;
+
+    /* Otherwise render the boxes via an implicit mask and composite in the usual
+     * fashion.
+     */
+    return i915_clip_and_composite_spans (dst, op, src, antialias,
+					  _composite_boxes_spans, boxes,
+					  extents, clip);
+}
+
+static cairo_bool_t
+box_is_aligned (const cairo_box_t *box)
+{
+    return
+	_cairo_fixed_is_integer (box->p1.x) &&
+	_cairo_fixed_is_integer (box->p1.y) &&
+	_cairo_fixed_is_integer (box->p2.x) &&
+	_cairo_fixed_is_integer (box->p2.y);
+}
+
+static inline cairo_status_t
+_clip_to_boxes (cairo_clip_t **clip,
+		const cairo_composite_rectangles_t *extents,
+		cairo_box_t **boxes,
+		int *num_boxes)
+{
+    cairo_status_t status;
+    const cairo_rectangle_int_t *rect;
+
+    rect = extents->is_bounded ?
+	   &extents->bounded : &extents->unbounded;
+
+    if (*clip == NULL)
+	goto EXTENTS;
+
+    status = _cairo_clip_rectangle (*clip, rect);
+    if (unlikely (status))
+	return status;
+
+    status = _cairo_clip_get_boxes (*clip, boxes, num_boxes);
+    if (status != CAIRO_INT_STATUS_UNSUPPORTED) {
+	if (extents->is_bounded || (*num_boxes == 1 && box_is_aligned (*boxes)))
+	    *clip = NULL;
+	return status;
+    }
+
+  EXTENTS:
+    _cairo_box_from_rectangle (&(*boxes)[0], rect);
+    *num_boxes = 1;
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_clip_path_t *
+_clip_get_solitary_path (cairo_clip_t *clip)
+{
+    cairo_clip_path_t *iter = clip->path;
+    cairo_clip_path_t *path = NULL;
+
+    do {
+	if ((iter->flags & CAIRO_CLIP_PATH_IS_BOX) == 0) {
+	    if (path != NULL)
+		return NULL;
+
+	    path = iter;
+	}
+	iter = iter->prev;
+    } while (iter != NULL);
+
+    return path;
+}
+
+static cairo_int_status_t
+i915_surface_paint (void *abstract_dst,
+		    cairo_operator_t op,
+		    const cairo_pattern_t *source,
+		    cairo_clip_t *clip)
+{
+    i915_surface_t *dst = abstract_dst;
+    cairo_composite_rectangles_t extents;
+    cairo_clip_t local_clip;
+    cairo_bool_t have_clip = FALSE;
+    cairo_clip_path_t *clip_path;
+    cairo_boxes_t boxes;
+    int num_boxes = ARRAY_LENGTH (boxes.boxes_embedded);
+    cairo_box_t *clip_boxes = boxes.boxes_embedded;
+    cairo_status_t status;
+
+    /* XXX unsupported operators? use pixel shader blending, eventually */
+
+    status = _cairo_composite_rectangles_init_for_paint (&extents,
+							 dst->intel.drm.width,
+							 dst->intel.drm.height,
+							 op, source,
+							 clip);
+    if (unlikely (status))
+	return status;
+
+    if (_cairo_clip_contains_rectangle (clip, &extents))
+	clip = NULL;
+
+    if (clip != NULL) {
+	clip = _cairo_clip_init_copy (&local_clip, clip);
+	have_clip = TRUE;
+    }
+
+    status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes);
+    if (unlikely (status)) {
+	if (have_clip)
+	    _cairo_clip_fini (&local_clip);
+
+	return status;
+    }
+
+    /* If the clip cannot be reduced to a set of boxes, we will need to
+     * use a clipmask. Paint is special as it is the only operation that
+     * does not implicitly use a mask, so we may be able to reduce this
+     * operation to a fill...
+     */
+    if (clip != NULL &&
+	extents.is_bounded &&
+	(clip_path = _clip_get_solitary_path (clip)) != NULL)
+    {
+	status = i915_surface_fill (dst, op, source,
+				    &clip_path->path,
+				    clip_path->fill_rule,
+				    clip_path->tolerance,
+				    clip_path->antialias,
+				    NULL);
+    }
+    else
+    {
+	_cairo_boxes_init_for_array (&boxes, clip_boxes, num_boxes);
+	status = _clip_and_composite_boxes (dst, op, source,
+					    &boxes, CAIRO_ANTIALIAS_DEFAULT,
+					    &extents, clip);
+    }
+    if (clip_boxes != boxes.boxes_embedded)
+	free (clip_boxes);
+
+    if (have_clip)
+	_cairo_clip_fini (&local_clip);
+
+    return status;
+}
+
+static cairo_int_status_t
+i915_surface_mask (void *abstract_dst,
+		   cairo_operator_t op,
+		   const cairo_pattern_t *source,
+		   const cairo_pattern_t *mask,
+		   cairo_clip_t *clip)
+{
+    i915_surface_t *dst = abstract_dst;
+    cairo_composite_rectangles_t extents;
+    i915_shader_t shader;
+    cairo_clip_t local_clip;
+    cairo_region_t *clip_region = NULL;
+    cairo_bool_t need_clip_surface = FALSE;
+    cairo_bool_t have_clip = FALSE;
+    cairo_status_t status;
+
+    status = _cairo_composite_rectangles_init_for_mask (&extents,
+							dst->intel.drm.width,
+							dst->intel.drm.height,
+							op, source, mask, clip);
+    if (unlikely (status))
+	return status;
+
+    if (_cairo_clip_contains_rectangle (clip, &extents))
+	clip = NULL;
+
+    if (clip != NULL && extents.is_bounded) {
+	clip = _cairo_clip_init_copy (&local_clip, clip);
+	status = _cairo_clip_rectangle (clip, &extents.bounded);
+	if (unlikely (status)) {
+	    _cairo_clip_fini (&local_clip);
+	    return status;
+	}
+
+	have_clip = TRUE;
+    }
+
+    i915_shader_init (&shader, dst, op);
+
+    status = i915_shader_acquire_pattern (&shader,
+					  &shader.source,
+					  source,
+					  &extents.bounded);
+    if (unlikely (status))
+	goto BAIL;
+
+    status = i915_shader_acquire_pattern (&shader,
+					  &shader.mask,
+					  mask,
+					  &extents.bounded);
+    if (unlikely (status))
+	goto BAIL;
+
+    if (clip != NULL) {
+	status = _cairo_clip_get_region (clip, &clip_region);
+	assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED);
+	need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED;
+	if (need_clip_surface)
+	    i915_shader_set_clip (&shader, clip);
+    }
+
+    status = i915_shader_commit (&shader,
+				 (i915_device_t *) dst->intel.drm.base.device);
+    if (unlikely (status))
+	goto BAIL;
+
+    if (clip_region != NULL) {
+	unsigned int n, num_rectangles;
+
+	num_rectangles = cairo_region_num_rectangles (clip_region);
+	for (n = 0; n < num_rectangles; n++) {
+	    cairo_rectangle_int_t rect;
+
+	    cairo_region_get_rectangle (clip_region, n, &rect);
+
+	    /* add_rectangle takes (x, y, width, height), as in the other
+	     * call sites in this file, not (x1, y1, x2, y2) */
+	    shader.add_rectangle (&shader,
+				  rect.x, rect.y,
+				  rect.width, rect.height);
+	}
+    } else {
+	shader.add_rectangle (&shader,
+			      extents.bounded.x, extents.bounded.y,
+			      extents.bounded.width,
+			      extents.bounded.height);
+    }
+
+    if (!
extents.is_bounded) + status = i915_fixup_unbounded (dst, &extents, clip); + + BAIL: + i915_shader_fini (&shader); + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +typedef struct { + cairo_polygon_t polygon; + cairo_fill_rule_t fill_rule; + cairo_antialias_t antialias; +} composite_polygon_info_t; + +static cairo_status_t +_composite_polygon_spans (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents) +{ + composite_polygon_info_t *info = closure; + cairo_botor_scan_converter_t converter; + cairo_status_t status; + cairo_box_t box; + + box.p1.x = _cairo_fixed_from_int (extents->x); + box.p1.y = _cairo_fixed_from_int (extents->y); + box.p2.x = _cairo_fixed_from_int (extents->x + extents->width); + box.p2.y = _cairo_fixed_from_int (extents->y + extents->height); + + _cairo_botor_scan_converter_init (&converter, &box, info->fill_rule); + + status = converter.base.add_polygon (&converter.base, &info->polygon); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = converter.base.generate (&converter.base, renderer); + + converter.base.destroy (&converter.base); + + return status; +} + +static cairo_int_status_t +i915_surface_stroke (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i915_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_stroke (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, + path, stroke_style, ctm, + clip); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + if (path->is_rectilinear) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_stroke_rectilinear_to_boxes (path, + stroke_style, + ctm, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_stroke_to_polygon (path, + stroke_style, + ctm, ctm_inverse, + tolerance, + &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! 
extents.is_bounded) + status = i915_fixup_unbounded (dst, &extents, clip); + + goto CLEANUP_POLYGON; + } + + info.fill_rule = CAIRO_FILL_RULE_WINDING; + info.antialias = antialias; + status = i915_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static cairo_int_status_t +i915_surface_fill (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t*source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i915_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_fill (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, path, + clip); + if (unlikely (status)) + return status; + + if (_cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + assert (! path->is_empty_fill); + + if (_cairo_path_fixed_is_rectilinear_fill (path)) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_fill_rectilinear_to_boxes (path, + fill_rule, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_fill_to_polygon (path, tolerance, &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! 
extents.is_bounded) + status = i915_fixup_unbounded (dst, &extents, clip); + + goto CLEANUP_POLYGON; + } + + info.fill_rule = fill_rule; + info.antialias = antialias; + status = i915_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static const cairo_surface_backend_t i915_surface_backend = { + CAIRO_SURFACE_TYPE_DRM, + + _cairo_drm_surface_create_similar, + i915_surface_finish, + intel_surface_acquire_source_image, + intel_surface_release_source_image, + + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + + NULL, /* copy_page */ + NULL, /* show_page */ + _cairo_drm_surface_get_extents, + NULL, /* old-glyphs */ + _cairo_drm_surface_get_font_options, + + i915_surface_flush, + NULL, /* mark_dirty */ + intel_scaled_font_fini, + intel_scaled_glyph_fini, + + i915_surface_paint, + i915_surface_mask, + i915_surface_stroke, + i915_surface_fill, + i915_surface_glyphs, +}; + +static void +i915_surface_init (i915_surface_t *surface, + cairo_content_t content, + cairo_drm_device_t *device) +{ + intel_surface_init (&surface->intel, &i915_surface_backend, device, content); + + switch (content) { + default: + ASSERT_NOT_REACHED; + case CAIRO_CONTENT_COLOR_ALPHA: + surface->map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + surface->colorbuf = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER; + break; + case CAIRO_CONTENT_COLOR: + surface->map0 = MAPSURF_32BIT | MT_32BIT_XRGB8888; + surface->colorbuf = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER; + break; + case CAIRO_CONTENT_ALPHA: + surface->map0 = MAPSURF_8BIT | MT_8BIT_A8; + surface->colorbuf = COLR_BUF_8BIT | DEPTH_FRMT_24_FIXED_8_OTHER; + break; + } + surface->colorbuf |= DSTORG_HORT_BIAS (0x8) | DSTORG_VERT_BIAS (0x8); + + surface->map1 = 0; + + surface->is_current_texture = 0; + + surface->offset = 0; + + surface->stencil = NULL; + surface->cache = NULL; +} + +cairo_surface_t * +i915_surface_create_internal (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height, + uint32_t tiling, + cairo_bool_t gpu_target) +{ + i915_surface_t *surface; + cairo_status_t status_ignored; + + surface = malloc (sizeof (i915_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i915_surface_init (surface, content, base_dev); + + if (width && height) { + uint32_t size; + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + + width = (width + 3) & -4; + surface->intel.drm.stride = cairo_format_stride_for_width (surface->intel.drm.format, + width); + /* check for tiny surfaces for which tiling is irrelevant */ + if (height * surface->intel.drm.stride < 4096) + tiling = I915_TILING_NONE; + + surface->intel.drm.stride = i915_tiling_stride (tiling, + surface->intel.drm.stride); + assert (surface->intel.drm.stride <= 8192); + assert (surface->intel.drm.stride >= cairo_format_stride_for_width (surface->intel.drm.format, width)); + height = i915_tiling_height (tiling, height); + assert (height <= 2048); + + size = i915_tiling_size (tiling, surface->intel.drm.stride * height); + + surface->intel.drm.bo = &intel_bo_create 
(to_intel_device (&base_dev->base), + size, gpu_target)->base; + if (surface->intel.drm.bo == NULL) { + status_ignored = _cairo_drm_surface_finish (&surface->intel.drm); + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + intel_bo_set_tiling (to_intel_device (&base_dev->base), + to_intel_bo (surface->intel.drm.bo), + tiling, surface->intel.drm.stride); + + assert (surface->intel.drm.bo->size >= (size_t) surface->intel.drm.stride*height); + + surface->map0 |= MS3_tiling (to_intel_bo (surface->intel.drm.bo)->tiling); + surface->map1 = (surface->intel.drm.stride/4 - 1) << MS4_PITCH_SHIFT; + } + + return &surface->intel.drm.base; +} + +static cairo_surface_t * +i915_surface_create (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height) +{ + return i915_surface_create_internal (base_dev, content, width, height, + I915_TILING_DEFAULT, TRUE); +} + +static cairo_surface_t * +i915_surface_create_for_name (cairo_drm_device_t *base_dev, + unsigned int name, + cairo_format_t format, + int width, int height, int stride) +{ + i915_surface_t *surface; + cairo_content_t content; + + /* Vol I, p134: size restrictions for textures */ + /* Vol I, p129: destination surface stride must be a multiple of 32 bytes */ + if (stride < cairo_format_stride_for_width (format, (width + 3) & -4) || + stride & 31) + { + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_STRIDE)); + } + + switch (format) { + default: + case CAIRO_FORMAT_A1: + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + case CAIRO_FORMAT_ARGB32: + content = CAIRO_CONTENT_COLOR_ALPHA; + break; + case CAIRO_FORMAT_RGB24: + content = CAIRO_CONTENT_COLOR; + break; + case CAIRO_FORMAT_A8: + content = CAIRO_CONTENT_ALPHA; + break; + } + + surface = malloc (sizeof (i915_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i915_surface_init (surface, content, base_dev); + + if (width && height) { + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->intel.drm.stride = stride; + + surface->map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + surface->map1 = (surface->intel.drm.stride/4 - 1) << MS4_PITCH_SHIFT; + + surface->intel.drm.bo = + &intel_bo_create_for_name (to_intel_device (&base_dev->base), + name)->base; + if (unlikely (surface->intel.drm.bo == NULL)) { + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + to_intel_bo (surface->intel.drm.bo)->stride = stride; + + surface->map0 |= MS3_tiling (to_intel_bo (surface->intel.drm.bo)->tiling); + } + + return &surface->intel.drm.base; +} + +static cairo_status_t +i915_buffer_cache_init (intel_buffer_cache_t *cache, + i915_device_t *device, + cairo_format_t format, + int width, int height) +{ + const uint32_t tiling = I915_TILING_Y; + + assert ((width & 3) == 0); + assert ((height & 1) == 0); + cache->buffer.width = width; + cache->buffer.height = height; + + switch (format) { + case CAIRO_FORMAT_A1: + case CAIRO_FORMAT_RGB24: + ASSERT_NOT_REACHED; + case CAIRO_FORMAT_ARGB32: + cache->buffer.map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + cache->buffer.stride = width * 4; + break; + case CAIRO_FORMAT_A8: + cache->buffer.map0 = MAPSURF_8BIT | MT_8BIT_I8; + cache->buffer.stride = width; + break; + } + cache->buffer.map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << 
MS3_WIDTH_SHIFT); + cache->buffer.map1 = ((cache->buffer.stride / 4) - 1) << MS4_PITCH_SHIFT; + + assert ((cache->buffer.stride & 7) == 0); + assert (i915_tiling_stride (tiling, cache->buffer.stride) == cache->buffer.stride); + assert (i915_tiling_height (tiling, height) == height); + + cache->buffer.bo = intel_bo_create (&device->intel, + height * cache->buffer.stride, + FALSE); + if (unlikely (cache->buffer.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + intel_bo_set_tiling (&device->intel, cache->buffer.bo, tiling, cache->buffer.stride); + cache->buffer.map0 |= MS3_tiling (cache->buffer.bo->tiling); + + cache->ref_count = 0; + cairo_list_init (&cache->link); + + return CAIRO_STATUS_SUCCESS; +} + +i915_surface_t * +i915_surface_create_from_cacheable_image_internal (i915_device_t *device, + cairo_image_surface_t *image) +{ + i915_surface_t *surface; + cairo_status_t status; + cairo_list_t *caches; + intel_buffer_cache_t *cache; + cairo_rtree_node_t *node; + cairo_format_t format; + int width, height, bpp; + + width = image->width; + height = image->height; + if (width > IMAGE_CACHE_WIDTH/2 || height > IMAGE_CACHE_HEIGHT/2) { + surface = (i915_surface_t *) + i915_surface_create_internal (&device->intel.base, + image->base.content, + width, height, + I915_TILING_NONE, FALSE); + if (unlikely (surface->intel.drm.base.status)) + return surface; + + status = intel_bo_put_image (&device->intel, + to_intel_bo (surface->intel.drm.bo), + surface->intel.drm.stride, + image, + 0, 0, + width, height, + 0, 0); + + if (unlikely (status)) { + cairo_surface_destroy (&surface->intel.drm.base); + return (i915_surface_t *) _cairo_surface_create_in_error (status); + } + + return surface; + } + + status = cairo_device_acquire (&device->intel.base.base); + if (unlikely (status)) + return (i915_surface_t *) _cairo_surface_create_in_error (status); + + switch (image->format) { + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + caches = &device->image_caches[0]; + format = CAIRO_FORMAT_ARGB32; + bpp = 4; + break; + case CAIRO_FORMAT_A8: + case CAIRO_FORMAT_A1: + caches = &device->image_caches[1]; + format = CAIRO_FORMAT_A8; + bpp = 1; + break; + default: + ASSERT_NOT_REACHED; + status = _cairo_error (CAIRO_STATUS_INVALID_FORMAT); + goto CLEANUP_DEVICE; + } + + node = NULL; + cairo_list_foreach_entry (cache, intel_buffer_cache_t, caches, link) { + if (! 
intel_bo_is_inactive (&device->intel, cache->buffer.bo)) + continue; + + status = _cairo_rtree_insert (&cache->rtree, width, height, &node); + if (unlikely (_cairo_status_is_error (status))) + goto CLEANUP_DEVICE; + if (status == CAIRO_STATUS_SUCCESS) + break; + } + if (node == NULL) { + cache = malloc (sizeof (intel_buffer_cache_t)); + if (unlikely (cache == NULL)) { + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + goto CLEANUP_DEVICE; + } + + status = i915_buffer_cache_init (cache, device, format, + IMAGE_CACHE_WIDTH, + IMAGE_CACHE_HEIGHT); + if (unlikely (status)) { + free (cache); + goto CLEANUP_DEVICE; + } + + _cairo_rtree_init (&cache->rtree, + IMAGE_CACHE_WIDTH, + IMAGE_CACHE_HEIGHT, + 4, + sizeof (i915_image_private_t), + NULL); + + status = _cairo_rtree_insert (&cache->rtree, width, height, &node); + assert (status == CAIRO_STATUS_SUCCESS); + + cairo_list_init (&cache->link); + } + cairo_list_move (&cache->link, caches); + ((i915_image_private_t *) node)->container = cache; + + status = intel_bo_put_image (&device->intel, + cache->buffer.bo, cache->buffer.stride, + image, + 0, 0, + width, height, + node->x, node->y); + if (unlikely (status)) + goto CLEANUP_CACHE; + + surface = malloc (sizeof (i915_surface_t)); + if (unlikely (surface == NULL)) { + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + goto CLEANUP_CACHE; + } + + i915_surface_init (surface, image->base.content, &device->intel.base); + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->intel.drm.stride = cache->buffer.stride; + + surface->map0 |= MS3_tiling (cache->buffer.bo->tiling) | + ((height - 1) << MS3_HEIGHT_SHIFT) | + ((width - 1) << MS3_WIDTH_SHIFT); + surface->map1 = (surface->intel.drm.stride/4 - 1) << MS4_PITCH_SHIFT; + + surface->intel.drm.bo = &intel_bo_reference (cache->buffer.bo)->base; + surface->offset = node->y * cache->buffer.stride + bpp * node->x; + + surface->cache = (i915_image_private_t *) node; + cache->ref_count++; + + cairo_device_release (&device->intel.base.base); + + return surface; + +CLEANUP_CACHE: + _cairo_rtree_node_destroy (&cache->rtree, node); + if (cache->ref_count == 0) { + intel_bo_destroy (&device->intel, cache->buffer.bo); + _cairo_rtree_fini (&cache->rtree); + cairo_list_del (&cache->link); + free (cache); + } +CLEANUP_DEVICE: + cairo_device_release (&device->intel.base.base); + return (i915_surface_t *) _cairo_surface_create_in_error (status); +} + +static cairo_surface_t * +i915_surface_create_from_cacheable_image (cairo_drm_device_t *device, + cairo_surface_t *source) +{ + i915_surface_t *surface; + cairo_image_surface_t *image; + void *image_extra; + cairo_status_t status; + + status = _cairo_surface_acquire_source_image (source, &image, &image_extra); + if (unlikely (status)) + return _cairo_surface_create_in_error (status); + + surface = i915_surface_create_from_cacheable_image_internal ((i915_device_t *) device, image); + + _cairo_surface_release_source_image (source, image, image_extra); + + return &surface->intel.drm.base; +} + +static cairo_status_t +i915_surface_enable_scan_out (void *abstract_surface) +{ + i915_surface_t *surface = abstract_surface; + intel_bo_t *bo; + cairo_status_t status; + + if (unlikely (surface->intel.drm.bo == NULL)) + return _cairo_error (CAIRO_STATUS_INVALID_SIZE); + + bo = to_intel_bo (surface->intel.drm.bo); + if (bo->tiling == I915_TILING_Y) { + status = i915_surface_batch_flush (surface); + if (unlikely (status)) + return status; + + intel_bo_set_tiling (to_intel_device 
(surface->intel.drm.base.device), + bo, I915_TILING_X, surface->intel.drm.stride); + if (bo->tiling == I915_TILING_X) { + surface->map0 &= ~MS3_tiling (I915_TILING_Y); + surface->map0 |= MS3_tiling (I915_TILING_X); + } + } + + if (unlikely (bo->tiling == I915_TILING_Y)) + return _cairo_error (CAIRO_STATUS_INVALID_FORMAT); /* XXX */ + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_int_status_t +i915_device_flush (cairo_drm_device_t *device) +{ + cairo_status_t status; + + if (unlikely (device->base.finished)) + return CAIRO_STATUS_SUCCESS; + + status = cairo_device_acquire (&device->base); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = i915_batch_flush ((i915_device_t *) device); + cairo_device_release (&device->base); + } + + return status; +} + +static cairo_int_status_t +i915_device_throttle (cairo_drm_device_t *device) +{ + cairo_status_t status; + + status = cairo_device_acquire (&device->base); + if (unlikely (status)) + return status; + + status = i915_batch_flush ((i915_device_t *) device); + intel_throttle ((intel_device_t *) device); + + cairo_device_release (&device->base); + + return status; +} + +static void +i915_device_destroy (void *data) +{ + i915_device_t *device = data; + + if (device->last_vbo) + intel_bo_destroy (&device->intel, device->last_vbo); + + i915_batch_cleanup (device); + + intel_device_fini (&device->intel); + free (device); +} + +COMPILE_TIME_ASSERT (sizeof (i915_batch_setup) == sizeof (((i915_device_t *)0)->batch_header)); +COMPILE_TIME_ASSERT (offsetof (i915_device_t, batch_base) == offsetof (i915_device_t, batch_header) + sizeof (i915_batch_setup)); + +cairo_drm_device_t * +_cairo_drm_i915_device_create (int fd, dev_t dev_id, int vendor_id, int chip_id) +{ + i915_device_t *device; + cairo_status_t status; + uint64_t gtt_size; + int n; + + if (! 
intel_info (fd, &gtt_size))
+	return NULL;
+
+    device = malloc (sizeof (i915_device_t));
+    if (device == NULL)
+	return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY);
+
+    status = intel_device_init (&device->intel, fd);
+    if (unlikely (status)) {
+	free (device);
+	return (cairo_drm_device_t *) _cairo_device_create_in_error (status);
+    }
+
+    device->debug = 0;
+    if (getenv ("CAIRO_DEBUG_DRM") != NULL)
+	device->debug = I915_DEBUG_BATCH;
+
+    device->batch.gtt_size = I915_BATCH_SIZE;
+    device->batch.exec_count = 0;
+    device->batch.reloc_count = 0;
+    device->batch.used = 0;
+
+    memcpy (device->batch_header, i915_batch_setup, sizeof (i915_batch_setup));
+    device->vbo = 0;
+    device->vbo_offset = 0;
+    device->vbo_used = 0;
+    device->vertex_index = 0;
+    device->vertex_count = 0;
+    device->last_vbo = NULL;
+
+    device->current_n_samplers = 0;
+    device->current_target = NULL;
+    device->current_source = NULL;
+    device->current_mask = NULL;
+    device->current_clip = NULL;
+    device->current_colorbuf = 0;
+
+    for (n = 0; n < ARRAY_LENGTH (device->image_caches); n++)
+	cairo_list_init (&device->image_caches[n]);
+
+    device->intel.base.surface.create = i915_surface_create;
+    device->intel.base.surface.create_for_name = i915_surface_create_for_name;
+    device->intel.base.surface.create_from_cacheable_image = i915_surface_create_from_cacheable_image;
+
+    device->intel.base.surface.flink = _cairo_drm_surface_flink;
+    device->intel.base.surface.enable_scan_out = i915_surface_enable_scan_out;
+    device->intel.base.surface.map_to_image = intel_surface_map_to_image;
+
+    device->intel.base.device.flush = i915_device_flush;
+    device->intel.base.device.throttle = i915_device_throttle;
+    device->intel.base.device.destroy = i915_device_destroy;
+
+    i915_device_reset (device);
+
+    return _cairo_drm_device_init (&device->intel.base,
+				   fd, dev_id, vendor_id, chip_id,
+				   2048);
+}
diff --git a/src/drm/cairo-drm-i965-glyphs.c b/src/drm/cairo-drm-i965-glyphs.c
new file mode 100644
index 00000000..fa86e341
--- /dev/null
+++ b/src/drm/cairo-drm-i965-glyphs.c
@@ -0,0 +1,500 @@
+/* cairo - a vector graphics library with display and print output
+ *
+ * Copyright © 2009 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it either under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation
+ * (the "LGPL") or, at your option, under the terms of the Mozilla
+ * Public License Version 1.1 (the "MPL"). If you do not alter this
+ * notice, a recipient may use your version of this file under either
+ * the MPL or the LGPL.
+ *
+ * You should have received a copy of the LGPL along with this library
+ * in the file COPYING-LGPL-2.1; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * You should have received a copy of the MPL along with this library
+ * in the file COPYING-MPL-1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
+ * OF ANY KIND, either express or implied. See the LGPL or the MPL for
+ * the specific language governing rights and limitations.
+ *
+ * The Original Code is the cairo graphics library.
+ * + * The Initial Developer of the Original Code is Red Hat, Inc. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-composite-rectangles-private.h" +#include "cairo-drm-i965-private.h" +#include "cairo-error-private.h" +#include "cairo-rtree-private.h" + +typedef struct _i965_glyphs i965_glyphs_t; + +typedef float * +(*i965_get_rectangle_func_t) (i965_glyphs_t *glyphs); + +struct _i965_glyphs { + i965_get_rectangle_func_t get_rectangle; + i965_shader_t shader; + + struct i965_vbo head, *tail; + + unsigned int vbo_offset; + float *vbo_base; +}; + +static float * +i965_glyphs_emit_rectangle (i965_glyphs_t *glyphs) +{ + return i965_add_rectangle (glyphs->shader.device); +} + +static float * +i965_glyphs_accumulate_rectangle (i965_glyphs_t *glyphs) +{ + float *vertices; + uint32_t size; + + size = glyphs->shader.device->rectangle_size; + if (unlikely (glyphs->vbo_offset + size > I965_VERTEX_SIZE)) { + struct i965_vbo *vbo; + + intel_bo_unmap (glyphs->tail->bo); + + vbo = malloc (sizeof (struct i965_vbo)); + if (unlikely (vbo == NULL)) { + /* throw error! */ + } + + glyphs->tail->next = vbo; + glyphs->tail = vbo; + + vbo->next = NULL; + vbo->bo = intel_bo_create (&glyphs->shader.device->intel, + I965_VERTEX_SIZE, FALSE); + vbo->count = 0; + + glyphs->vbo_offset = 0; + glyphs->vbo_base = intel_bo_map (&glyphs->shader.device->intel, vbo->bo); + } + + vertices = glyphs->vbo_base + glyphs->vbo_offset; + glyphs->vbo_offset += size; + glyphs->tail->count += 3; + + return vertices; +} + +static void +i965_add_glyph_rectangle (i965_glyphs_t *glyphs, + int x1, int y1, + int x2, int y2, + intel_glyph_t *glyph) +{ + float *v; + + /* Each vertex is: + * 2 vertex coordinates + * 1 glyph texture coordinate + */ + + v = glyphs->get_rectangle (glyphs); + + /* bottom right */ + *v++ = x2; *v++ = y2; + *v++ = glyph->texcoord[0]; + + /* bottom left */ + *v++ = x1; *v++ = y2; + *v++ = glyph->texcoord[1]; + + /* top left */ + *v++ = x1; *v++ = y1; + *v++ = glyph->texcoord[2]; +} + +static cairo_status_t +i965_surface_mask_internal (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *source, + i965_surface_t *mask, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + i965_device_t *device; + i965_shader_t shader; + cairo_region_t *clip_region = NULL; + cairo_status_t status; + + i965_shader_init (&shader, dst, op); + + status = i965_shader_acquire_pattern (&shader, &shader.source, + source, &extents->bounded); + if (unlikely (status)) + return status; + + shader.mask.type.vertex = VS_NONE; + shader.mask.type.fragment = FS_SURFACE; + shader.mask.base.content = mask->intel.drm.base.content; + shader.mask.base.filter = i965_filter (CAIRO_FILTER_NEAREST); + shader.mask.base.extend = i965_extend (CAIRO_EXTEND_NONE); + + cairo_matrix_init_translate (&shader.mask.base.matrix, + -extents->bounded.x + NEAREST_BIAS, + -extents->bounded.y + NEAREST_BIAS); + cairo_matrix_scale (&shader.mask.base.matrix, + 1. / mask->intel.drm.width, + 1. 
/ mask->intel.drm.height); + + shader.mask.base.bo = to_intel_bo (mask->intel.drm.bo); + shader.mask.base.format = mask->intel.drm.format; + shader.mask.base.width = mask->intel.drm.width; + shader.mask.base.height = mask->intel.drm.height; + shader.mask.base.stride = mask->intel.drm.stride; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&shader, clip); + } + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SHADER; + + device = i965_device (dst); + + status = i965_shader_commit (&shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + if (clip_region != NULL) { + unsigned int n, num_rectangles; + + num_rectangles = cairo_region_num_rectangles (clip_region); + for (n = 0; n < num_rectangles; n++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, n, &rect); + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + } else { + i965_shader_add_rectangle (&shader, + extents->bounded.x, + extents->bounded.y, + extents->bounded.width, + extents->bounded.height); + } + + if (! extents->is_bounded) + status = i965_fixup_unbounded (dst, extents, clip); + + CLEANUP_DEVICE: + cairo_device_release (&device->intel.base.base); + CLEANUP_SHADER: + i965_shader_fini (&shader); + return status; +} + +cairo_int_status_t +i965_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *g, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + i965_surface_t *surface = abstract_surface; + i965_surface_t *mask = NULL; + i965_device_t *device; + i965_glyphs_t glyphs; + cairo_composite_rectangles_t extents; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_bool_t overlap; + cairo_region_t *clip_region = NULL; + intel_bo_t *last_bo = NULL; + cairo_scaled_glyph_t *glyph_cache[64]; + cairo_status_t status; + int mask_x = 0, mask_y = 0; + int i = 0; + + *num_remaining = 0; + status = _cairo_composite_rectangles_init_for_glyphs (&extents, + surface->intel.drm.width, + surface->intel.drm.height, + op, source, + scaled_font, + g, num_glyphs, + clip, + &overlap); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL && extents.is_bounded) { + clip = _cairo_clip_init_copy (&local_clip, clip); + status = _cairo_clip_rectangle (clip, &extents.bounded); + if (unlikely (status)) + return status; + + have_clip = TRUE; + } + + if (overlap || ! 
extents.is_bounded) { + cairo_content_t content; + + content = CAIRO_CONTENT_ALPHA; + if (scaled_font->options.antialias == CAIRO_ANTIALIAS_SUBPIXEL) + content |= CAIRO_CONTENT_COLOR; + + mask = (i965_surface_t *) + i965_surface_create_internal (&i965_device (surface)->intel.base, + content, + extents.bounded.width, + extents.bounded.height, + I965_TILING_DEFAULT, + TRUE); + if (unlikely (mask->intel.drm.base.status)) + return mask->intel.drm.base.status; + + status = _cairo_surface_paint (&mask->intel.drm.base, + CAIRO_OPERATOR_CLEAR, + &_cairo_pattern_clear.base, + NULL); + if (unlikely (status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + i965_shader_init (&glyphs.shader, mask, CAIRO_OPERATOR_ADD); + + status = i965_shader_acquire_pattern (&glyphs.shader, &glyphs.shader.source, + &_cairo_pattern_white.base, + &extents.bounded); + if (unlikely (status)) { + cairo_surface_destroy (&mask->intel.drm.base); + return status; + } + + mask_x = -extents.bounded.x; + mask_y = -extents.bounded.y; + } else { + i965_shader_init (&glyphs.shader, surface, op); + + status = i965_shader_acquire_pattern (&glyphs.shader, &glyphs.shader.source, + source, &extents.bounded); + if (unlikely (status)) + return status; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&glyphs.shader, clip); + } + } + + glyphs.head.next = NULL; + glyphs.head.bo = NULL; + glyphs.head.count = 0; + glyphs.tail = &glyphs.head; + + device = i965_device (surface); + if (mask != NULL || clip_region == NULL) { + glyphs.get_rectangle = i965_glyphs_emit_rectangle; + } else { + glyphs.get_rectangle = i965_glyphs_accumulate_rectangle; + glyphs.head.bo = intel_bo_create (&device->intel, + I965_VERTEX_SIZE, FALSE); + if (unlikely (glyphs.head.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + glyphs.vbo_base = intel_bo_map (&device->intel, glyphs.head.bo); + } + glyphs.vbo_offset = 0; + + status = cairo_device_acquire (&device->intel.base.base); + if (unlikely (status)) + goto CLEANUP_GLYPHS; + + _cairo_scaled_font_freeze_cache (scaled_font); + //private = _cairo_scaled_font_get_device (scaled_font, device); + if (scaled_font->surface_private == NULL) { + /* XXX couple into list to remove on context destruction */ + scaled_font->surface_private = device; + scaled_font->surface_backend = surface->intel.drm.base.backend; + } + + memset (glyph_cache, 0, sizeof (glyph_cache)); + + for (i = 0; i < num_glyphs; i++) { + cairo_scaled_glyph_t *scaled_glyph; + int x, y, x1, x2, y1, y2; + int cache_index = g[i].index % ARRAY_LENGTH (glyph_cache); + intel_glyph_t *glyph; + + scaled_glyph = glyph_cache[cache_index]; + if (scaled_glyph == NULL || + _cairo_scaled_glyph_index (scaled_glyph) != g[i].index) + { + status = _cairo_scaled_glyph_lookup (scaled_font, + g[i].index, + CAIRO_SCALED_GLYPH_INFO_METRICS, + &scaled_glyph); + if (unlikely (status)) + goto FINISH; + + glyph_cache[cache_index] = scaled_glyph; + } + + if (unlikely (scaled_glyph->metrics.width == 0 || + scaled_glyph->metrics.height == 0)) + { + continue; + } + + /* XXX glyph images are snapped to pixel locations */ + x = _cairo_lround (g[i].x); + y = _cairo_lround (g[i].y); + + x1 = x + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.x); + y1 = y + _cairo_fixed_integer_floor (scaled_glyph->bbox.p1.y); + x2 = x + _cairo_fixed_integer_ceil 
(scaled_glyph->bbox.p2.x);
+	y2 = y + _cairo_fixed_integer_ceil (scaled_glyph->bbox.p2.y);
+
+	if (x2 < extents.bounded.x ||
+	    y2 < extents.bounded.y ||
+	    x1 > extents.bounded.x + extents.bounded.width ||
+	    y1 > extents.bounded.y + extents.bounded.height)
+	{
+	    continue;
+	}
+
+	if (scaled_glyph->surface_private == NULL) {
+	    status = intel_get_glyph (&device->intel, scaled_font, scaled_glyph);
+	    if (unlikely (status == CAIRO_INT_STATUS_NOTHING_TO_DO)) {
+		status = CAIRO_STATUS_SUCCESS;
+		continue;
+	    }
+	    if (unlikely (status))
+		goto FINISH;
+	}
+	glyph = intel_glyph_pin (scaled_glyph->surface_private);
+
+	if (glyph->cache->buffer.bo != last_bo) {
+	    intel_buffer_cache_t *cache = glyph->cache;
+
+	    glyphs.shader.mask.type.vertex = VS_GLYPHS;
+	    glyphs.shader.mask.type.fragment = FS_GLYPHS;
+	    glyphs.shader.mask.type.pattern = PATTERN_BASE;
+
+	    glyphs.shader.mask.base.bo = cache->buffer.bo;
+	    glyphs.shader.mask.base.format = cache->buffer.format;
+	    glyphs.shader.mask.base.width = cache->buffer.width;
+	    glyphs.shader.mask.base.height = cache->buffer.height;
+	    glyphs.shader.mask.base.stride = cache->buffer.stride;
+	    glyphs.shader.mask.base.filter = i965_filter (CAIRO_FILTER_NEAREST);
+	    glyphs.shader.mask.base.extend = i965_extend (CAIRO_EXTEND_NONE);
+	    glyphs.shader.mask.base.content = CAIRO_CONTENT_ALPHA; /* XXX */
+
+	    glyphs.shader.committed = FALSE;
+	    status = i965_shader_commit (&glyphs.shader, device);
+	    if (unlikely (status))
+		goto FINISH;
+
+	    last_bo = cache->buffer.bo;
+	}
+
+	x1 += mask_x; x2 += mask_x;
+	y1 += mask_y; y2 += mask_y;
+
+	i965_add_glyph_rectangle (&glyphs, x1, y1, x2, y2, glyph);
+    }
+
+    /* The accumulated vbo chain only exists on the unmasked, clipped
+     * path (get_rectangle == accumulate), so submit it for exactly
+     * that combination. */
+    if (mask == NULL && clip_region != NULL) {
+	intel_bo_unmap (glyphs.tail->bo);
+	i965_clipped_vertices (device, &glyphs.head, clip_region);
+    }
+
+    status = CAIRO_STATUS_SUCCESS;
+  FINISH:
+    _cairo_scaled_font_thaw_cache (scaled_font);
+    cairo_device_release (surface->intel.drm.base.device);
+  CLEANUP_GLYPHS:
+    i965_shader_fini (&glyphs.shader);
+    if (glyphs.head.bo != NULL) {
+	struct i965_vbo *vbo, *next;
+
+	intel_bo_destroy (&device->intel, glyphs.head.bo);
+	for (vbo = glyphs.head.next; vbo != NULL; vbo = next) {
+	    next = vbo->next;
+	    intel_bo_destroy (&device->intel, vbo->bo);
+	    free (vbo);
+	}
+    }
+
+    if (unlikely (status == CAIRO_INT_STATUS_UNSUPPORTED)) {
+	cairo_path_fixed_t path;
+
+	_cairo_path_fixed_init (&path);
+	status = _cairo_scaled_font_glyph_path (scaled_font,
+						g + i, num_glyphs - i,
+						&path);
+	if (mask_x | mask_y) {
+	    _cairo_path_fixed_translate (&path,
+					 _cairo_fixed_from_int (mask_x),
+					 _cairo_fixed_from_int (mask_y));
+	}
+	if (likely (status == CAIRO_STATUS_SUCCESS)) {
+	    status = surface->intel.drm.base.backend->fill (glyphs.shader.target,
+							    glyphs.shader.op,
+							    mask != NULL ?
&_cairo_pattern_white.base : source, + &path, + CAIRO_FILL_RULE_WINDING, + 0, + scaled_font->options.antialias, + clip); + } + _cairo_path_fixed_fini (&path); + } + + if (mask != NULL) { + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = i965_surface_mask_internal (surface, op, source, mask, + clip, &extents); + } + cairo_surface_finish (&mask->intel.drm.base); + cairo_surface_destroy (&mask->intel.drm.base); + } + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} diff --git a/src/drm/cairo-drm-i965-private.h b/src/drm/cairo-drm-i965-private.h new file mode 100644 index 00000000..14fabe9d --- /dev/null +++ b/src/drm/cairo-drm-i965-private.h @@ -0,0 +1,742 @@ +#ifndef CAIRO_DRM_I965_PRIVATE_H +#define CAIRO_DRM_I965_PRIVATE_H + +#include "cairo-drm-intel-private.h" + +#include "cairo-hash-private.h" +#include "cairo-freelist-private.h" + +#include "cairo-drm-intel-brw-defines.h" + +#include <setjmp.h> + +#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3) + +/* + * New regs for broadwater -- we need to split this file up sensibly somehow. + */ +#define BRW_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define BRW_URB_FENCE BRW_3D(0, 0, 0) +#define BRW_CS_URB_STATE BRW_3D(0, 0, 1) +#define BRW_CONSTANT_BUFFER BRW_3D(0, 0, 2) +#define BRW_STATE_PREFETCH BRW_3D(0, 0, 3) + +#define BRW_STATE_BASE_ADDRESS BRW_3D(0, 1, 1) +#define BRW_STATE_SIP BRW_3D(0, 1, 2) +#define BRW_PIPELINE_SELECT BRW_3D(0, 1, 4) + +#define NEW_PIPELINE_SELECT BRW_3D(1, 1, 4) + +#define BRW_MEDIA_STATE_POINTERS BRW_3D(2, 0, 0) +#define BRW_MEDIA_OBJECT BRW_3D(2, 1, 0) + +#define BRW_3DSTATE_PIPELINED_POINTERS BRW_3D(3, 0, 0) +#define BRW_3DSTATE_BINDING_TABLE_POINTERS BRW_3D(3, 0, 1) +#define BRW_3DSTATE_VERTEX_BUFFERS BRW_3D(3, 0, 8) +#define BRW_3DSTATE_VERTEX_ELEMENTS BRW_3D(3, 0, 9) +#define BRW_3DSTATE_INDEX_BUFFER BRW_3D(3, 0, 0xa) +#define BRW_3DSTATE_VF_STATISTICS BRW_3D(3, 0, 0xb) + +#define BRW_3DSTATE_DRAWING_RECTANGLE BRW_3D(3, 1, 0) +#define BRW_3DSTATE_CONSTANT_COLOR BRW_3D(3, 1, 1) +#define BRW_3DSTATE_SAMPLER_PALETTE_LOAD BRW_3D(3, 1, 2) +#define BRW_3DSTATE_CHROMA_KEY BRW_3D(3, 1, 4) +#define BRW_3DSTATE_DEPTH_BUFFER BRW_3D(3, 1, 5) +#define BRW_3DSTATE_POLY_STIPPLE_OFFSET BRW_3D(3, 1, 6) +#define BRW_3DSTATE_POLY_STIPPLE_PATTERN BRW_3D(3, 1, 7) +#define BRW_3DSTATE_LINE_STIPPLE BRW_3D(3, 1, 8) +#define BRW_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP BRW_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define BRW_3DSTATE_AA_LINE_PARAMS BRW_3D(3, 1, 0xa) +#define BRW_3DSTATE_GS_SVB_INDEX BRW_3D(3, 1, 0xb) + +#define BRW_PIPE_CONTROL BRW_3D(3, 2, 0) + +#define BRW_3DPRIMITIVE BRW_3D(3, 3, 0) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +#define UF0_CS_REALLOC (1 << 13) +#define UF0_VFE_REALLOC (1 << 12) +#define UF0_SF_REALLOC (1 << 11) +#define UF0_CLIP_REALLOC (1 << 10) +#define UF0_GS_REALLOC (1 << 9) +#define UF0_VS_REALLOC (1 << 8) +#define UF1_CLIP_FENCE_SHIFT 20 +#define UF1_GS_FENCE_SHIFT 10 +#define UF1_VS_FENCE_SHIFT 0 +#define UF2_CS_FENCE_SHIFT 20 +#define UF2_VFE_FENCE_SHIFT 10 +#define UF2_SF_FENCE_SHIFT 0 + +/* for BRW_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* for BRW_3DSTATE_PIPELINED_POINTERS */ +#define BRW_GS_DISABLE 0 +#define BRW_GS_ENABLE 1 +#define BRW_CLIP_DISABLE 0 +#define BRW_CLIP_ENABLE 1 + +/* for BRW_PIPE_CONTROL */ +#define BRW_PIPE_CONTROL_NOWRITE (0 << 14) +#define BRW_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define 
BRW_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define BRW_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define BRW_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define BRW_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define BRW_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2) + +/* VERTEX_BUFFER_STATE Structure */ +#define VB0_BUFFER_INDEX_SHIFT 27 +#define VB0_VERTEXDATA (0 << 26) +#define VB0_INSTANCEDATA (1 << 26) +#define VB0_BUFFER_PITCH_SHIFT 0 + +/* VERTEX_ELEMENT_STATE Structure */ +#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27 +#define VE0_VALID (1 << 26) +#define VE0_FORMAT_SHIFT 16 +#define VE0_OFFSET_SHIFT 0 +#define VE1_VFCOMPONENT_0_SHIFT 28 +#define VE1_VFCOMPONENT_1_SHIFT 24 +#define VE1_VFCOMPONENT_2_SHIFT 20 +#define VE1_VFCOMPONENT_3_SHIFT 16 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 + +/* 3DPRIMITIVE bits */ +#define BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define BRW_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in brw_defines.h */ +#define BRW_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define BRW_SVG_CTL 0x7400 + +#define BRW_SVG_CTL_GS_BA (0 << 8) +#define BRW_SVG_CTL_SS_BA (1 << 8) +#define BRW_SVG_CTL_IO_BA (2 << 8) +#define BRW_SVG_CTL_GS_AUB (3 << 8) +#define BRW_SVG_CTL_IO_AUB (4 << 8) +#define BRW_SVG_CTL_SIP (5 << 8) + +#define BRW_SVG_RDATA 0x7404 +#define BRW_SVG_WORK_CTL 0x7408 + +#define BRW_VF_CTL 0x7500 + +#define BRW_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define BRW_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define BRW_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define BRW_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define BRW_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_VF_STRG_VAL 0x7504 +#define BRW_VF_STR_VL_OVR 0x7508 +#define BRW_VF_VC_OVR 0x750c +#define BRW_VF_STR_PSKIP 0x7510 +#define BRW_VF_MAX_PRIM 0x7514 +#define BRW_VF_RDATA 0x7518 + +#define BRW_VS_CTL 0x7600 +#define BRW_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define BRW_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_VS_STRG_VAL 0x7604 +#define BRW_VS_RDATA 0x7608 + +#define BRW_SF_CTL 0x7b00 +#define BRW_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define BRW_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define BRW_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define BRW_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_SF_STRG_VAL 0x7b04 +#define BRW_SF_RDATA 0x7b18 + +#define BRW_WIZ_CTL 0x7c00 
+#define BRW_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define BRW_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define BRW_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define BRW_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define BRW_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define BRW_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define BRW_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_WIZ_STRG_VAL 0x7c04 +#define BRW_WIZ_RDATA 0x7c18 + +#define BRW_TS_CTL 0x7e00 +#define BRW_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define BRW_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define BRW_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_TS_STRG_VAL 0x7e04 +#define BRW_TS_RDATA 0x7e08 + +#define BRW_TD_CTL 0x8000 +#define BRW_TD_CTL_MUX_SHIFT 8 +#define BRW_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define BRW_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define BRW_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define BRW_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define BRW_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define BRW_TD_CTL2 0x8004 +#define BRW_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define BRW_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define BRW_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define BRW_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define BRW_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define BRW_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define BRW_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define BRW_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define BRW_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define BRW_TD_VF_VS_EMSK 0x8008 +#define BRW_TD_GS_EMSK 0x800c +#define BRW_TD_CLIP_EMSK 0x8010 +#define BRW_TD_SF_EMSK 0x8014 +#define BRW_TD_WIZ_EMSK 0x8018 +#define BRW_TD_0_6_EHTRG_VAL 0x801c +#define BRW_TD_0_7_EHTRG_VAL 0x8020 +#define BRW_TD_0_6_EHTRG_MSK 0x8024 +#define BRW_TD_0_7_EHTRG_MSK 0x8028 +#define BRW_TD_RDATA 0x802c +#define BRW_TD_TS_EMSK 0x8030 + +#define BRW_EU_CTL 0x8800 +#define BRW_EU_CTL_SELECT_SHIFT 16 +#define BRW_EU_CTL_DATA_MUX_SHIFT 8 +#define BRW_EU_ATT_0 0x8810 +#define BRW_EU_ATT_1 0x8814 +#define BRW_EU_ATT_DATA_0 0x8820 +#define BRW_EU_ATT_DATA_1 0x8824 +#define BRW_EU_ATT_CLR_0 0x8830 +#define BRW_EU_ATT_CLR_1 0x8834 +#define BRW_EU_RDATA 0x8840 + +typedef struct i965_device i965_device_t; +typedef struct i965_surface i965_surface_t; +typedef struct i965_shader i965_shader_t; +typedef struct i965_stream i965_stream_t; + +struct i965_sf_state { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +cairo_private cairo_bool_t +i965_sf_state_equal (const void *, const void *); + +struct i965_cc_state { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +cairo_private cairo_bool_t +i965_cc_state_equal (const void *, const void *); + +struct i965_wm_kernel { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +struct i965_wm_state { + cairo_hash_entry_t entry; + uint32_t kernel; + uint32_t sampler; + uint32_t offset; +}; + +cairo_private cairo_bool_t 
+i965_wm_state_equal (const void *, const void *); + +struct i965_wm_binding { + cairo_hash_entry_t entry; + uint32_t table[4]; + int size; + uint32_t offset; +}; + +cairo_private cairo_bool_t +i965_wm_binding_equal (const void *, const void *); + +struct i965_sampler { + cairo_hash_entry_t entry; + uint32_t offset; +}; + +struct i965_vbo { + struct i965_vbo *next; + intel_bo_t *bo; + unsigned int count; +}; + +struct i965_surface { + intel_surface_t intel; + + uint32_t stream; + uint32_t offset; +}; + +struct i965_pending_relocation { + uint32_t offset; + uint32_t read_domains; + uint32_t write_domain; + uint32_t delta; +}; + +struct i965_stream { + uint32_t used; + uint32_t committed; + uint32_t size; + uint8_t *data; + uint32_t serial; + + int num_pending_relocations; + int max_pending_relocations; + struct i965_pending_relocation *pending_relocations; + + int num_relocations; + int max_relocations; + struct drm_i915_gem_relocation_entry *relocations; +}; + +#define I965_BATCH_SIZE (16 * 4096) +#define I965_SURFACE_SIZE (16 * 4096) +#define I965_GENERAL_SIZE (16 * 4096) +#define I965_CONSTANT_SIZE (16 * 4096) +#define I965_VERTEX_SIZE (128 * 4096) + +#define I965_TILING_DEFAULT I915_TILING_Y + + +struct i965_device { + intel_device_t intel; + + cairo_bool_t is_g4x; + + i965_shader_t *shader; /* note: only valid during geometry emission */ + + /* track state changes */ + struct i965_sf_state sf_state; + struct i965_cc_state cc_state; + struct i965_wm_state wm_state; + struct i965_wm_binding wm_binding; + + i965_surface_t *target; + uint32_t target_offset; + + intel_bo_t *source; + uint32_t source_offset; + + intel_bo_t *mask; + uint32_t mask_offset; + + intel_bo_t *clip; + uint32_t clip_offset; + + uint32_t draw_rectangle; + + uint32_t vs_offset; + uint32_t border_color_offset; + cairo_hash_table_t *sf_states; + cairo_hash_table_t *cc_states; + cairo_hash_table_t *wm_kernels; + cairo_hash_table_t *wm_states; + cairo_hash_table_t *wm_bindings; + cairo_hash_table_t *samplers; + intel_bo_t *general_state; + + cairo_freelist_t sf_freelist; + cairo_freelist_t cc_freelist; + cairo_freelist_t wm_kernel_freelist; + cairo_freelist_t wm_state_freelist; + cairo_freelist_t wm_binding_freelist; + cairo_freelist_t sampler_freelist; + + uint32_t vertex_type; + uint32_t vertex_size; + uint32_t rectangle_size; + uint32_t last_vertex_size; + + float *constants; /* 4 x matrix + 2 x source */ + unsigned constants_size; + cairo_bool_t have_urb_fences; + + i965_stream_t batch; + uint8_t batch_base[I965_BATCH_SIZE]; + struct drm_i915_gem_relocation_entry batch_relocations[1024]; + + i965_stream_t surface; + uint8_t surface_base[I965_SURFACE_SIZE]; + struct i965_pending_relocation surface_pending_relocations[1]; + struct drm_i915_gem_relocation_entry surface_relocations[512]; + + i965_stream_t general; + uint8_t general_base[I965_GENERAL_SIZE]; + struct i965_pending_relocation general_pending_relocations[1]; + + i965_stream_t vertex; + uint8_t vertex_base[I965_VERTEX_SIZE]; + struct i965_pending_relocation vertex_pending_relocations[512]; + + i965_stream_t constant; + uint8_t constant_base[I965_CONSTANT_SIZE]; + struct i965_pending_relocation constant_pending_relocations[512]; + + struct { + size_t gtt_size; + + intel_bo_t *bo[1024]; + int count; + + struct drm_i915_gem_exec_object2 exec[1024]; + } exec; + cairo_list_t flush; +}; + +typedef enum { + VS_NONE = 0, + VS_GLYPHS, + VS_SPANS, +} i965_vertex_shader_t; + +typedef enum { + FS_NONE = 0, + FS_CONSTANT, + FS_LINEAR, + FS_RADIAL, + FS_SURFACE, + FS_GLYPHS, 
+ FS_SPANS, +} i965_fragment_shader_t; + +typedef enum { + PATTERN_BASE, + PATTERN_SOLID, + PATTERN_LINEAR, + PATTERN_RADIAL, + PATTERN_SURFACE, +} i965_shader_channel_t; +#define PATTERN_NONE (i965_shader_channel_t)-1 + +struct i965_shader { + i965_device_t *device; + i965_surface_t *target; + + cairo_operator_t op; + + cairo_bool_t committed; + cairo_bool_t need_combine; + + float constants[4*8 + 2*8]; /* 4 x matrix + 2 x source */ + unsigned constants_size; + + union i965_shader_channel { + struct { + i965_vertex_shader_t vertex; + i965_fragment_shader_t fragment; + i965_shader_channel_t pattern; + } type; + struct i965_shader_base { + i965_vertex_shader_t vertex; + i965_fragment_shader_t fragment; + i965_shader_channel_t pattern; + + uint32_t mode; + + float constants[8]; + unsigned constants_size; + + intel_bo_t *bo; + cairo_format_t format; + cairo_content_t content; + int width, height, stride; + int filter, extend; + cairo_matrix_t matrix; + cairo_bool_t has_component_alpha; + } base; + struct i965_shader_solid { + struct i965_shader_base base; + } solid; + struct i965_shader_linear { + struct i965_shader_base base; + } linear; + struct i965_shader_radial { + struct i965_shader_base base; + } radial; + struct i965_shader_surface { + struct i965_shader_base base; + cairo_surface_t *surface; + } surface; + } source, mask, clip, dst; + + jmp_buf unwind; +}; + +enum i965_shader_linear_mode { + /* XXX REFLECT */ + LINEAR_TEXTURE, + LINEAR_NONE, + LINEAR_REPEAT, + LINEAR_PAD, +}; + +enum i965_shader_radial_mode { + RADIAL_ONE, + RADIAL_TWO +}; + +typedef cairo_status_t +(*i965_spans_func_t) (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents); + +static inline i965_device_t * +i965_device (i965_surface_t *surface) +{ + return (i965_device_t *) surface->intel.drm.base.device; +} + +cairo_private void +i965_emit_relocation (i965_device_t *device, + i965_stream_t *stream, + intel_bo_t *target, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); + +static always_inline uint32_t +i965_stream_emit (i965_stream_t *stream, const void *data, size_t size) +{ + uint32_t offset; + + offset = stream->used; + assert (offset + size <= stream->size); + memcpy (stream->data + offset, data, size); + stream->used += size; + + return offset; +} + +static always_inline void +i965_stream_align (i965_stream_t *stream, uint32_t size) +{ + stream->used = (stream->used + size - 1) & -size; +} + +static always_inline void * +i965_stream_alloc (i965_stream_t *stream, uint32_t align, uint32_t size) +{ + void *ptr; + + if (align) + i965_stream_align (stream, align); + + assert (stream->used + size <= stream->size); + ptr = stream->data + stream->used; + stream->used += size; + + return ptr; +} + +static always_inline uint32_t +i965_stream_offsetof (i965_stream_t *stream, const void *ptr) +{ + return (char *) ptr - (char *) stream->data; +} + +cairo_private void +i965_stream_commit (i965_device_t *device, + i965_stream_t *stream); + +cairo_private void +i965_general_state_reset (i965_device_t *device); + +static inline void +i965_batch_emit_dword (i965_device_t *device, uint32_t dword) +{ + *(uint32_t *) (device->batch.data + device->batch.used) = dword; + device->batch.used += 4; +} + +#define OUT_BATCH(dword) i965_batch_emit_dword(device, dword) + +cairo_private void +i965_clipped_vertices (i965_device_t *device, + struct i965_vbo *vbo, + cairo_region_t *clip_region); + +cairo_private void +i965_flush_vertices (i965_device_t *device); 
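The stream helpers above are plain bump allocators over fixed-size arenas. A minimal standalone sketch (not part of the patch) of the emit/align protocol, with stream_t reduced to the three fields that i965_stream_emit() and i965_stream_align() actually touch:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    typedef struct { uint32_t used, size; uint8_t *data; } stream_t;

    /* Copy a blob into the arena and return its offset; in the backend these
     * offsets are what the pending-relocation lists later patch up. */
    static uint32_t
    stream_emit (stream_t *s, const void *data, size_t size)
    {
        uint32_t offset = s->used;
        assert (offset + size <= s->size); /* arena is sized up front */
        memcpy (s->data + offset, data, size);
        s->used += size;
        return offset;
    }

    /* Round the write pointer up; align must be a power of two. */
    static void
    stream_align (stream_t *s, uint32_t align)
    {
        s->used = (s->used + align - 1) & -align;
    }

    int
    main (void)
    {
        uint8_t arena[4096];
        stream_t s = { 0, sizeof (arena), arena };
        uint32_t dword = 0x7fffffff;

        stream_emit (&s, &dword, sizeof (dword)); /* used = 4 */
        stream_align (&s, 64); /* used = 64, e.g. before emitting a kernel */
        return 0;
    }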
+
+cairo_private void
+i965_finish_vertices (i965_device_t *device);
+
+static inline float *
+i965_add_rectangle (i965_device_t *device)
+{
+    float *vertices;
+    uint32_t size;
+
+    size = device->rectangle_size;
+    if (unlikely (device->vertex.used + size > device->vertex.size))
+	i965_finish_vertices (device);
+
+    vertices = (float *) (device->vertex.data + device->vertex.used);
+    device->vertex.used += size;
+
+    return vertices;
+}
+
+static inline void
+i965_shader_add_rectangle (const i965_shader_t *shader,
+			   int x, int y,
+			   int w, int h)
+{
+    float *v;
+
+    v = i965_add_rectangle (shader->device);
+
+    /* bottom-right */
+    *v++ = x + w;
+    *v++ = y + h;
+
+    /* bottom-left */
+    *v++ = x;
+    *v++ = y + h;
+
+    /* top-left */
+    *v++ = x;
+    *v++ = y;
+}
+
+cairo_private cairo_surface_t *
+i965_surface_create_internal (cairo_drm_device_t *base_dev,
+			      cairo_content_t content,
+			      int width, int height,
+			      uint32_t tiling,
+			      cairo_bool_t gpu_target);
+
+cairo_private cairo_status_t
+i965_clip_and_composite_spans (i965_surface_t *dst,
+			       cairo_operator_t op,
+			       const cairo_pattern_t *pattern,
+			       cairo_antialias_t antialias,
+			       i965_spans_func_t draw_func,
+			       void *draw_closure,
+			       const cairo_composite_rectangles_t *extents,
+			       cairo_clip_t *clip);
+
+cairo_private cairo_int_status_t
+i965_surface_glyphs (void *abstract_surface,
+		     cairo_operator_t op,
+		     const cairo_pattern_t *source,
+		     cairo_glyph_t *glyphs,
+		     int num_glyphs,
+		     cairo_scaled_font_t *scaled_font,
+		     cairo_clip_t *clip,
+		     int *num_remaining);
+
+cairo_private void
+i965_shader_init (i965_shader_t *shader,
+		  i965_surface_t *dst,
+		  cairo_operator_t op);
+
+cairo_private cairo_status_t
+i965_shader_acquire_pattern (i965_shader_t *shader,
+			     union i965_shader_channel *src,
+			     const cairo_pattern_t *pattern,
+			     const cairo_rectangle_int_t *extents);
+
+cairo_private void
+i965_shader_set_clip (i965_shader_t *shader,
+		      cairo_clip_t *clip);
+
+cairo_private cairo_status_t
+i965_shader_commit (i965_shader_t *shader,
+		    i965_device_t *device);
+
+cairo_private void
+i965_shader_fini (i965_shader_t *shader);
+
+cairo_private cairo_status_t
+i965_device_flush (i965_device_t *device);
+
+cairo_private cairo_status_t
+i965_fixup_unbounded (i965_surface_t *dst,
+		      const cairo_composite_rectangles_t *extents,
+		      cairo_clip_t *clip);
+
+static inline int
+i965_filter (cairo_filter_t filter)
+{
+    switch (filter) {
+    default:
+    case CAIRO_FILTER_FAST:
+    case CAIRO_FILTER_NEAREST:
+	return BRW_MAPFILTER_NEAREST;
+
+    case CAIRO_FILTER_GOOD:
+    case CAIRO_FILTER_BEST:
+    case CAIRO_FILTER_BILINEAR:
+    case CAIRO_FILTER_GAUSSIAN:
+	return BRW_MAPFILTER_LINEAR;
+    }
+}
+
+static inline int
+i965_extend (cairo_extend_t extend)
+{
+    switch (extend) {
+    default:
+    case CAIRO_EXTEND_NONE:
+	return BRW_TEXCOORDMODE_CLAMP_BORDER;
+    case CAIRO_EXTEND_REPEAT:
+	return BRW_TEXCOORDMODE_WRAP;
+    case CAIRO_EXTEND_PAD:
+	return BRW_TEXCOORDMODE_CLAMP;
+    case CAIRO_EXTEND_REFLECT:
+	return BRW_TEXCOORDMODE_MIRROR;
+    }
+}
+
+#endif /* CAIRO_DRM_I965_PRIVATE_H */
diff --git a/src/drm/cairo-drm-i965-shader.c b/src/drm/cairo-drm-i965-shader.c
new file mode 100644
index 00000000..fc9ae570
--- /dev/null
+++ b/src/drm/cairo-drm-i965-shader.c
@@ -0,0 +1,2852 @@
+/* cairo - a vector graphics library with display and print output
+ *
+ * Copyright © 2009 Kristian Høgsberg
+ * Copyright © 2009 Chris Wilson
+ * Copyright © 2009 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it either under the terms of the GNU Lesser General Public
+ * License version
2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + * Kristian Høgsberg <krh@bitplanet.net> + */ + +#include "cairoint.h" + +#include "cairo-error-private.h" +#include "cairo-drm-i965-private.h" +#include "cairo-surface-subsurface-private.h" +#include "cairo-surface-snapshot-private.h" + +#include "cairo-drm-intel-brw-eu.h" + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS +/* for DRI2/DRM interoperability */ +#include "cairo-xcb-private.h" +#endif + +/* Theory of shaders: + * + * 3 types of rectangular inputs: + * (a) standard composite: x,y, use source, mask matrices to compute texcoords + * (b) spans: x,y, alpha, use source matrix + * (c) glyphs: x,y, s,t, use source matrix + * + * 5 types of pixel shaders: + * (a) Solid colour + * (b) Linear gradient (via 1D texture, with precomputed tex) + * (c) Radial gradient (per-pixel s computation, 1D texture) + * (d) Spans (mask only): apply opacity + * (e) Texture (includes glyphs). + * + * Clip masks are limited to 2D textures only. + */ + +/* XXX dual source blending for LERP + ComponentAlpha!!! 
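+ * (Presumably the point: with dual-source blending the pixel shader can
+ *  emit a colour and a per-channel blend factor at once, so component-alpha
+ *  text and LERP combines would no longer need the two-pass need_combine
+ *  path used below.)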
*/ + +#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) + +#define SF_KERNEL_NUM_GRF 1 +#define SF_MAX_THREADS 24 + +#define PS_MAX_THREADS_CTG 50 +#define PS_MAX_THREADS_BRW 32 + +#define URB_CS_ENTRY_SIZE 3 /* We need 4 matrices + 2 sources */ +#define URB_CS_ENTRIES 4 /* 4x sets of CONSTANT_BUFFER */ + +#define URB_VS_ENTRY_SIZE 1 +#define URB_VS_ENTRIES 8 + +#define URB_GS_ENTRY_SIZE 0 +#define URB_GS_ENTRIES 0 + +#define URB_CLIP_ENTRY_SIZE 0 +#define URB_CLIP_ENTRIES 0 + +#define URB_SF_ENTRY_SIZE 1 +#define URB_SF_ENTRIES (SF_MAX_THREADS + 1) + +static void +i965_pipelined_flush (i965_device_t *device) +{ + intel_bo_t *bo, *next; + + if (device->batch.used == 0) + return; + + OUT_BATCH (BRW_PIPE_CONTROL | + BRW_PIPE_CONTROL_NOWRITE | + BRW_PIPE_CONTROL_WC_FLUSH | + 2); + OUT_BATCH(0); /* Destination address */ + OUT_BATCH(0); /* Immediate data low DW */ + OUT_BATCH(0); /* Immediate data high DW */ + + cairo_list_foreach_entry_safe (bo, next, intel_bo_t, &device->flush, link) { + bo->batch_write_domain = 0; + cairo_list_init (&bo->link); + } + cairo_list_init (&device->flush); +} + +static cairo_status_t +i965_shader_acquire_solid (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_solid_pattern_t *solid, + const cairo_rectangle_int_t *extents) +{ + src->type.fragment = FS_CONSTANT; + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_SOLID; + + src->base.content = solid->content; + if (CAIRO_COLOR_IS_OPAQUE(&solid->color)) + src->base.content &= ~CAIRO_CONTENT_ALPHA; + + src->base.constants[0] = solid->color.red * solid->color.alpha; + src->base.constants[1] = solid->color.green * solid->color.alpha; + src->base.constants[2] = solid->color.blue * solid->color.alpha; + src->base.constants[3] = solid->color.alpha; + src->base.constants_size = 4; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_linear (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_linear_pattern_t *linear, + const cairo_rectangle_int_t *extents) +{ + intel_buffer_t buffer; + cairo_status_t status; + double x0, y0, sf; + double dx, dy, offset; + + status = intel_gradient_render (&i965_device (shader->target)->intel, + &linear->base, &buffer); + if (unlikely (status)) + return status; + + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_LINEAR; + src->type.fragment = FS_LINEAR; + src->base.bo = buffer.bo; + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.format = buffer.format; + src->base.width = buffer.width; + src->base.height = buffer.height; + src->base.stride = buffer.stride; + src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR); + src->base.extend = i965_extend (linear->base.base.extend); + + dx = _cairo_fixed_to_double (linear->p2.x - linear->p1.x); + dy = _cairo_fixed_to_double (linear->p2.y - linear->p1.y); + sf = 1. 
/ (dx * dx + dy * dy); + dx *= sf; + dy *= sf; + + x0 = _cairo_fixed_to_double (linear->p1.x); + y0 = _cairo_fixed_to_double (linear->p1.y); + offset = dx*x0 + dy*y0; + + if (_cairo_matrix_is_identity (&linear->base.base.matrix)) { + src->base.matrix.xx = dx; + src->base.matrix.xy = dy; + src->base.matrix.x0 = -offset; + } else { + cairo_matrix_t m; + + cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0); + cairo_matrix_multiply (&src->base.matrix, &linear->base.base.matrix, &m); + } + src->base.matrix.yx = 0.; + src->base.matrix.yy = 1.; + src->base.matrix.y0 = 0.; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_radial (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_radial_pattern_t *radial, + const cairo_rectangle_int_t *extents) +{ + intel_buffer_t buffer; + cairo_status_t status; + double dx, dy, dr, r1; + + status = intel_gradient_render (&i965_device (shader->target)->intel, + &radial->base, &buffer); + if (unlikely (status)) + return status; + + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_RADIAL; + src->type.fragment = FS_RADIAL; + src->base.bo = buffer.bo; + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + src->base.format = buffer.format; + src->base.width = buffer.width; + src->base.height = buffer.height; + src->base.stride = buffer.stride; + src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR); + src->base.extend = i965_extend (radial->base.base.extend); + + dx = _cairo_fixed_to_double (radial->c2.x - radial->c1.x); + dy = _cairo_fixed_to_double (radial->c2.y - radial->c1.y); + dr = _cairo_fixed_to_double (radial->r2 - radial->r1); + + r1 = _cairo_fixed_to_double (radial->r1); + + if (FALSE && radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) { + /* XXX dr == 0, meaningless with anything other than PAD */ + src->base.constants[0] = _cairo_fixed_to_double (radial->c1.x) / dr; + src->base.constants[1] = _cairo_fixed_to_double (radial->c1.y) / dr; + src->base.constants[2] = 1. 
/ dr; + src->base.constants[3] = -r1 / dr; + + src->base.constants_size = 4; + src->base.mode = RADIAL_ONE; + } else { + src->base.constants[0] = -_cairo_fixed_to_double (radial->c1.x); + src->base.constants[1] = -_cairo_fixed_to_double (radial->c1.y); + src->base.constants[2] = r1; + src->base.constants[3] = -4 * (dx*dx + dy*dy - dr*dr); + + src->base.constants[4] = -2 * dx; + src->base.constants[5] = -2 * dy; + src->base.constants[6] = -2 * r1 * dr; + src->base.constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr)); + + src->base.constants_size = 8; + src->base.mode = RADIAL_TWO; + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_surface_clone (i965_device_t *device, + cairo_image_surface_t *image, + i965_surface_t **clone_out) +{ + i965_surface_t *clone; + cairo_status_t status; + + clone = (i965_surface_t *) + i965_surface_create_internal (&device->intel.base, + image->base.content, + image->width, + image->height, + I965_TILING_DEFAULT, + FALSE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = intel_bo_put_image (&device->intel, + to_intel_bo (clone->intel.drm.bo), + clone->intel.drm.stride, + image, + 0, 0, + image->width, image->height, + 0, 0); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + status = _cairo_surface_attach_snapshot (&image->base, + &clone->intel.drm.base, + intel_surface_detach_snapshot); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = intel_snapshot_cache_insert (&device->intel, &clone->intel); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_surface_clone_subimage (i965_device_t *device, + cairo_image_surface_t *image, + const cairo_rectangle_int_t *extents, + i965_surface_t **clone_out) +{ + i965_surface_t *clone; + cairo_status_t status; + + clone = (i965_surface_t *) + i965_surface_create_internal (&device->intel.base, + image->base.content, + extents->width, + extents->height, + I965_TILING_DEFAULT, + FALSE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + status = intel_bo_put_image (to_intel_device (clone->intel.drm.base.device), + to_intel_bo (clone->intel.drm.bo), + clone->intel.drm.stride, + image, + extents->x, extents->y, + extents->width, extents->height, + 0, 0); + if (unlikely (status)) + return status; + + *clone_out = clone; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_solid_surface (i965_shader_t *shader, + union i965_shader_channel *src, + cairo_surface_t *surface, + const cairo_rectangle_int_t *extents) +{ + cairo_image_surface_t *image; + void *image_extra; + cairo_status_t status; + uint32_t argb; + + status = _cairo_surface_acquire_source_image (surface, &image, &image_extra); + if (unlikely (status)) + return status; + + if (image->format != CAIRO_FORMAT_ARGB32) { + cairo_surface_t *pixel; + cairo_surface_pattern_t pattern; + + /* extract the pixel as argb32 */ + pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1); + _cairo_pattern_init_for_surface (&pattern, &image->base); + cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y); + pattern.base.filter = CAIRO_FILTER_NEAREST; + status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL); + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + _cairo_surface_release_source_image 
(surface, image, image_extra); + cairo_surface_destroy (pixel); + return status; + } + + argb = *(uint32_t *) ((cairo_image_surface_t *) pixel)->data; + cairo_surface_destroy (pixel); + } else { + argb = ((uint32_t *) (image->data + extents->y * image->stride))[extents->x]; + } + + _cairo_surface_release_source_image (surface, image, image_extra); + + if (argb >> 24 == 0) + argb = 0; + + src->base.constants[0] = ((argb >> 16) & 0xff) / 255.; + src->base.constants[1] = ((argb >> 8) & 0xff) / 255.; + src->base.constants[2] = ((argb >> 0) & 0xff) / 255.; + src->base.constants[3] = ((argb >> 24) & 0xff) / 255.; + src->base.constants_size = 4; + + src->base.content = CAIRO_CONTENT_COLOR_ALPHA; + if (CAIRO_ALPHA_IS_OPAQUE(src->base.constants[3])) + src->base.content &= ~CAIRO_CONTENT_ALPHA; + src->type.fragment = FS_CONSTANT; + src->type.vertex = VS_NONE; + src->type.pattern = PATTERN_SOLID; + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_shader_acquire_surface (i965_shader_t *shader, + union i965_shader_channel *src, + const cairo_surface_pattern_t *pattern, + const cairo_rectangle_int_t *extents) +{ + cairo_surface_t *surface, *drm; + cairo_matrix_t m; + cairo_status_t status; + int src_x = 0, src_y = 0; + + assert (src->type.fragment == FS_NONE); + drm = surface = pattern->surface; + +#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS + if (surface->type == CAIRO_SURFACE_TYPE_XCB) { + cairo_surface_t *xcb = surface; + + if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + xcb = ((cairo_surface_subsurface_t *) surface)->target; + } else if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + xcb = ((cairo_surface_snapshot_t *) surface)->target; + } + + /* XXX copy windows (IncludeInferiors) to a pixmap/drm surface + * xcb = _cairo_xcb_surface_to_drm (xcb) + */ + xcb = ((cairo_xcb_surface_t *) xcb)->drm; + if (xcb != NULL) + drm = xcb; + } +#endif + + if (surface->type == CAIRO_SURFACE_TYPE_DRM) { + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + drm = ((cairo_surface_subsurface_t *) surface)->target; + } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) { + drm = ((cairo_surface_snapshot_t *) surface)->target; + } + } + + src->type.pattern = PATTERN_SURFACE; + src->surface.surface = NULL; + if (drm->type == CAIRO_SURFACE_TYPE_DRM) { + i965_surface_t *s = (i965_surface_t *) drm; + + if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SUBSURFACE) { + if (s->intel.drm.base.device == shader->target->intel.drm.base.device) { + cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface; + if (s != shader->target) { + int x; + + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) + i965_pipelined_flush (i965_device (s)); + + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (s->intel.drm.bo); + src->base.format = s->intel.drm.format; + src->base.content = s->intel.drm.base.content; + src->base.width = sub->extents.width; + src->base.height = sub->extents.height; + src->base.stride = s->intel.drm.stride; + + x = sub->extents.x; + if (s->intel.drm.format != CAIRO_FORMAT_A8) + x *= 4; + + /* XXX tiling restrictions upon offset? 
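+		     * (Presumably a byte offset into a tiled bo is only usable
+		     *  if it is tile-aligned, which would be why the offset
+		     *  below stays disabled.)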
*/ + //src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x; + } else { + i965_surface_t *clone; + cairo_surface_pattern_t pattern; + + clone = (i965_surface_t *) + i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device, + s->intel.drm.base.content, + sub->extents.width, + sub->extents.height, + I965_TILING_DEFAULT, + TRUE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base); + pattern.base.filter = CAIRO_FILTER_NEAREST; + cairo_matrix_init_translate (&pattern.base.matrix, + sub->extents.x, sub->extents.y); + + status = _cairo_surface_paint (&clone->intel.drm.base, + CAIRO_OPERATOR_SOURCE, + &pattern.base, + NULL); + + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + i965_pipelined_flush (i965_device (s)); + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (clone->intel.drm.bo); + src->base.format = clone->intel.drm.format; + src->base.content = clone->intel.drm.base.content; + src->base.width = clone->intel.drm.width; + src->base.height = clone->intel.drm.height; + src->base.stride = clone->intel.drm.stride; + + src->surface.surface = &clone->intel.drm.base; + } + + src_x = sub->extents.x; + src_y = sub->extents.y; + } + } else { + if (s->intel.drm.base.device == shader->target->intel.drm.base.device) { + if (s != shader->target) { + if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) + i965_pipelined_flush (i965_device (s)); + + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (s->intel.drm.bo); + src->base.format = s->intel.drm.format; + src->base.content = s->intel.drm.base.content; + src->base.width = s->intel.drm.width; + src->base.height = s->intel.drm.height; + src->base.stride = s->intel.drm.stride; + } else { + i965_surface_t *clone; + cairo_surface_pattern_t pattern; + + clone = (i965_surface_t *) + i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device, + s->intel.drm.base.content, + s->intel.drm.width, + s->intel.drm.height, + I965_TILING_DEFAULT, + TRUE); + if (unlikely (clone->intel.drm.base.status)) + return clone->intel.drm.base.status; + + _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base); + pattern.base.filter = CAIRO_FILTER_NEAREST; + status = _cairo_surface_paint (&clone->intel.drm.base, + CAIRO_OPERATOR_SOURCE, + &pattern.base, + NULL); + + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + cairo_surface_destroy (&clone->intel.drm.base); + return status; + } + + i965_pipelined_flush (i965_device (s)); + src->type.fragment = FS_SURFACE; + + src->base.bo = to_intel_bo (clone->intel.drm.bo); + src->base.format = clone->intel.drm.format; + src->base.content = clone->intel.drm.base.content; + src->base.width = clone->intel.drm.width; + src->base.height = clone->intel.drm.height; + src->base.stride = clone->intel.drm.stride; + + src->surface.surface = &clone->intel.drm.base; + } + } + } + } + + if (src->type.fragment == FS_NONE) { + i965_surface_t *s; + + if (extents->width == 1 && extents->height == 1) { + return i965_shader_acquire_solid_surface (shader, src, + surface, extents); + } + + s = (i965_surface_t *) + _cairo_surface_has_snapshot (surface, + shader->target->intel.drm.base.backend); + if (s != NULL) { + i965_device_t *device = i965_device (shader->target); + intel_bo_t *bo = to_intel_bo (s->intel.drm.bo); + + if (bo->purgeable && + ! 
intel_bo_madvise (&device->intel, bo, I915_MADV_WILLNEED))
+	{
+	    _cairo_surface_detach_snapshot (&s->intel.drm.base);
+	    s = NULL;
+	}
+
+	if (s != NULL)
+	    cairo_surface_reference (&s->intel.drm.base);
+    }
+
+    if (s == NULL) {
+	cairo_image_surface_t *image;
+	void *image_extra;
+	cairo_status_t status;
+
+	status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
+	if (unlikely (status))
+	    return status;
+
+	if (image->width < 8192 && image->height < 8192) {
+	    status = i965_surface_clone (i965_device (shader->target), image, &s);
+	} else {
+	    status = i965_surface_clone_subimage (i965_device (shader->target),
+						  image, extents, &s);
+	    src_x = -extents->x;
+	    src_y = -extents->y;
+	}
+
+	_cairo_surface_release_source_image (surface, image, image_extra);
+
+	if (unlikely (status))
+	    return status;
+
+	/* XXX? */
+	//intel_bo_mark_purgeable (to_intel_bo (s->intel.drm.bo), TRUE);
+    }
+
+    src->type.fragment = FS_SURFACE;
+
+    src->base.bo = to_intel_bo (s->intel.drm.bo);
+    src->base.content = s->intel.drm.base.content;
+    src->base.format = s->intel.drm.format;
+    src->base.width = s->intel.drm.width;
+    src->base.height = s->intel.drm.height;
+    src->base.stride = s->intel.drm.stride;
+
+    src->surface.surface = &s->intel.drm.base;
+
+    drm = &s->intel.drm.base;
+    }
+
+    /* XXX transform nx1 or 1xn surfaces to 1D? */
+
+    src->type.vertex = VS_NONE;
+
+    src->base.extend = i965_extend (pattern->base.extend);
+    if (pattern->base.extend == CAIRO_EXTEND_NONE &&
+	extents->x >= 0 && extents->y >= 0 &&
+	extents->x + extents->width <= src->base.width &&
+	extents->y + extents->height <= src->base.height)
+    {
+	/* Convert a wholly contained NONE to a REFLECT as the contiguous sampler
+	 * cannot handle CLAMP_BORDER textures.
+	 */
+	src->base.extend = i965_extend (CAIRO_EXTEND_REFLECT);
+	/* XXX also need to check |u,v| < 3 */
+    }
+
+    src->base.filter = i965_filter (pattern->base.filter);
+    if (_cairo_matrix_is_pixel_exact (&pattern->base.matrix))
+	src->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
+
+    /* tweak the src matrix to map from dst to texture coordinates */
+    src->base.matrix = pattern->base.matrix;
+    if (src_x | src_y)
+	cairo_matrix_translate (&src->base.matrix, src_x, src_y);
+    if (src->base.filter == BRW_MAPFILTER_NEAREST)
+	cairo_matrix_translate (&src->base.matrix, NEAREST_BIAS, NEAREST_BIAS);
+    cairo_matrix_init_scale (&m, 1. / src->base.width, 1.
/ src->base.height);
+    cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m);
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
+cairo_status_t
+i965_shader_acquire_pattern (i965_shader_t *shader,
+			     union i965_shader_channel *src,
+			     const cairo_pattern_t *pattern,
+			     const cairo_rectangle_int_t *extents)
+{
+    switch (pattern->type) {
+    case CAIRO_PATTERN_TYPE_SOLID:
+	return i965_shader_acquire_solid (shader, src,
+					  (cairo_solid_pattern_t *) pattern,
+					  extents);
+
+    case CAIRO_PATTERN_TYPE_LINEAR:
+	return i965_shader_acquire_linear (shader, src,
+					   (cairo_linear_pattern_t *) pattern,
+					   extents);
+
+    case CAIRO_PATTERN_TYPE_RADIAL:
+	return i965_shader_acquire_radial (shader, src,
+					   (cairo_radial_pattern_t *) pattern,
+					   extents);
+
+    case CAIRO_PATTERN_TYPE_SURFACE:
+	return i965_shader_acquire_surface (shader, src,
+					    (cairo_surface_pattern_t *) pattern,
+					    extents);
+
+    default:
+	ASSERT_NOT_REACHED;
+	return CAIRO_STATUS_SUCCESS;
+    }
+}
+
+static void
+i965_shader_channel_init (union i965_shader_channel *channel)
+{
+    channel->type.vertex = VS_NONE;
+    channel->type.fragment = FS_NONE;
+    channel->type.pattern = PATTERN_NONE;
+
+    channel->base.mode = 0;
+    channel->base.bo = NULL;
+    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
+    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);
+    channel->base.has_component_alpha = 0;
+    channel->base.constants_size = 0;
+}
+
+void
+i965_shader_init (i965_shader_t *shader,
+		  i965_surface_t *dst,
+		  cairo_operator_t op)
+{
+    shader->committed = FALSE;
+    shader->device = i965_device (dst);
+    shader->target = dst;
+    shader->op = op;
+    shader->constants_size = 0;
+
+    shader->need_combine = FALSE;
+
+    i965_shader_channel_init (&shader->source);
+    i965_shader_channel_init (&shader->mask);
+    i965_shader_channel_init (&shader->clip);
+    i965_shader_channel_init (&shader->dst);
+}
+
+void
+i965_shader_fini (i965_shader_t *shader)
+{
+    if (shader->source.type.pattern == PATTERN_SURFACE)
+	cairo_surface_destroy (shader->source.surface.surface);
+    if (shader->mask.type.pattern == PATTERN_SURFACE)
+	cairo_surface_destroy (shader->mask.surface.surface);
+    if (shader->clip.type.pattern == PATTERN_SURFACE)
+	cairo_surface_destroy (shader->clip.surface.surface);
+    if (shader->dst.type.pattern == PATTERN_SURFACE)
+	cairo_surface_destroy (shader->dst.surface.surface);
+}
+
+void
+i965_shader_set_clip (i965_shader_t *shader,
+		      cairo_clip_t *clip)
+{
+    cairo_surface_t *clip_surface;
+    union i965_shader_channel *channel;
+    i965_surface_t *s;
+
+    clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base);
+    assert (clip_surface->status == CAIRO_STATUS_SUCCESS);
+    assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM);
+    s = (i965_surface_t *) clip_surface;
+
+    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
+	i965_pipelined_flush (i965_device (s));
+
+    channel = &shader->clip;
+    channel->type.pattern = PATTERN_BASE;
+    channel->type.vertex = VS_NONE;
+    channel->type.fragment = FS_SURFACE;
+
+    channel->base.bo = to_intel_bo (s->intel.drm.bo);
+    channel->base.content = CAIRO_CONTENT_ALPHA;
+    channel->base.format = CAIRO_FORMAT_A8;
+    channel->base.width = s->intel.drm.width;
+    channel->base.height = s->intel.drm.height;
+    channel->base.stride = s->intel.drm.stride;
+
+    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);
+    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
+
+    cairo_matrix_init_scale (&shader->clip.base.matrix,
+			     1. / s->intel.drm.width,
+			     1.
/ s->intel.drm.height);
+
+    cairo_matrix_translate (&shader->clip.base.matrix,
+			    NEAREST_BIAS + clip->path->extents.x,
+			    NEAREST_BIAS + clip->path->extents.y);
+}
+
+static cairo_bool_t
+i965_shader_check_aperture (i965_shader_t *shader,
+			    i965_device_t *device)
+{
+    uint32_t size = device->exec.gtt_size;
+
+    if (shader->target != device->target) {
+	const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo);
+	if (bo->exec == NULL)
+	    size += bo->base.size;
+    }
+
+    if (shader->source.base.bo != NULL && shader->source.base.bo != device->source) {
+	const intel_bo_t *bo = shader->source.base.bo;
+	if (bo->exec == NULL)
+	    size += bo->base.size;
+    }
+
+    if (shader->mask.base.bo != NULL && shader->mask.base.bo != device->mask) {
+	const intel_bo_t *bo = shader->mask.base.bo;
+	if (bo->exec == NULL)
+	    size += bo->base.size;
+    }
+
+    if (shader->clip.base.bo != NULL && shader->clip.base.bo != device->clip) {
+	const intel_bo_t *bo = shader->clip.base.bo;
+	if (bo->exec == NULL)
+	    size += bo->base.size;
+    }
+
+    return size <= device->intel.gtt_avail_size;
+}
+
+static cairo_status_t
+i965_shader_setup_dst (i965_shader_t *shader)
+{
+    union i965_shader_channel *channel;
+    i965_surface_t *s, *clone;
+
+    /* We need to do manual blending if we have a clip surface and an unbounded op,
+     * or an extended blend mode.
+     */
+    if (shader->need_combine ||
+	(shader->op < CAIRO_OPERATOR_SATURATE &&
+	 (shader->clip.type.fragment == FS_NONE ||
+	  _cairo_operator_bounded_by_mask (shader->op))))
+    {
+	return CAIRO_STATUS_SUCCESS;
+    }
+
+    shader->need_combine = TRUE;
+
+    s = shader->target;
+
+    /* we need to allocate a new render target and use the original as a source */
+    clone = (i965_surface_t *)
+	i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
+				      s->intel.drm.base.content,
+				      s->intel.drm.width,
+				      s->intel.drm.height,
+				      I965_TILING_DEFAULT,
+				      TRUE);
+    if (unlikely (clone->intel.drm.base.status))
+	return clone->intel.drm.base.status;
+
+    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
+	i965_pipelined_flush (i965_device (s));
+
+    channel = &shader->dst;
+
+    channel->type.vertex = VS_NONE;
+    channel->type.fragment = FS_SURFACE;
+    channel->type.pattern = PATTERN_SURFACE;
+
+    /* swap buffer objects */
+    channel->base.bo = to_intel_bo (s->intel.drm.bo);
+    s->intel.drm.bo = ((cairo_drm_surface_t *) clone)->bo;
+    ((cairo_drm_surface_t *) clone)->bo = &channel->base.bo->base;
+
+    channel->base.content = s->intel.drm.base.content;
+    channel->base.format = s->intel.drm.format;
+    channel->base.width = s->intel.drm.width;
+    channel->base.height = s->intel.drm.height;
+    channel->base.stride = s->intel.drm.stride;
+
+    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
+    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);
+
+    cairo_matrix_init_scale (&channel->base.matrix,
+			     1. / s->intel.drm.width,
+			     1.
/ s->intel.drm.height); + cairo_matrix_translate (&channel->base.matrix, + NEAREST_BIAS, + NEAREST_BIAS); + + channel->surface.surface = &clone->intel.drm.base; + + s->intel.drm.base.content = clone->intel.drm.base.content; + s->intel.drm.format = clone->intel.drm.format; + assert (s->intel.drm.width == clone->intel.drm.width); + assert (s->intel.drm.height == clone->intel.drm.height); + s->intel.drm.stride = clone->intel.drm.stride; + + return CAIRO_STATUS_SUCCESS; +} + +static inline void +constant_add_float (i965_shader_t *shader, float v) +{ + shader->constants[shader->constants_size++] = v; +} + +static inline void +i965_shader_copy_channel_constants (i965_shader_t *shader, + const union i965_shader_channel *channel) +{ + if (channel->base.constants_size) { + assert (shader->constants_size + channel->base.constants_size < ARRAY_LENGTH (shader->constants)); + + memcpy (shader->constants + shader->constants_size, + channel->base.constants, + sizeof (float) * channel->base.constants_size); + shader->constants_size += channel->base.constants_size; + } +} + +static void +i965_shader_setup_channel_constants (i965_shader_t *shader, + const union i965_shader_channel *channel) +{ + switch (channel->type.fragment) { + case FS_NONE: + case FS_CONSTANT: + /* no plane equations */ + break; + + case FS_LINEAR: + constant_add_float (shader, channel->base.matrix.xx); + constant_add_float (shader, channel->base.matrix.xy); + constant_add_float (shader, 0); + constant_add_float (shader, channel->base.matrix.x0); + break; + + case FS_RADIAL: + case FS_SURFACE: + constant_add_float (shader, channel->base.matrix.xx); + constant_add_float (shader, channel->base.matrix.xy); + constant_add_float (shader, 0); + constant_add_float (shader, channel->base.matrix.x0); + + constant_add_float (shader, channel->base.matrix.yx); + constant_add_float (shader, channel->base.matrix.yy); + constant_add_float (shader, 0); + constant_add_float (shader, channel->base.matrix.y0); + break; + + case FS_SPANS: + case FS_GLYPHS: + /* use pue from SF */ + break; + } + + i965_shader_copy_channel_constants (shader, channel); +} + +static void +i965_shader_setup_constants (i965_shader_t *shader) +{ + i965_shader_setup_channel_constants (shader, &shader->source); + i965_shader_setup_channel_constants (shader, &shader->mask); + i965_shader_setup_channel_constants (shader, &shader->clip); + i965_shader_setup_channel_constants (shader, &shader->dst); + assert (shader->constants_size < ARRAY_LENGTH (shader->constants)); +} + +/** + * Highest-valued BLENDFACTOR used in i965_blend_op. 
+ * + * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, + * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) + +static void +i965_shader_get_blend_cntl (const i965_shader_t *shader, + uint32_t *sblend, uint32_t *dblend) +{ + static const struct blendinfo { + cairo_bool_t dst_alpha; + cairo_bool_t src_alpha; + uint32_t src_blend; + uint32_t dst_blend; + } i965_blend_op[] = { + /* CAIRO_OPERATOR_CLEAR treat as SOURCE with transparent */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_SOURCE */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_OVER */ + {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* CAIRO_OPERATOR_IN */ + {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_OUT */ + {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, + /* CAIRO_OPERATOR_ATOP */ + {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + + /* CAIRO_OPERATOR_DEST */ + {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, + /* CAIRO_OPERATOR_DEST_OVER */ + {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, + /* CAIRO_OPERATOR_DEST_IN */ + {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, + /* CAIRO_OPERATOR_DEST_OUT */ + {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* CAIRO_OPERATOR_DEST_ATOP */ + {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, + /* CAIRO_OPERATOR_XOR */ + {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* CAIRO_OPERATOR_ADD */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, + }; + const struct blendinfo *op = &i965_blend_op[shader->op]; + + *sblend = op->src_blend; + *dblend = op->dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (shader->target->intel.drm.base.content == CAIRO_CONTENT_COLOR && + op->dst_alpha) + { + if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) + *sblend = BRW_BLENDFACTOR_ONE; + else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) + *sblend = BRW_BLENDFACTOR_ZERO; + } +} + +static void +emit_wm_subpans_to_pixels (struct brw_compile *compile, + int tmp) +{ + /* Inputs: + * R1.5 x/y of upper-left pixel of subspan 3 + * R1.4 x/y of upper-left pixel of subspan 2 + * R1.3 x/y of upper-left pixel of subspan 1 + * R1.2 x/y of upper-left pixel of subspan 0 + * + * Outputs: + * M1,2: u + * M3,4: v + * + * upper left, upper right, lower left, lower right. 
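+ *
+ * Each subspan gives the upper-left corner of a 2x2 pixel quad; the vf4
+ * immediates below add (0,1,0,1) in x and (0,0,1,1) in y to expand the
+ * 4 subspans into the 16 pixel coordinates of a SIMD16 dispatch.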
+ */ + + /* compute pixel locations for each subspan */ + brw_set_compression_control (compile, BRW_COMPRESSION_NONE); + brw_ADD (compile, + brw_vec8_grf (tmp), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 4, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE)); + brw_ADD (compile, + brw_vec8_grf (tmp+1), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 8, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE)); + brw_ADD (compile, + brw_vec8_grf (tmp+2), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 5, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE)); + brw_ADD (compile, + brw_vec8_grf (tmp+3), + brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 9, + BRW_REGISTER_TYPE_UW, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_NOOP, + WRITEMASK_XYZW), + brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE)); + brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED); +} + +static void +emit_wm_affine (struct brw_compile *compile, + int tmp, int reg, int msg) +{ + emit_wm_subpans_to_pixels (compile, tmp); + + brw_LINE (compile, + brw_null_reg (), + brw_vec1_grf (reg, 0), + brw_vec8_grf (tmp)); + brw_MAC (compile, + brw_message_reg (msg + 1), + brw_vec1_grf (reg, 1), + brw_vec8_grf (tmp+2)); + + brw_LINE (compile, + brw_null_reg (), + brw_vec1_grf (reg, 4), + brw_vec8_grf (tmp)); + brw_MAC (compile, + brw_message_reg (msg + 3), + brw_vec1_grf (reg, 5), + brw_vec8_grf (tmp+2)); +} + +static void +emit_wm_glyph (struct brw_compile *compile, + int tmp, int vue, int msg) +{ + emit_wm_subpans_to_pixels (compile, tmp); + + brw_MUL (compile, + brw_null_reg (), + brw_vec8_grf (tmp), + brw_imm_f (1./1024)); + brw_ADD (compile, + brw_message_reg (msg + 1), + brw_acc_reg (), + brw_vec1_grf (vue, 0)); + + brw_MUL (compile, + brw_null_reg (), + brw_vec8_grf (tmp + 2), + brw_imm_f (1./1024)); + brw_ADD (compile, + brw_message_reg (msg + 3), + brw_acc_reg (), + brw_vec1_grf (vue, 1)); +} + +static void +emit_wm_load_constant (struct brw_compile *compile, + int reg, + struct brw_reg *result) +{ + int n; + + for (n = 0; n < 4; n++) { + result[n] = result[n+4] = brw_reg (BRW_GENERAL_REGISTER_FILE, reg, n, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_XYZW); + } +} + +static void +emit_wm_load_opacity (struct brw_compile *compile, + int reg, + struct brw_reg *result) +{ + result[0] = result[1] = result[2] = result[3] = + result[4] = result[5] = result[6] = result[7] = + brw_reg (BRW_GENERAL_REGISTER_FILE, reg, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + WRITEMASK_XYZW); +} + +static void +emit_wm_load_linear (struct brw_compile *compile, + int tmp, int reg, int msg) +{ + emit_wm_subpans_to_pixels (compile, tmp); + + brw_LINE (compile, + brw_null_reg(), + brw_vec1_grf (reg, 0), + brw_vec8_grf (tmp)); + brw_MAC (compile, + brw_message_reg(msg + 1), + brw_vec1_grf (reg, 1), + brw_vec8_grf (tmp + 2)); +} + +static void +emit_wm_load_radial (struct brw_compile *compile, + int reg, int msg) + +{ + struct brw_reg c1x = brw_vec1_grf (reg, 0); + struct brw_reg c1y = brw_vec1_grf (reg, 1); + struct 
brw_reg minus_r_sq = brw_vec1_grf (reg, 3);
+    struct brw_reg cdx = brw_vec1_grf (reg, 4);
+    struct brw_reg cdy = brw_vec1_grf (reg, 5);
+    struct brw_reg neg_4a = brw_vec1_grf (reg + 1, 0);
+    struct brw_reg inv_2a = brw_vec1_grf (reg + 1, 1);
+
+    struct brw_reg tmp_x = brw_uw16_grf (30, 0);
+    struct brw_reg tmp_y = brw_uw16_grf (28, 0);
+    struct brw_reg det = brw_vec8_grf (22);
+    struct brw_reg b = brw_vec8_grf (20);
+    struct brw_reg c = brw_vec8_grf (18);
+    struct brw_reg pdx = brw_vec8_grf (16);
+    struct brw_reg pdy = brw_vec8_grf (14);
+    struct brw_reg t = brw_message_reg (msg + 1);
+
+    /* cdx = (c₂x - c₁x)
+     * cdy = (c₂y - c₁y)
+     * dr = r₂-r₁
+     * pdx = px - c₁x
+     * pdy = py - c₁y
+     *
+     * A = cdx² + cdy² - dr²
+     * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
+     * C = pdx² + pdy² - r₁²
+     *
+     * t = (-B ± ⎷(B² - 4·A·C)) / (2·A)
+     */
+
+    brw_ADD (compile, pdx, vec8 (tmp_x), negate (c1x));
+    brw_ADD (compile, pdy, vec8 (tmp_y), negate (c1y));
+
+    brw_LINE (compile, brw_null_reg (), cdx, pdx);
+    brw_MAC (compile, b, cdy, pdy);
+
+    brw_MUL (compile, brw_null_reg (), pdx, pdx);
+    brw_MAC (compile, c, pdy, pdy);
+    brw_ADD (compile, c, c, minus_r_sq);
+
+    brw_MUL (compile, brw_null_reg (), b, b);
+    brw_MAC (compile, det, neg_4a, c);
+
+    /* XXX use rsqrt like i915? it's faster and we need to MAC anyway */
+    brw_math (compile,
+	      det,
+	      BRW_MATH_FUNCTION_SQRT,
+	      BRW_MATH_SATURATE_NONE,
+	      2,
+	      det,
+	      BRW_MATH_DATA_VECTOR,
+	      BRW_MATH_PRECISION_FULL);
+
+    /* XXX cmp, +- */
+
+    brw_ADD (compile, det, negate (det), negate (b));
+    brw_ADD (compile, det, det, negate (b));
+    brw_MUL (compile, t, det, inv_2a);
+}
+
+static int
+emit_wm_sample (struct brw_compile *compile,
+		union i965_shader_channel *channel,
+		int sampler,
+		int msg_base, int msg_len,
+		int dst,
+		struct brw_reg *result)
+{
+    int response_len, mask;
+
+    if (channel->base.content == CAIRO_CONTENT_ALPHA) {
+	mask = 0x7000;
+	response_len = 2;
+	result[0] = result[1] = result[2] = result[3] = brw_vec8_grf (dst);
+	result[4] = result[5] = result[6] = result[7] = brw_vec8_grf (dst + 1);
+    } else {
+	mask = 0;
+	response_len = 8;
+	result[0] = brw_vec8_grf (dst + 0);
+	result[1] = brw_vec8_grf (dst + 2);
+	result[2] = brw_vec8_grf (dst + 4);
+	result[3] = brw_vec8_grf (dst + 6);
+	result[4] = brw_vec8_grf (dst + 1);
+	result[5] = brw_vec8_grf (dst + 3);
+	result[6] = brw_vec8_grf (dst + 5);
+	result[7] = brw_vec8_grf (dst + 7);
+    }
+
+    brw_set_compression_control (compile, BRW_COMPRESSION_NONE);
+
+    brw_set_mask_control (compile, BRW_MASK_DISABLE);
+    brw_MOV (compile,
+	     get_element_ud (brw_vec8_grf (0), 2),
+	     brw_imm_ud (mask));
+    brw_set_mask_control (compile, BRW_MASK_ENABLE);
+
+    brw_SAMPLE (compile,
+		brw_uw16_grf (dst, 0),
+		msg_base,
+		brw_uw8_grf (0, 0),
+		sampler + 1, /* binding table */
+		sampler,
+		WRITEMASK_XYZW,
+		BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE,
+		response_len,
+		msg_len,
+		0 /* eot */);
+
+    brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED);
+
+    return response_len;
+}
+
+#define MAX_MSG_REGISTER 16
+
+static void
+emit_wm_load_channel (struct brw_compile *compile,
+		      union i965_shader_channel *channel,
+		      int *vue,
+		      int *cue,
+		      int *msg,
+		      int *sampler,
+		      int *grf,
+		      struct brw_reg *result)
+{
+    switch (channel->type.fragment) {
+    case FS_NONE:
+	break;
+
+    case FS_CONSTANT:
+	emit_wm_load_constant (compile, *cue, result);
+	*cue += 1;
+	break;
+
+    case FS_RADIAL:
+	emit_wm_load_radial (compile, *cue, *msg);
+	*cue += 2;
+
+	if (*msg + 3 > MAX_MSG_REGISTER)
+	    *msg = 1;
+
+	*grf += emit_wm_sample (compile, channel, *sampler,
*msg, 3, *grf, result); + *sampler += 1; + *msg += 3; + break; + + case FS_LINEAR: + emit_wm_load_linear (compile, *grf, *cue, *msg); + *cue += 1; + + if (*msg + 3 > MAX_MSG_REGISTER) + *msg = 1; + + *grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result); + *sampler += 1; + *msg += 3; + break; + + case FS_SURFACE: + emit_wm_affine (compile, *grf, *cue, *msg); + *cue += 2; + + if (*msg + 5 > MAX_MSG_REGISTER) + *msg = 1; + + *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result); + *sampler += 1; + *msg += 5; + break; + + case FS_SPANS: + emit_wm_load_opacity (compile, *vue, result); + *vue += 1; + break; + + case FS_GLYPHS: + emit_wm_glyph (compile, *grf, *vue, *msg); + *vue += 1; + + if (*msg + 5 > MAX_MSG_REGISTER) + *msg = 1; + + *grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result); + *sampler += 1; + *msg += 5; + break; + } +} + +static unsigned long +i965_wm_kernel_hash (const i965_shader_t *shader) +{ + unsigned long hash; + + hash = + (shader->source.type.fragment & 0xff) | + (shader->mask.type.fragment & 0xff) << 8 | + (shader->clip.type.fragment & 0xff) << 16; + if (shader->need_combine) + hash |= (1 + shader->op) << 24; + + return hash; +} + +static void +i965_wm_kernel_init (struct i965_wm_kernel *key, + const i965_shader_t *shader) +{ + key->entry.hash = i965_wm_kernel_hash (shader); +} + +static uint32_t +i965_shader_const_urb_length (i965_shader_t *shader) +{ + const int lengths[] = { 0, 1, 1, 4, 2, 0, 0 }; + int count = 0; /* 128-bit/16-byte increments */ + + count += lengths[shader->source.type.fragment]; + count += lengths[shader->mask.type.fragment]; + count += lengths[shader->clip.type.fragment]; + count += lengths[shader->dst.type.fragment]; + + return (count + 1) / 2; /* 256-bit/32-byte increments */ +} + +static uint32_t +i965_shader_pue_length (i965_shader_t *shader) +{ + return 1 + (shader->mask.type.vertex != VS_NONE); +} + +static uint32_t +create_wm_kernel (i965_device_t *device, + i965_shader_t *shader, + int *num_reg) +{ + struct brw_compile compile; + struct brw_reg source[8], mask[8], clip[8], dst[8]; + const uint32_t *program; + uint32_t size; + int msg, cue, vue, grf, sampler; + int i; + + struct i965_wm_kernel key, *cache; + cairo_status_t status; + uint32_t offset; + + i965_wm_kernel_init (&key, shader); + cache = _cairo_hash_table_lookup (device->wm_kernels, &key.entry); + if (cache != NULL) + return cache->offset; + + brw_compile_init (&compile, device->is_g4x); + + if (key.entry.hash == FS_CONSTANT && + to_intel_bo (shader->target->intel.drm.bo)->tiling) + { + struct brw_instruction *insn; + + assert (i965_shader_const_urb_length (shader) == 1); + brw_MOV (&compile, brw_message4_reg (2), brw_vec4_grf (2, 0)); + grf = 3; + + brw_push_insn_state (&compile); + brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? 
*/ + brw_MOV (&compile, + retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD), + retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state (&compile); + + insn = brw_next_instruction (&compile, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = 0; + + brw_instruction_set_destination (insn, + retype (vec16 (brw_acc_reg ()), + BRW_REGISTER_TYPE_UW)); + + brw_instruction_set_source0 (insn, + retype (brw_vec8_grf (0), + BRW_REGISTER_TYPE_UW)); + + brw_instruction_set_dp_write_message (insn, + 0, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + 3, + 1, /* pixel scoreboard */ + 0, + TRUE); + } + else + { + msg = 1; + cue = 2; + vue = cue + i965_shader_const_urb_length (shader); + grf = vue + i965_shader_pue_length (shader); + sampler = 0; + + brw_set_compression_control (&compile, BRW_COMPRESSION_COMPRESSED); + emit_wm_load_channel (&compile, &shader->source, + &vue, &cue, &msg, &sampler, &grf, + source); + emit_wm_load_channel (&compile, &shader->mask, + &vue, &cue, &msg, &sampler, &grf, + mask); + emit_wm_load_channel (&compile, &shader->clip, + &vue, &cue, &msg, &sampler, &grf, + clip); + emit_wm_load_channel (&compile, &shader->dst, + &vue, &cue, &msg, &sampler, &grf, + dst); + brw_set_compression_control (&compile, BRW_COMPRESSION_NONE); + + if (shader->need_combine) { + if (shader->mask.type.fragment != FS_NONE && + shader->clip.type.fragment != FS_NONE) + { + for (i = 0; i < 8; i++) + brw_MUL (&compile, mask[i], mask[i], clip[i]); + } + + /* XXX LERP ! */ + for (i = 0; i < 8; i++) + brw_MOV (&compile, brw_message_reg (2 + i), source[i]); + } else { + if (shader->mask.type.fragment != FS_NONE) { + if (shader->clip.type.fragment != FS_NONE) { + for (i = 0; i < 8; i++) + brw_MUL (&compile, mask[i], mask[i], clip[i]); + } + + for (i = 0; i < 8; i++) + brw_MUL (&compile, brw_message_reg (2 + i), source[i], mask[i]); + } else { + if (shader->clip.type.fragment != FS_NONE) { + for (i = 0; i < 8; i++) + brw_MUL (&compile, brw_message_reg (2 + i), source[i], clip[i]); + } else { + for (i = 0; i < 8; i++) + brw_MOV (&compile, brw_message_reg (2 + i), source[i]); + } + } + } + + brw_push_insn_state (&compile); + brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? 
*/ + brw_MOV (&compile, + retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD), + retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state (&compile); + + brw_fb_WRITE (&compile, + retype (vec16 (brw_acc_reg ()), BRW_REGISTER_TYPE_UW), + 0, /* base reg */ + retype (brw_vec8_grf (0), BRW_REGISTER_TYPE_UW), + 0, /* binding table index */ + 2 + 8, /* msg length */ + 0, /* response length */ + TRUE); /* EOT */ + } + + program = brw_get_program (&compile, &size); + *num_reg = grf; + + i965_stream_align (&device->general, 64); + offset = i965_stream_emit (&device->general, program, size); + + cache = _cairo_freelist_alloc (&device->wm_kernel_freelist); + if (likely (cache != NULL)) { + i965_wm_kernel_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->wm_kernels, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->wm_kernel_freelist, cache); + } + + return offset; +} + +static uint32_t +create_sf_kernel (i965_device_t *device, + i965_shader_t *shader) +{ + struct brw_compile compile; + const uint32_t *program; + uint32_t size; + int msg_len; + + brw_compile_init (&compile, device->is_g4x); + + switch (shader->mask.type.vertex) { + default: + case VS_NONE: + /* use curb plane eq in WM */ + msg_len = 1; + break; + + case VS_SPANS: + /* just a constant opacity */ + brw_MOV (&compile, + brw_message4_reg (1), + brw_vec4_grf (3, 0)); + msg_len = 2; + break; + + case VS_GLYPHS: + /* an offset+sf into the glyph cache */ + brw_MOV (&compile, + brw_acc_reg (), + brw_vec2_grf (3, 0)); + brw_MAC (&compile, + brw_message4_reg (1), + negate (brw_vec2_grf (1, 4)), + brw_imm_f (1./1024)); + msg_len = 2; + break; + } + + brw_urb_WRITE (&compile, + brw_null_reg (), + 0, + brw_vec8_grf (0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + msg_len, + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* offset */ + BRW_URB_SWIZZLE_NONE); + + program = brw_get_program (&compile, &size); + + i965_stream_align (&device->general, 64); + return i965_stream_emit (&device->general, program, size); +} + +static uint32_t +i965_sf_kernel (const i965_shader_t *shader) +{ + return shader->mask.type.vertex; +} + +static void +i965_sf_state_init (struct i965_sf_state *key, + const i965_shader_t *shader) +{ + key->entry.hash = i965_sf_kernel (shader); +} + +cairo_bool_t +i965_sf_state_equal (const void *A, const void *B) +{ + const cairo_hash_entry_t *a = A, *b = B; + return a->hash == b->hash; +} + +/** + * Sets up the SF state pointing at an SF kernel. + * + * The SF kernel does coord interp: for each attribute, + * calculate dA/dx and dA/dy. Hand these interpolation coefficients + * back to SF which then hands pixels off to WM. 
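+ *
+ * In this backend the kernel is keyed by the mask's vertex-shader type
+ * (see create_sf_kernel above): VS_NONE forwards only the URB header and
+ * leaves interpolation to the WM plane equations, VS_SPANS copies a
+ * constant opacity, and VS_GLYPHS computes the offset into the glyph cache.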
+ */ +static uint32_t +gen4_create_sf_state (i965_device_t *device, + i965_shader_t *shader) +{ + struct brw_sf_unit_state *state; + struct i965_sf_state key, *cache; + cairo_status_t status; + uint32_t offset; + + i965_sf_state_init (&key, shader); + if (i965_sf_state_equal (&key, &device->sf_state)) + return device->sf_state.offset; + + cache = _cairo_hash_table_lookup (device->sf_states, &key.entry); + if (cache != NULL) { + offset = cache->offset; + goto DONE; + } + + offset = create_sf_kernel (device, shader); + + state = i965_stream_alloc (&device->general, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->thread0.grf_reg_count = BRW_GRF_BLOCKS (3); + assert ((offset & 63) == 0); + state->thread0.kernel_start_pointer = offset >> 6; + state->sf1.single_program_flow = 1; + state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + state->thread3.urb_entry_read_offset = 1; + state->thread3.dispatch_grf_start_reg = 3; + state->thread4.max_threads = SF_MAX_THREADS - 1; + state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + state->thread4.nr_urb_entries = URB_SF_ENTRIES; + state->sf6.dest_org_vbias = 0x8; + state->sf6.dest_org_hbias = 0x8; + + offset = i965_stream_offsetof (&device->general, state); + + cache = _cairo_freelist_alloc (&device->sf_freelist); + if (likely (cache != NULL)) { + i965_sf_state_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->sf_states, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->sf_freelist, cache); + } + + DONE: + i965_sf_state_init (&device->sf_state, shader); + device->sf_state.offset = offset; + + return offset; +} + +static unsigned long +i965_shader_sampler_hash (const i965_shader_t *shader) +{ + unsigned long hash = 0; + unsigned int offset = 0; + + if (shader->source.base.bo != NULL) { + hash |= (shader->source.base.filter << offset) | + (shader->source.base.extend << (offset + 4)); + offset += 8; + } + + if (shader->mask.base.bo != NULL) { + hash |= (shader->mask.base.filter << offset) | + (shader->mask.base.extend << (offset + 4)); + offset += 8; + } + + if (shader->clip.base.bo != NULL) { + hash |= (shader->clip.base.filter << offset) | + (shader->clip.base.extend << (offset + 4)); + offset += 8; + } + + if (shader->dst.base.bo != NULL) { + hash |= (shader->dst.base.filter << offset) | + (shader->dst.base.extend << (offset + 4)); + offset += 8; + } + + return hash; +} + +static void +i965_sampler_init (struct i965_sampler *key, + const i965_shader_t *shader) +{ + key->entry.hash = i965_shader_sampler_hash (shader); +} + +static void +emit_sampler_channel (i965_device_t *device, + const union i965_shader_channel *channel, + uint32_t border_color) +{ + struct brw_sampler_state *state; + + state = i965_stream_alloc (&device->general, 0, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->ss0.lod_preclamp = 1; /* GL mode */ + + state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; + + state->ss0.min_filter = channel->base.filter; + state->ss0.mag_filter = channel->base.filter; + + state->ss1.r_wrap_mode = channel->base.extend; + state->ss1.s_wrap_mode = channel->base.extend; + state->ss1.t_wrap_mode = channel->base.extend; + + assert ((border_color & 31) == 0); + state->ss2.border_color_pointer = border_color >> 5; +} + +static uint32_t +emit_sampler_state_table (i965_device_t *device, + i965_shader_t *shader) +{ + struct i965_sampler key, *cache; + cairo_status_t status; + uint32_t offset; + + if 
(device->border_color_offset == (uint32_t) -1) { + struct brw_sampler_legacy_border_color *border_color; + + border_color = i965_stream_alloc (&device->general, 32, + sizeof (*border_color)); + border_color->color[0] = 0; /* R */ + border_color->color[1] = 0; /* G */ + border_color->color[2] = 0; /* B */ + border_color->color[3] = 0; /* A */ + + device->border_color_offset = i965_stream_offsetof (&device->general, + border_color); + } else { + i965_sampler_init (&key, shader); + cache = _cairo_hash_table_lookup (device->samplers, &key.entry); + if (cache != NULL) + return cache->offset; + } + + i965_stream_align (&device->general, 32); + offset = device->general.used; + if (shader->source.base.bo != NULL) { + emit_sampler_channel (device, + &shader->source, + device->border_color_offset); + } + if (shader->mask.base.bo != NULL) { + emit_sampler_channel (device, + &shader->mask, + device->border_color_offset); + } + if (shader->clip.base.bo != NULL) { + emit_sampler_channel (device, + &shader->clip, + device->border_color_offset); + } + if (shader->dst.base.bo != NULL) { + emit_sampler_channel (device, + &shader->dst, + device->border_color_offset); + } + + cache = _cairo_freelist_alloc (&device->sampler_freelist); + if (likely (cache != NULL)) { + i965_sampler_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->samplers, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->sampler_freelist, cache); + } + + return offset; +} + +static void +i965_cc_state_init (struct i965_cc_state *key, + const i965_shader_t *shader) +{ + uint32_t src_blend, dst_blend; + + if (shader->need_combine) + src_blend = dst_blend = 0; + else + i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend); + + key->entry.hash = src_blend | ((dst_blend & 0xffff) << 16); +} + +cairo_bool_t +i965_cc_state_equal (const void *A, const void *B) +{ + const cairo_hash_entry_t *a = A, *b = B; + return a->hash == b->hash; +} + +static uint32_t +cc_state_emit (i965_device_t *device, i965_shader_t *shader) +{ + struct brw_cc_unit_state *state; + struct i965_cc_state key, *cache; + cairo_status_t status; + uint32_t src_blend, dst_blend; + uint32_t offset; + + i965_cc_state_init (&key, shader); + if (i965_cc_state_equal (&key, &device->cc_state)) + return device->cc_state.offset; + + cache = _cairo_hash_table_lookup (device->cc_states, &key.entry); + if (cache != NULL) { + offset = cache->offset; + goto DONE; + } + + if (shader->need_combine) + src_blend = dst_blend = 0; + else + i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend); + + state = i965_stream_alloc (&device->general, 64, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + /* XXX Note errata, need to flush render cache when blend_enable 0 -> 1 */ + /* XXX 2 source blend */ + state->cc3.blend_enable = ! 
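i965_shader_sampler_hash() above packs each active channel's filter and extend modes into adjacent 4-bit fields, eight bits per channel, so up to four channels fit one 32-bit key; i965_cc_state_init() likewise packs the two blend factors into the low and high halves of its hash. A small self-contained check of both packings (field widths are taken from the code above; the function names are invented):

#include <assert.h>
#include <stdint.h>

/* One channel contributes filter (4 bits) and extend (4 bits). */
static uint32_t
pack_channel (uint32_t hash, unsigned *offset, uint32_t filter, uint32_t extend)
{
    hash |= (filter << *offset) | (extend << (*offset + 4));
    *offset += 8;
    return hash;
}

int main (void)
{
    unsigned offset = 0;
    uint32_t hash = 0, src, dst, cc;

    hash = pack_channel (hash, &offset, 2 /* filter */, 1 /* extend */);
    hash = pack_channel (hash, &offset, 3, 0);

    assert ((hash & 0xf) == 2);          /* source filter */
    assert (((hash >> 4) & 0xf) == 1);   /* source extend */
    assert (((hash >> 8) & 0xf) == 3);   /* mask filter */

    /* The CC key: src blend low half, dst blend high half. */
    src = 1; dst = 5;
    cc = src | ((dst & 0xffff) << 16);
    assert ((cc & 0xffff) == src && (cc >> 16) == dst);
    return 0;
}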
shader->need_combine; + state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; + state->cc5.ia_src_blend_factor = src_blend; + state->cc5.ia_dest_blend_factor = dst_blend; + state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; + state->cc6.clamp_post_alpha_blend = 1; + state->cc6.clamp_pre_alpha_blend = 1; + state->cc6.src_blend_factor = src_blend; + state->cc6.dest_blend_factor = dst_blend; + + offset = i965_stream_offsetof (&device->general, state); + + cache = _cairo_freelist_alloc (&device->cc_freelist); + if (likely (cache != NULL)) { + i965_cc_state_init (cache, shader); + cache->offset = offset; + status = _cairo_hash_table_insert (device->cc_states, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->cc_freelist, cache); + } + + DONE: + i965_cc_state_init (&device->cc_state, shader); + device->cc_state.offset = offset; + + return offset; +} + +static void +i965_wm_state_init (struct i965_wm_state *key, + const i965_shader_t *shader) +{ + key->kernel = i965_wm_kernel_hash (shader); + key->sampler = i965_shader_sampler_hash (shader); + + key->entry.hash = key->kernel ^ ((key->sampler) << 16 | (key->sampler >> 16)); +} + +cairo_bool_t +i965_wm_state_equal (const void *A, const void *B) +{ + const struct i965_wm_state *a = A, *b = B; + + if (a->entry.hash != b->entry.hash) + return FALSE; + + return a->kernel == b->kernel && a->sampler == b->sampler; +} + +static int +i965_shader_binding_table_count (i965_shader_t *shader) +{ + int count; + + count = 1; + if (shader->source.type.fragment != FS_CONSTANT) + count++; + switch (shader->mask.type.fragment) { + case FS_NONE: + case FS_CONSTANT: + case FS_SPANS: + break; + case FS_LINEAR: + case FS_RADIAL: + case FS_SURFACE: + case FS_GLYPHS: + count++; + } + if (shader->clip.type.fragment == FS_SURFACE) + count++; + if (shader->dst.type.fragment == FS_SURFACE) + count++; + + return count; +} + +static uint32_t +gen4_create_wm_state (i965_device_t *device, + i965_shader_t *shader) +{ + struct brw_wm_unit_state *state; + uint32_t sampler; + uint32_t kernel; + + struct i965_wm_state key, *cache; + cairo_status_t status; + int num_reg; + + i965_wm_state_init (&key, shader); + if (i965_wm_state_equal (&key, &device->wm_state)) + return device->wm_state.offset; + + cache = _cairo_hash_table_lookup (device->wm_states, &key.entry); + if (cache != NULL) { + device->wm_state = *cache; + return cache->offset; + } + + kernel = create_wm_kernel (device, shader, &num_reg); + sampler = emit_sampler_state_table (device, shader); + + state = i965_stream_alloc (&device->general, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + state->thread0.grf_reg_count = BRW_GRF_BLOCKS (num_reg); + assert ((kernel & 63) == 0); + state->thread0.kernel_start_pointer = kernel >> 6; + + state->thread3.dispatch_grf_start_reg = 2; + + state->wm4.sampler_count = 1; /* 1-4 samplers used */ + assert ((sampler & 31) == 0); + state->wm4.sampler_state_pointer = sampler >> 5; + if (device->is_g4x) + state->wm5.max_threads = PS_MAX_THREADS_CTG - 1; + else + state->wm5.max_threads = PS_MAX_THREADS_BRW - 1; + state->wm5.thread_dispatch_enable = 1; + + if (device->is_g4x) { + /* XXX contiguous 32 pixel dispatch */ + } + state->wm5.enable_16_pix = 1; + /* 8 pixel dispatch and friends */ + //state->wm5.early_depth_test = 1; + + state->thread1.binding_table_entry_count = i965_shader_binding_table_count(shader); + state->thread3.urb_entry_read_length = i965_shader_pue_length (shader); + state->thread3.const_urb_entry_read_length = i965_shader_const_urb_length 
(shader); + + key.offset = i965_stream_offsetof (&device->general, state); + + cache = _cairo_freelist_alloc (&device->wm_state_freelist); + if (likely (cache != NULL)) { + *cache = key; + status = _cairo_hash_table_insert (device->wm_states, &cache->entry); + if (unlikely (status)) + _cairo_freelist_free (&device->wm_state_freelist, cache); + } + + device->wm_state = key; + return key.offset; +} + +static uint32_t +vs_unit_state_emit (i965_device_t *device) +{ + if (device->vs_offset == (uint32_t) -1) { + struct brw_vs_unit_state *state; + + /* Set up the vertex shader to be disabled (passthrough) */ + state = i965_stream_alloc (&device->general, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->thread4.nr_urb_entries = URB_VS_ENTRIES; + state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + state->vs6.vert_cache_disable = 1; + + device->vs_offset = i965_stream_offsetof (&device->general, state); + } + + return device->vs_offset; +} + +static uint32_t +i965_get_card_format (cairo_format_t format) +{ + switch (format) { + case CAIRO_FORMAT_ARGB32: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + case CAIRO_FORMAT_RGB24: + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + case CAIRO_FORMAT_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + case CAIRO_FORMAT_A1: + default: + ASSERT_NOT_REACHED; + return 0; + } +} + +static uint32_t +i965_get_dest_format (cairo_format_t format) +{ + switch (format) { + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + case CAIRO_FORMAT_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + case CAIRO_FORMAT_A1: + default: + ASSERT_NOT_REACHED; + return 0; + } +} + +/* XXX silly inline due to compiler bug... */ +static inline void +i965_stream_add_pending_relocation (i965_stream_t *stream, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t delta) +{ + int n; + + n = stream->num_pending_relocations++; + assert (n < stream->max_pending_relocations); + + stream->pending_relocations[n].offset = target_offset; + stream->pending_relocations[n].read_domains = read_domains; + stream->pending_relocations[n].write_domain = write_domain; + stream->pending_relocations[n].delta = delta; +} + +static uint32_t +emit_surface_state (i965_device_t *device, + cairo_bool_t is_target, + intel_bo_t *bo, + cairo_format_t format, + int width, int height, int stride, + int type) +{ + struct brw_surface_state *state; + uint32_t write_domain, read_domains; + uint32_t offset; + + state = i965_stream_alloc (&device->surface, 32, sizeof (*state)); + memset (state, 0, sizeof (*state)); + + state->ss0.surface_type = type; + if (is_target) + state->ss0.surface_format = i965_get_dest_format (format); + else + state->ss0.surface_format = i965_get_card_format (format); + + state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; + state->ss0.color_blend = 1; + if (is_target && device->is_g4x) + state->ss0.render_cache_read_mode = 1; + + state->ss1.base_addr = bo->offset; + + state->ss2.height = height - 1; + state->ss2.width = width - 1; + state->ss3.pitch = stride - 1; + state->ss3.tile_walk = bo->tiling == I915_TILING_Y; + state->ss3.tiled_surface = bo->tiling != I915_TILING_NONE; + + if (is_target) { + read_domains = I915_GEM_DOMAIN_RENDER; + write_domain = I915_GEM_DOMAIN_RENDER; + } else { + read_domains = I915_GEM_DOMAIN_SAMPLER; + write_domain = 0; + } + + offset = i965_stream_offsetof (&device->surface, state); + i965_emit_relocation (device, &device->surface, + bo, 0, + read_domains,
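i965_stream_add_pending_relocation() above records where in the batch a buffer address must eventually be written: the dword is emitted as 0 and patched once the buffer's (presumed) offset is known. A simplified record-then-patch sketch with invented types; the real entries also carry the read/write domains handed to the kernel:

#include <assert.h>
#include <stdint.h>

struct pending_reloc { uint32_t offset; uint32_t delta; };

static uint32_t batch[256];
static uint32_t batch_used;                /* in bytes, as in the driver */
static struct pending_reloc pending[16];
static int num_pending;

static void
out_batch (uint32_t dword)
{
    batch[batch_used / 4] = dword;
    batch_used += 4;
}

/* Emit a placeholder dword and remember where to patch it. */
static void
emit_pending (uint32_t delta)
{
    pending[num_pending].offset = batch_used;
    pending[num_pending].delta = delta;
    num_pending++;
    out_batch (0);                         /* pending relocation */
}

/* Called once the target buffer's offset is known. */
static void
apply_pending (uint32_t bo_offset)
{
    int n;
    for (n = 0; n < num_pending; n++)
        batch[pending[n].offset / 4] = bo_offset + pending[n].delta;
}

int main (void)
{
    out_batch (0xdeadbeef);                /* some command */
    emit_pending (64);                     /* address = bo + 64, not yet known */
    apply_pending (0x10000);
    assert (batch[1] == 0x10000 + 64);
    return 0;
}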
write_domain, + offset + offsetof (struct brw_surface_state, ss1.base_addr)); + return offset; +} + +static uint32_t +emit_surface_state_for_shader (i965_device_t *device, + const union i965_shader_channel *channel) +{ + int type = BRW_SURFACE_2D; + + assert (channel->type.fragment != FS_NONE); + assert (channel->type.fragment != FS_CONSTANT); + + if (channel->type.fragment != FS_SURFACE) + type = BRW_SURFACE_1D; + + return emit_surface_state (device, FALSE, + channel->base.bo, + channel->base.format, + channel->base.width, + channel->base.height, + channel->base.stride, + type); +} + +cairo_bool_t +i965_wm_binding_equal (const void *A, + const void *B) +{ + const struct i965_wm_binding *a = A, *b = B; + + if (a->entry.hash != b->entry.hash) + return FALSE; + + if (a->size != b->size) + return FALSE; + + return memcmp (a->table, b->table, sizeof (uint32_t) * a->size) == 0; +} + +static void +i965_wm_binding_init (struct i965_wm_binding *state, + const uint32_t *table, + int size) +{ + int n, s; + + state->entry.hash = size; + state->size = size; + + for (n = 0; n < size; n++) { + state->table[n] = table[n]; + s = (8 * n) & 31; /* mask the rotate so n == 0 (and n == 4) do not shift by 32 */ + state->entry.hash ^= (table[n] << s) | + (table[n] >> ((32 - s) & 31)); + } +} + +static uint32_t +emit_binding_table (i965_device_t *device, + i965_shader_t *shader) +{ + intel_bo_t *bo; + struct i965_wm_binding key, *cache; + uint32_t *table; + int n = 0; + + table = i965_stream_alloc (&device->surface, 32, 5 * sizeof (uint32_t)); + if (shader->target->stream != device->surface.serial) { + shader->target->stream = device->surface.serial; + shader->target->offset = emit_surface_state (device, + TRUE, + to_intel_bo (shader->target->intel.drm.bo), + shader->target->intel.drm.format, + shader->target->intel.drm.width, + shader->target->intel.drm.height, + shader->target->intel.drm.stride, + BRW_SURFACE_2D); + } + table[n++] = shader->target->offset; + + bo = shader->source.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->source); + } + table[n++] = bo->opaque1; + } + + bo = shader->mask.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->mask); + } + table[n++] = bo->opaque1; + } + + bo = shader->clip.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->clip); + } + table[n++] = bo->opaque1; + } + + bo = shader->dst.base.bo; + if (bo != NULL) { + if (bo->opaque0 != device->surface.serial) { + bo->opaque0 = device->surface.serial; + bo->opaque1 = emit_surface_state_for_shader (device, &shader->dst); + } + table[n++] = bo->opaque1; + } + + i965_wm_binding_init (&key, table, n); + key.offset = i965_stream_offsetof (&device->surface, table); + + if (i965_wm_binding_equal (&key, &device->wm_binding)) { + device->surface.used = key.offset; + return device->wm_binding.offset; + } + + cache = _cairo_hash_table_lookup (device->wm_bindings, &key.entry); + if (cache != NULL) { + device->surface.used = key.offset; + key.offset = cache->offset; + } + + device->wm_binding = key; + return key.offset; +} + +static void +i965_emit_invariants (i965_device_t *device) +{ + OUT_BATCH (BRW_CS_URB_STATE | 0); + OUT_BATCH (((URB_CS_ENTRY_SIZE-1) << 4) | (URB_CS_ENTRIES << 0)); +} + +static void +i965_emit_urb_fences (i965_device_t
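emit_binding_table() above avoids re-emitting SURFACE_STATE for a buffer already described in the current surface-stream generation: each bo carries the serial of the stream it was last emitted for (opaque0) and the offset of that state (opaque1). A stripped-down sketch of the idea, with hypothetical names:

#include <assert.h>
#include <stdint.h>

struct bo { uint32_t last_serial; uint32_t state_offset; };

static uint32_t stream_serial = 1;  /* bumped whenever the stream is reset */
static uint32_t stream_used;

static uint32_t
emit_surface_state_stub (struct bo *bo)
{
    uint32_t offset = stream_used;  /* pretend to write a brw_surface_state */
    stream_used += 32;
    (void) bo;
    return offset;
}

/* Return the SURFACE_STATE offset for bo, emitting at most once per
 * stream generation. */
static uint32_t
surface_state_for (struct bo *bo)
{
    if (bo->last_serial != stream_serial) {
        bo->last_serial = stream_serial;
        bo->state_offset = emit_surface_state_stub (bo);
    }
    return bo->state_offset;
}

int main (void)
{
    struct bo b = { 0, 0 };
    uint32_t first = surface_state_for (&b);

    assert (surface_state_for (&b) == first); /* cached within a generation */
    stream_serial++;                          /* a stream reset invalidates */
    assert (surface_state_for (&b) != first);
    return 0;
}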
*device) +{ + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + + if (device->have_urb_fences) + return; + + /* URB fence */ + urb_vs_start = 0; + urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + urb_gs_start = urb_vs_start + urb_vs_size; + urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + urb_clip_start = urb_gs_start + urb_gs_size; + urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + urb_sf_start = urb_clip_start + urb_clip_size; + urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + urb_cs_start = urb_sf_start + urb_sf_size; + urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + + /* erratum: URB_FENCE must not cross a 64-byte cache-line */ + while ((device->batch.used & 63) > 64-12) + OUT_BATCH (MI_NOOP); + OUT_BATCH (BRW_URB_FENCE | + UF0_CS_REALLOC | + UF0_SF_REALLOC | + UF0_CLIP_REALLOC | + UF0_GS_REALLOC | + UF0_VS_REALLOC | + 1); + OUT_BATCH (((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + OUT_BATCH (((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + + device->have_urb_fences = TRUE; + device->constants_size = 0; +} + +static void +i965_emit_base (i965_device_t *device) +{ + OUT_BATCH (BRW_STATE_BASE_ADDRESS | 4); + if (likely (device->general.num_pending_relocations == 0)) { + i965_stream_add_pending_relocation (&device->general, + device->batch.used, + I915_GEM_DOMAIN_INSTRUCTION, 0, + BASE_ADDRESS_MODIFY); + } + OUT_BATCH (0); /* pending relocation */ + + if (likely (device->surface.num_pending_relocations == 0)) { + i965_stream_add_pending_relocation (&device->surface, + device->batch.used, + I915_GEM_DOMAIN_INSTRUCTION, 0, + BASE_ADDRESS_MODIFY); + } + OUT_BATCH (0); /* pending relocation */ + + OUT_BATCH (0 | BASE_ADDRESS_MODIFY); + /* general state max addr, disabled */ + OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY); + /* media object state max addr, disabled */ + OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY); +} + +static void +i965_emit_vertex_element (i965_device_t *device, + i965_shader_t *shader) +{ + uint32_t offset; + uint32_t type; + int nelem; + + type = 0; + nelem = 1; + if (shader->mask.type.vertex == VS_SPANS || + shader->mask.type.vertex == VS_GLYPHS) + { + type = shader->mask.type.vertex; + nelem++; + } + + if (type == device->vertex_type) + return; + device->vertex_type = type; + + offset = 0; + + OUT_BATCH (BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1)); + OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (offset << VE0_OFFSET_SHIFT)); + OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + offset += 8; + + assert (shader->source.type.vertex == VS_NONE); + switch (shader->mask.type.vertex) { + default: + case VS_NONE: + break; + + case VS_SPANS: + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT) | + (offset << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_1_SHIFT) | + 
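i965_emit_urb_fences() above carves the URB into consecutive regions (VS, GS, CLIP, SF, CS) whose fence values are running sums of entries times entry size, and pads with MI_NOOP so the three-dword URB_FENCE never straddles a 64-byte cache line. A compact numeric sketch of both computations; the entry counts here are made up, the driver takes the real values from the URB_* macros:

#include <assert.h>
#include <stdint.h>

enum { VS_N = 8, VS_SZ = 1, GS_N = 4, GS_SZ = 1, CLIP_N = 6, CLIP_SZ = 1,
       SF_N = 32, SF_SZ = 2, CS_N = 4, CS_SZ = 2 };

int main (void)
{
    int vs_start, vs_size, gs_start, gs_size;
    int clip_start, clip_size, sf_start, sf_size, cs_start, cs_size;
    uint32_t batch_used;

    /* Regions are consecutive: each start is the running sum so far. */
    vs_start = 0;                       vs_size = VS_N * VS_SZ;
    gs_start = vs_start + vs_size;      gs_size = GS_N * GS_SZ;
    clip_start = gs_start + gs_size;    clip_size = CLIP_N * CLIP_SZ;
    sf_start = clip_start + clip_size;  sf_size = SF_N * SF_SZ;
    cs_start = sf_start + sf_size;      cs_size = CS_N * CS_SZ;

    /* Each fence value is the end of its region. */
    assert (gs_start == vs_size);
    assert (cs_start + cs_size ==
            vs_size + gs_size + clip_size + sf_size + cs_size);

    /* Erratum workaround: the 12-byte URB_FENCE must not cross a
     * 64-byte cache line, so pad with MI_NOOP while fewer than 12
     * bytes remain in the current line. */
    batch_used = 56;                    /* 8 bytes left in this line */
    while ((batch_used & 63) > 64 - 12)
        batch_used += 4;                /* OUT_BATCH (MI_NOOP) */
    assert ((batch_used & 63) <= 64 - 12);
    return 0;
}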
(BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + + offset += 4; + break; + + case VS_GLYPHS: + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R16G16_FLOAT << VE0_FORMAT_SHIFT) | + (offset << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + + offset += 4; + break; + } + assert (shader->clip.type.vertex == VS_NONE); + assert (shader->dst.type.vertex == VS_NONE); + + device->vertex_size = offset; + i965_stream_align (&device->vertex, device->vertex_size); + device->vertex.committed = device->vertex.used; + + device->rectangle_size = 3 * offset; +} + +static cairo_bool_t +i965_shader_needs_surface_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + return device->target != shader->target || shader->target->stream == 0 || + (shader->source.base.bo != NULL && device->source != shader->source.base.bo) || + (shader->mask.base.bo != NULL && device->mask != shader->mask.base.bo) || + (shader->clip.base.bo != NULL && device->clip != shader->clip.base.bo); +} + +static cairo_bool_t +i965_shader_needs_constants_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + if (shader->constants_size == 0) + return FALSE; + + if (device->constants_size != shader->constants_size) + return TRUE; + + return memcmp (device->constants, + shader->constants, + sizeof (float) * shader->constants_size); +} + +static cairo_bool_t +i965_shader_needs_state_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + union { + struct i965_sf_state sf; + struct i965_wm_state wm; + struct i965_cc_state cc; + } state; + + i965_sf_state_init (&state.sf, shader); + if (! i965_sf_state_equal (&state.sf, &device->sf_state)) + return TRUE; + + i965_wm_state_init (&state.wm, shader); + if (! i965_wm_state_equal (&state.wm, &device->wm_state)) + return TRUE; + + i965_cc_state_init (&state.cc, shader); + if (! i965_cc_state_equal (&state.cc, &device->cc_state)) + return TRUE; + + return FALSE; +} + +static void +i965_emit_composite (i965_device_t *device, + i965_shader_t *shader) +{ + uint32_t draw_rectangle; + + if (i965_shader_needs_surface_update (shader, device)) { + /* Binding table pointers */ + OUT_BATCH (BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); + OUT_BATCH (0); /* vs */ + OUT_BATCH (0); /* gs */ + OUT_BATCH (0); /* clip */ + OUT_BATCH (0); /* sf */ + /* Only the PS uses the binding table */ + OUT_BATCH (emit_binding_table (device, shader)); + + device->target = shader->target; + device->source = shader->source.base.bo; + device->mask = shader->mask.base.bo; + device->clip = shader->clip.base.bo; + } + + /* The drawing rectangle clipping is always on. Set it to values that + * shouldn't do any clipping. 
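i965_emit_vertex_element() computes the per-vertex footprint from the active components: every vertex starts with an (x, y) float pair, spans append one R32_FLOAT of coverage, glyphs one R16G16_FLOAT texcoord pair, and since a hardware RECTLIST takes three corners per rectangle, rectangle_size is 3 x vertex_size. A small check of that arithmetic; the enumerators are local stand-ins for the VS_* constants in the i965 private header:

#include <assert.h>
#include <stddef.h>

enum vertex_kind { KIND_NONE, KIND_SPANS, KIND_GLYPHS };

static size_t
vertex_size (enum vertex_kind kind)
{
    size_t size = 2 * sizeof (float);       /* x, y */
    switch (kind) {
    case KIND_NONE:              break;
    case KIND_SPANS:  size += 4; break;     /* one float of coverage */
    case KIND_GLYPHS: size += 4; break;     /* two half-floats (s, t) */
    }
    return size;
}

int main (void)
{
    assert (vertex_size (KIND_NONE) == 8);
    assert (vertex_size (KIND_SPANS) == 12);
    assert (vertex_size (KIND_GLYPHS) == 12);
    /* three vertices per RECTLIST rectangle */
    assert (3 * vertex_size (KIND_SPANS) == 36);
    return 0;
}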
+ */ + draw_rectangle = DRAW_YMAX (shader->target->intel.drm.height - 1) | + DRAW_XMAX (shader->target->intel.drm.width - 1); + if (draw_rectangle != device->draw_rectangle) { + OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH (0x00000000); /* ymin, xmin */ + OUT_BATCH (draw_rectangle); + OUT_BATCH (0x00000000); /* yorigin, xorigin */ + device->draw_rectangle = draw_rectangle; + } + + /* skip the depth buffer */ + /* skip the polygon stipple */ + /* skip the polygon stipple offset */ + /* skip the line stipple */ + + /* Set the pointers to the 3d pipeline state */ + if (i965_shader_needs_state_update (shader, device)) { + OUT_BATCH (BRW_3DSTATE_PIPELINED_POINTERS | 5); + OUT_BATCH (vs_unit_state_emit (device)); + OUT_BATCH (BRW_GS_DISABLE); + OUT_BATCH (BRW_CLIP_DISABLE); + OUT_BATCH (gen4_create_sf_state (device, shader)); + OUT_BATCH (gen4_create_wm_state (device, shader)); + OUT_BATCH (cc_state_emit (device, shader)); + + /* Once the units are initialized, we need to setup the fences */ + i965_emit_urb_fences (device); + } + + if (i965_shader_needs_constants_update (shader, device)) { + uint32_t size = (sizeof (float) * shader->constants_size + 63) & -64; + + /* XXX reuse clear/black/white + * ht! + */ + + /* XXX CONSTANT_BUFFER Address Offset Disable? INSTPM? */ + + assert (size <= 64 * URB_CS_ENTRY_SIZE); + assert (((sizeof (float) * shader->constants_size + 31) & -32) == 32 * i965_shader_const_urb_length (shader)); + + OUT_BATCH (BRW_CONSTANT_BUFFER | (1 << 8)); + assert ((device->constant.used & 63) == 0); + i965_stream_add_pending_relocation (&device->constant, + device->batch.used, + I915_GEM_DOMAIN_INSTRUCTION, 0, + device->constant.used + size / 64 - 1); + OUT_BATCH (0); /* pending relocation */ + + device->constants = i965_stream_alloc (&device->constant, 0, size); + memcpy (device->constants, shader->constants, size); + device->constants_size = shader->constants_size; + } + + i965_emit_vertex_element (device, shader); +} + +void +i965_flush_vertices (i965_device_t *device) +{ + int vertex_count, vertex_start; + + if (device->vertex.used == device->vertex.committed) + return; + + vertex_start = device->vertex.committed / device->vertex_size; + vertex_count = + (device->vertex.used - device->vertex.committed) / device->vertex_size; + + assert (vertex_count); + + if (device->vertex_size != device->last_vertex_size) { + i965_stream_add_pending_relocation (&device->vertex, + device->batch.used + 8, + I915_GEM_DOMAIN_VERTEX, 0, + 0); + + OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) | + VB0_VERTEXDATA | + (device->vertex_size << VB0_BUFFER_PITCH_SHIFT)); + OUT_BATCH (0); /* pending relocation */ + OUT_BATCH (0); + OUT_BATCH (0); + device->last_vertex_size = device->vertex_size; + } + + OUT_BATCH (BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH (vertex_count); /* vertex count per instance */ + OUT_BATCH (vertex_start); /* start vertex offset */ + OUT_BATCH (1); /* single instance */ + OUT_BATCH (0); + OUT_BATCH (0); + + device->vertex.committed = device->vertex.used; + +#if 1 + OUT_BATCH (MI_FLUSH); +#endif +} + +void +i965_finish_vertices (i965_device_t *device) +{ + cairo_status_t status; + + i965_flush_vertices (device); + + i965_stream_commit (device, &device->vertex); + + if (! 
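i965_flush_vertices() turns everything appended to the vertex stream since the last flush into a single 3DPRIMITIVE: the start index and vertex count are derived from the committed/used byte watermarks divided by the current vertex size. A minimal model of that bookkeeping, with hypothetical names:

#include <assert.h>
#include <stdint.h>

struct vstream { uint32_t used, committed; };

static void
flush (struct vstream *s, uint32_t vertex_size,
       uint32_t *start, uint32_t *count)
{
    *start = s->committed / vertex_size;
    *count = (s->used - s->committed) / vertex_size;
    s->committed = s->used;          /* nothing left pending */
}

int main (void)
{
    struct vstream s = { 0, 0 };
    uint32_t start, count;

    s.used += 3 * 12;                /* one rectangle of 12-byte vertices */
    flush (&s, 12, &start, &count);
    assert (start == 0 && count == 3);

    s.used += 6 * 12;                /* two more rectangles */
    flush (&s, 12, &start, &count);
    assert (start == 3 && count == 6);
    return 0;
}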
i965_shader_check_aperture (device->shader, device)) { + status = i965_device_flush (device); + if (unlikely (status)) + longjmp (device->shader->unwind, status); + + status = i965_shader_commit (device->shader, device); + assert (status == CAIRO_STATUS_SUCCESS); + } + + device->last_vertex_size = 0; +} + +static cairo_bool_t +i965_shader_needs_update (const i965_shader_t *shader, + const i965_device_t *device) +{ + if (i965_shader_needs_surface_update (shader, device)) + return TRUE; + + if (i965_shader_needs_constants_update (shader, device)) + return TRUE; + + return i965_shader_needs_state_update (shader, device); +} + +static void +i965_shader_reduce (i965_shader_t *shader, + const i965_device_t *device) +{ + if (shader->op == CAIRO_OPERATOR_OVER && + (i965_wm_kernel_hash (shader) & ~0xff) == 0 && + (shader->source.base.content & CAIRO_CONTENT_ALPHA) == 0) + { + shader->op = CAIRO_OPERATOR_SOURCE; + } +} + +cairo_status_t +i965_shader_commit (i965_shader_t *shader, + i965_device_t *device) +{ + cairo_status_t status; + + if (! shader->committed) { + device->shader = shader; + + status = i965_shader_setup_dst (shader); + if (unlikely (status)) + return status; + + i965_shader_setup_constants (shader); + i965_shader_reduce (shader, device); + + if ((status = setjmp (shader->unwind))) + return status; + + shader->committed = TRUE; + } + + if (! i965_shader_needs_update (shader, device)) + return CAIRO_STATUS_SUCCESS; + + /* XXX too many guestimates about likely maximum sizes */ +recheck: + if (device->batch.used + 128 > device->batch.size || + ! i965_shader_check_aperture (shader, device)) + { + status = i965_device_flush (device); + if (unlikely (status)) + longjmp (shader->unwind, status); + } + + i965_flush_vertices (device); + + if (unlikely (device->surface.used + 128 > device->surface.size || + device->surface.num_relocations + 4 > device->surface.max_relocations)) + { + i965_stream_commit (device, &device->surface); + goto recheck; + } + + if (unlikely (device->constant.used + sizeof (device->constants) > device->constant.size || + device->constant.num_pending_relocations == device->constant.max_pending_relocations)) + { + i965_stream_commit (device, &device->constant); + goto recheck; + } + + if (unlikely (device->general.used + 512 > device->general.size)) { + i965_stream_commit (device, &device->general); + i965_general_state_reset (device); + goto recheck; + } + + if (unlikely (device->batch.used == 0)) + i965_emit_invariants (device); + + if (unlikely (device->surface.num_pending_relocations == 0 || + device->general.num_pending_relocations == 0)) + { + i965_emit_base (device); + } + + i965_emit_composite (device, shader); + + return CAIRO_STATUS_SUCCESS; +} + +void +i965_clipped_vertices (i965_device_t *device, + struct i965_vbo *vbo, + cairo_region_t *clip_region) +{ + int i, num_rectangles, size; + cairo_status_t status; + + if (vbo->count == 0) + return; + + num_rectangles = cairo_region_num_rectangles (clip_region); + assert (num_rectangles); + + if (vbo->next || + vbo->count * device->vertex_size + device->vertex.used > device->vertex.size) + { + i965_finish_vertices (device); + + size = device->rectangle_size; + do { + for (i = 0; i < num_rectangles; i++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, i, &rect); + + if (unlikely (device->vertex.used + size > device->vertex.size || + device->batch.used + 64 > device->batch.size || + ! 
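The recheck loop in i965_shader_commit() is a reservation protocol: before emitting anything, make sure every stream has a conservative amount of headroom, committing whichever one is short and then re-testing them all from the top, since a flush can invalidate the other checks. A reduced sketch of just the control flow (sizes and names invented):

#include <assert.h>
#include <stdint.h>

struct s { uint32_t used, size; int flushes; };

static void commit (struct s *st) { st->used = 0; st->flushes++; }

/* Guarantee `need` bytes in both streams before emitting state that
 * references them; committing either stream restarts the checks. */
static void
reserve (struct s *a, uint32_t need_a, struct s *b, uint32_t need_b)
{
recheck:
    if (a->used + need_a > a->size) {
        commit (a);
        goto recheck;
    }
    if (b->used + need_b > b->size) {
        commit (b);
        goto recheck;
    }
}

int main (void)
{
    struct s batch = { 1000, 1024, 0 }, surface = { 500, 512, 0 };

    reserve (&batch, 128, &surface, 128);
    assert (batch.flushes == 1 && surface.flushes == 1);
    assert (batch.used + 128 <= batch.size);
    return 0;
}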
i965_shader_check_aperture (device->shader, device))) + { + status = i965_device_flush (device); + if (unlikely (status)) + longjmp (device->shader->unwind, status); + + status = i965_shader_commit (device->shader, device); + assert (status == CAIRO_STATUS_SUCCESS); + } + + i965_emit_relocation (device, &device->batch, + vbo->bo, 0, + I915_GEM_DOMAIN_VERTEX, 0, + device->batch.used + 8); + + OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) | + VB0_VERTEXDATA | + (device->vertex_size << VB0_BUFFER_PITCH_SHIFT)); + OUT_BATCH (vbo->bo->offset); + OUT_BATCH (0); + OUT_BATCH (0); + + /* XXX scissor? */ + OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x)); + OUT_BATCH (DRAW_YMAX (rect.y + rect.height - 1) | + DRAW_XMAX (rect.x + rect.width - 1)); + OUT_BATCH (0x00000000); /* yorigin, xorigin */ + + OUT_BATCH (BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH (vbo->count); /* vertex count per instance */ + OUT_BATCH (0); /* start vertex offset */ + OUT_BATCH (1); /* single instance */ + OUT_BATCH (0); + OUT_BATCH (0); + } + } while ((vbo = vbo->next) != NULL); + assert (device->last_vertex_size == 0); + } else { + int vertex_start, vertex_count; + void *ptr; + + vertex_start = device->vertex.committed / device->vertex_size; + vertex_count = vbo->count; + + size = vertex_count * device->vertex_size; + ptr = intel_bo_map (&device->intel, vbo->bo); + memcpy (device->vertex.data + device->vertex.used, ptr, size); + intel_bo_unmap (vbo->bo); + device->vertex.committed = device->vertex.used += size; + + for (i = 0; i < num_rectangles; i++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, i, &rect); + + /* XXX scissor? */ + OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x)); + OUT_BATCH (DRAW_YMAX (rect.y + rect.height - 1) | + DRAW_XMAX (rect.x + rect.width - 1)); + OUT_BATCH (0x00000000); /* yorigin, xorigin */ + + OUT_BATCH (BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH (vertex_count); /* vertex count per instance */ + OUT_BATCH (vertex_start); /* start vertex offset */ + OUT_BATCH (1); /* single instance */ + OUT_BATCH (0); + OUT_BATCH (0); + } + } + + device->draw_rectangle = 0; +} diff --git a/src/drm/cairo-drm-i965-spans.c b/src/drm/cairo-drm-i965-spans.c new file mode 100644 index 00000000..9cf6d000 --- /dev/null +++ b/src/drm/cairo-drm-i965-spans.c @@ -0,0 +1,408 @@ +/* cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL.
+ * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Red Hat, Inc. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "cairoint.h" + +#include "cairo-composite-rectangles-private.h" +#include "cairo-boxes-private.h" +#include "cairo-error-private.h" +#include "cairo-drm-i965-private.h" + +/* Operates in either immediate or retained mode. + * When given a clip region we record the sequence of vbo and then + * replay them for each clip rectangle, otherwise we simply emit + * the vbo straight into the command stream. + */ + +typedef struct _i965_spans i965_spans_t; + +typedef float * +(*i965_get_rectangle_func_t) (i965_spans_t *spans); + +struct _i965_spans { + cairo_span_renderer_t renderer; + + i965_device_t *device; + + int xmin, xmax; + cairo_bool_t is_bounded; + const cairo_rectangle_int_t *extents; + + i965_get_rectangle_func_t get_rectangle; + i965_shader_t shader; + + cairo_region_t *clip_region; + + struct i965_vbo head, *tail; + + unsigned int vbo_offset; + float *vbo_base; +}; + +static float * +i965_spans_emit_rectangle (i965_spans_t *spans) +{ + return i965_add_rectangle (spans->device); +} + +static float * +i965_spans_accumulate_rectangle (i965_spans_t *spans) +{ + float *vertices; + uint32_t size; + + size = spans->device->rectangle_size; + if (unlikely (spans->vbo_offset + size > I965_VERTEX_SIZE)) { + struct i965_vbo *vbo; + + intel_bo_unmap (spans->tail->bo); + + vbo = malloc (sizeof (struct i965_vbo)); + if (unlikely (vbo == NULL)) { + /* throw error! 
*/ + } + + spans->tail->next = vbo; + spans->tail = vbo; + + vbo->next = NULL; + vbo->bo = intel_bo_create (&spans->device->intel, I965_VERTEX_SIZE, FALSE); + vbo->count = 0; + + spans->vbo_offset = 0; + spans->vbo_base = intel_bo_map (&spans->device->intel, vbo->bo); + } + + vertices = spans->vbo_base + spans->vbo_offset; + spans->vbo_offset += size; + spans->tail->count += 3; + + return vertices; +} + +static void +i965_span_rectangle (i965_spans_t *spans, + int x0, int x1, int y0, int y1, + int alpha) +{ + float *vertices; + float a = alpha / 255.; + + vertices = spans->get_rectangle (spans); + + *vertices++ = x1; + *vertices++ = y1; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y1; + *vertices++ = a; + + *vertices++ = x0; + *vertices++ = y0; + *vertices++ = a; +} + +static cairo_status_t +i965_bounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage >= 128) { + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + 255); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_bounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) + return CAIRO_STATUS_SUCCESS; + + do { + if (half[0].coverage) { + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + } + half++; + } while (--num_spans > 1); + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_unbounded_spans (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + i965_span_rectangle (spans, + spans->xmin, spans->xmax, + y, y + height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + i965_span_rectangle (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + half[0].coverage); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + i965_span_rectangle (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_unbounded_spans_mono (void *abstract_renderer, + int y, int height, + const cairo_half_open_span_t *half, + unsigned num_spans) +{ + i965_spans_t *spans = abstract_renderer; + + if (num_spans == 0) { + i965_span_rectangle (spans, + spans->xmin, spans->xmax, + y, y + height, + 0); + return CAIRO_STATUS_SUCCESS; + } + + if (half[0].x != spans->xmin) { + i965_span_rectangle (spans, + spans->xmin, half[0].x, + y, y + height, + 0); + } + + do { + int alpha = 0; + if (half[0].coverage >= 128) + alpha = 255; + i965_span_rectangle (spans, + half[0].x, half[1].x, + y, y + height, + alpha); + half++; + } while (--num_spans > 1); + + if (half[0].x != spans->xmax) { + i965_span_rectangle (spans, + half[0].x, spans->xmax, + y, y + height, + 0); + } + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_spans_init (i965_spans_t *spans, + i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + cairo_status_t status; + + 
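The renderers above consume cairo's half-open span lists: entry i covers [half[i].x, half[i+1].x), which is why each loop stops with one entry left and why each span becomes one rectangle of constant coverage. A tiny standalone conversion in the same shape, using plain structs instead of the cairo types:

#include <assert.h>

struct span { int x; int coverage; };      /* like cairo_half_open_span_t */
struct srect { int x0, x1, alpha; };

/* Convert the spans of one row into rectangles; returns how many. */
static int
spans_to_rects (const struct span *half, unsigned num_spans, struct srect *out)
{
    int n = 0;

    if (num_spans == 0)
        return 0;

    do {
        if (half[0].coverage) {            /* skip empty spans */
            out[n].x0 = half[0].x;
            out[n].x1 = half[1].x;         /* half-open: next entry ends it */
            out[n].alpha = half[0].coverage;
            n++;
        }
        half++;
    } while (--num_spans > 1);             /* last entry only terminates */

    return n;
}

int main (void)
{
    const struct span row[] = { {0, 255}, {10, 0}, {20, 128}, {30, 0} };
    struct srect rects[4];
    int n = spans_to_rects (row, 4, rects);

    assert (n == 2);
    assert (rects[0].x0 == 0 && rects[0].x1 == 10 && rects[0].alpha == 255);
    assert (rects[1].x0 == 20 && rects[1].x1 == 30 && rects[1].alpha == 128);
    return 0;
}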
spans->device = i965_device (dst); + i965_shader_init (&spans->shader, dst, op); + + spans->is_bounded = extents->is_bounded; + if (extents->is_bounded) { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i965_bounded_spans_mono; + else + spans->renderer.render_rows = i965_bounded_spans; + + spans->extents = &extents->bounded; + } else { + if (antialias == CAIRO_ANTIALIAS_NONE) + spans->renderer.render_rows = i965_unbounded_spans_mono; + else + spans->renderer.render_rows = i965_unbounded_spans; + + spans->extents = &extents->unbounded; + } + spans->xmin = spans->extents->x; + spans->xmax = spans->extents->x + spans->extents->width; + + spans->clip_region = NULL; + if (clip != NULL) { + cairo_region_t *clip_region = NULL; + + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || status == CAIRO_INT_STATUS_UNSUPPORTED); + + if (clip_region != NULL && cairo_region_num_rectangles (clip_region) == 1) + clip_region = NULL; + + spans->clip_region = clip_region; + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&spans->shader, clip); + } + + spans->head.next = NULL; + spans->head.bo = NULL; + spans->head.count = 0; + spans->tail = &spans->head; + + if (spans->clip_region == NULL) { + spans->get_rectangle = i965_spans_emit_rectangle; + } else { + spans->get_rectangle = i965_spans_accumulate_rectangle; + spans->head.bo = intel_bo_create (&spans->device->intel, + I965_VERTEX_SIZE, FALSE); + if (unlikely (spans->head.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + spans->vbo_base = intel_bo_map (&spans->device->intel, spans->head.bo); + } + spans->vbo_offset = 0; + + return i965_shader_acquire_pattern (&spans->shader, + &spans->shader.source, + pattern, &extents->bounded); +} + +static void +i965_spans_fini (i965_spans_t *spans) +{ + i965_shader_fini (&spans->shader); + + if (spans->head.bo != NULL) { + struct i965_vbo *vbo, *next; + + intel_bo_destroy (&spans->device->intel, spans->head.bo); + for (vbo = spans->head.next; vbo != NULL; vbo = next) { + next = vbo->next; + intel_bo_destroy (&spans->device->intel, vbo->bo); + free (vbo); + } + } +} + +cairo_status_t +i965_clip_and_composite_spans (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_antialias_t antialias, + i965_spans_func_t draw_func, + void *draw_closure, + const cairo_composite_rectangles_t*extents, + cairo_clip_t *clip) +{ + i965_spans_t spans; + i965_device_t *device; + cairo_status_t status; + + if (op == CAIRO_OPERATOR_CLEAR) { + pattern = &_cairo_pattern_white.base; + op = CAIRO_OPERATOR_DEST_OUT; + } + + status = i965_spans_init (&spans, dst, op, pattern, antialias, clip, extents); + if (unlikely (status)) + return status; + + spans.shader.mask.base.content = CAIRO_CONTENT_ALPHA; + spans.shader.mask.type.fragment = FS_SPANS; + spans.shader.mask.type.vertex = VS_SPANS; + spans.shader.mask.type.pattern = PATTERN_BASE; + + status = cairo_device_acquire (dst->intel.drm.base.device); + if (unlikely (status)) + goto CLEANUP_SPANS; + + device = i965_device (dst); + status = i965_shader_commit (&spans.shader, device); + if (unlikely (status)) + goto CLEANUP_DEVICE; + + status = draw_func (draw_closure, &spans.renderer, spans.extents); + if (spans.clip_region != NULL && status == CAIRO_STATUS_SUCCESS) { + intel_bo_unmap (spans.tail->bo); + i965_clipped_vertices (device, &spans.head, spans.clip_region); + } + + CLEANUP_DEVICE: + cairo_device_release (dst->intel.drm.base.device); + CLEANUP_SPANS: + 
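i965_clip_and_composite_spans() rewrites CLEAR as DEST_OUT with an opaque white source: DEST_OUT computes dst x (1 - src.alpha x coverage), so full coverage zeroes the destination, exactly what CLEAR requires, using a blend mode the hardware already supports. A two-case numeric check of that identity in normalized 8-bit arithmetic:

#include <assert.h>

int main (void)
{
    /* DEST_OUT: dst' = dst * (1 - src.alpha * coverage), 8-bit fixed point */
    int dst = 200, src_alpha = 255 /* opaque white */, coverage = 255;
    int out = dst * (255 - src_alpha * coverage / 255) / 255;
    assert (out == 0);                   /* full coverage clears */

    coverage = 0;
    out = dst * (255 - src_alpha * coverage / 255) / 255;
    assert (out == dst);                 /* no coverage leaves dst intact */
    return 0;
}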
i965_spans_fini (&spans); + + return status; +} diff --git a/src/drm/cairo-drm-i965-surface.c b/src/drm/cairo-drm-i965-surface.c new file mode 100644 index 00000000..0e0def81 --- /dev/null +++ b/src/drm/cairo-drm-i965-surface.c @@ -0,0 +1,1949 @@ +/* Cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Kristian Høgsberg + * Copyright © 2009 Chris Wilson + * Copyright © 2009 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Kristian Høgsberg. 
+ * + * Based on the xf86-intel-driver i965 render acceleration code, + * authored by: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * Carl Worth <cworth@redhat.com> + * Keith Packard <keithp@keithp.com> + */ + +/* XXX + * + * FIXME: Use brw_PLN for [DevCTG-B+] + * + */ + +#include "cairoint.h" + +#include "cairo-drm-private.h" +#include "cairo-drm-ioctl-private.h" +#include "cairo-drm-intel-private.h" +#include "cairo-drm-intel-command-private.h" +#include "cairo-drm-intel-ioctl-private.h" +#include "cairo-drm-i965-private.h" + +#include "cairo-boxes-private.h" +#include "cairo-composite-rectangles-private.h" +#include "cairo-error-private.h" +#include "cairo-region-private.h" +#include "cairo-surface-offset-private.h" + +#include <sys/ioctl.h> +#include <errno.h> + +#define I965_MAX_SIZE 8192 + +static const cairo_surface_backend_t i965_surface_backend; + +static void +i965_stream_init (i965_stream_t *stream, + uint8_t *data, uint32_t size, + struct i965_pending_relocation *pending, int max_pending, + struct drm_i915_gem_relocation_entry *relocations, int max_relocations) + +{ + stream->used = stream->committed = 0; + stream->data = data; + stream->size = size; + stream->serial = 1; + + stream->num_pending_relocations = 0; + stream->max_pending_relocations = max_pending; + stream->pending_relocations = pending; + + stream->num_relocations = 0; + stream->max_relocations = max_relocations; + stream->relocations = relocations; +} + +static void +i965_add_relocation (i965_device_t *device, + intel_bo_t *bo, + uint32_t read_domains, + uint32_t write_domain) +{ + if (bo->exec == NULL) { + int i; + + device->exec.gtt_size += bo->base.size; + + i = device->exec.count++; + assert (i < ARRAY_LENGTH (device->exec.exec)); + + device->exec.exec[i].handle = bo->base.handle; + device->exec.exec[i].relocation_count = 0; + device->exec.exec[i].relocs_ptr = 0; + device->exec.exec[i].alignment = 0; + device->exec.exec[i].offset = 0; + device->exec.exec[i].flags = 0; + device->exec.exec[i].rsvd1 = 0; + device->exec.exec[i].rsvd2 = 0; + + device->exec.bo[i] = intel_bo_reference (bo); + bo->exec = &device->exec.exec[i]; + } + + if (cairo_list_is_empty (&bo->link)) + cairo_list_add_tail (&device->flush, &bo->link); + + assert (write_domain == 0 || bo->batch_write_domain == 0 || bo->batch_write_domain == write_domain); + bo->batch_read_domains |= read_domains; + bo->batch_write_domain |= write_domain; +} + +void +i965_emit_relocation (i965_device_t *device, + i965_stream_t *stream, + intel_bo_t *target, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset) +{ + int n; + + assert (target_offset < target->base.size); + + i965_add_relocation (device, target, read_domains, write_domain); + + n = stream->num_relocations++; + assert (n < stream->max_relocations); + + stream->relocations[n].offset = offset; + stream->relocations[n].delta = target_offset; + stream->relocations[n].target_handle = target->base.handle; + stream->relocations[n].read_domains = read_domains; + stream->relocations[n].write_domain = write_domain; + stream->relocations[n].presumed_offset = target->offset; +} + +static void +i965_stream_reset (i965_stream_t *stream) +{ + stream->used = stream->committed = 0; + stream->num_relocations = 0; + stream->num_pending_relocations = 0; + if (++stream->serial == 0) + stream->serial = 1; +} + +void +i965_stream_commit (i965_device_t *device, + i965_stream_t *stream) +{ + intel_bo_t *bo; + int n; + + assert (stream->used); + + bo = 
intel_bo_create (&device->intel, stream->used, FALSE); + + /* apply pending relocations */ + for (n = 0; n < stream->num_pending_relocations; n++) { + struct i965_pending_relocation *p = &stream->pending_relocations[n]; + + i965_emit_relocation (device, &device->batch, bo, + p->delta, + p->read_domains, + p->write_domain, + p->offset); + if (bo->offset) + *(uint32_t *) (device->batch.data + p->offset) = bo->offset + p->delta; + } + + intel_bo_write (&device->intel, bo, 0, stream->used, stream->data); + + if (stream->num_relocations) { + assert (bo->exec != NULL); + bo->exec->relocs_ptr = (uintptr_t) stream->relocations; + bo->exec->relocation_count = stream->num_relocations; + } + + intel_bo_destroy (&device->intel, bo); + + i965_stream_reset (stream); +} + +static void +sf_states_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->sf_states, entry); + _cairo_freelist_free (&device->sf_freelist, entry); +} + +static void +cc_offsets_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->cc_states, entry); + _cairo_freelist_free (&device->cc_freelist, entry); +} + +static void +wm_kernels_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->wm_kernels, entry); + _cairo_freelist_free (&device->wm_kernel_freelist, entry); +} + +static void +wm_states_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->wm_states, entry); + _cairo_freelist_free (&device->wm_state_freelist, entry); +} + +static void +wm_bindings_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->wm_bindings, entry); + _cairo_freelist_free (&device->wm_binding_freelist, entry); +} + +static void +samplers_pluck (void *entry, void *closure) +{ + i965_device_t *device = closure; + + _cairo_hash_table_remove (device->samplers, entry); + _cairo_freelist_free (&device->sampler_freelist, entry); +} + +void +i965_general_state_reset (i965_device_t *device) +{ + _cairo_hash_table_foreach (device->sf_states, + sf_states_pluck, + device); + + _cairo_hash_table_foreach (device->cc_states, + cc_offsets_pluck, + device); + + _cairo_hash_table_foreach (device->wm_kernels, + wm_kernels_pluck, + device); + + _cairo_hash_table_foreach (device->wm_states, + wm_states_pluck, + device); + + _cairo_hash_table_foreach (device->wm_bindings, + wm_bindings_pluck, + device); + + _cairo_hash_table_foreach (device->samplers, + samplers_pluck, + device); + + device->vs_offset = (uint32_t) -1; + device->border_color_offset = (uint32_t) -1; + + if (device->general_state != NULL) { + intel_bo_destroy (&device->intel, device->general_state); + device->general_state = NULL; + } +} + +static void +i965_device_reset (i965_device_t *device) +{ + device->exec.count = 0; + device->exec.gtt_size = I965_CONSTANT_SIZE + + I965_VERTEX_SIZE + + I965_SURFACE_SIZE + + I965_GENERAL_SIZE + + I965_BATCH_SIZE; + + device->sf_state.entry.hash = (uint32_t) -1; + device->wm_state.entry.hash = (uint32_t) -1; + device->wm_binding.entry.hash = (uint32_t) -1; + device->cc_state.entry.hash = (uint32_t) -1; + + device->target = NULL; + device->source = NULL; + device->mask = NULL; + device->clip = NULL; + + device->draw_rectangle = (uint32_t) -1; + + device->vertex_type = (uint32_t) -1; + device->vertex_size = 0; + device->rectangle_size = 0; + device->last_vertex_size = 0; + + device->constants = 
NULL; + device->constants_size = 0; + + device->have_urb_fences = FALSE; +} + +static cairo_status_t +i965_exec (i965_device_t *device, uint32_t offset) +{ + struct drm_i915_gem_execbuffer2 execbuf; + cairo_status_t status = CAIRO_STATUS_SUCCESS; + int ret, i; + + execbuf.buffers_ptr = (uintptr_t) device->exec.exec; + execbuf.buffer_count = device->exec.count; + execbuf.batch_start_offset = offset; + execbuf.batch_len = device->batch.used; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.num_cliprects = 0; + execbuf.cliprects_ptr = 0; + execbuf.flags = I915_GEM_3D_PIPELINE; + execbuf.rsvd1 = 0; + execbuf.rsvd2 = 0; + +#if 0 + printf ("exec: offset=%d, length=%d, buffers=%d\n", + offset, device->batch.used, device->exec.count); + intel_dump_batchbuffer ((uint32_t *) device->batch.data, + device->batch.used, + device->intel.base.chip_id); +#endif + + ret = 0; + do { + ret = ioctl (device->intel.base.fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); + } while (ret != 0 && errno == EINTR); + if (unlikely (ret)) { + int n; + + if (errno == ENOMEM) + status = _cairo_error (CAIRO_STATUS_NO_MEMORY); + else + status = _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + + fprintf (stderr, "Batch submission failed: %d\n", errno); + fprintf (stderr, " gtt size: %zd/%zd\n", + device->exec.gtt_size, device->intel.gtt_avail_size); + + fprintf (stderr, " %d buffers:\n", + device->exec.count); + for (n = 0; n < device->exec.count; n++) { + fprintf (stderr, " exec[%d] = %d\n", + n, device->exec.bo[n]->base.size); + } + + intel_dump_batchbuffer ((uint32_t *) device->batch.data, + device->batch.used, + device->intel.base.chip_id); + } + + /* XXX any write target within the batch should now be in error */ + for (i = 0; i < device->exec.count; i++) { + cairo_bool_t ret; + + device->exec.bo[i]->offset = device->exec.exec[i].offset; + device->exec.bo[i]->exec = NULL; + device->exec.bo[i]->batch_read_domains = 0; + device->exec.bo[i]->batch_write_domain = 0; + + if (device->exec.bo[i]->purgeable) { + ret = intel_bo_madvise (&device->intel, + device->exec.bo[i], + I915_MADV_DONTNEED); + /* ignore immediate notification of purging */ + } + + cairo_list_init (&device->exec.bo[i]->link); + intel_bo_destroy (&device->intel, device->exec.bo[i]); + } + cairo_list_init (&device->flush); + + device->exec.count = 0; + + return status; +} + +static inline uint32_t +next_bo_size (uint32_t v) +{ + v = (v + 8191) / 8192; + + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + + return v * 8192; +} + +static void +_copy_to_bo_and_apply_relocations (i965_device_t *device, + intel_bo_t *bo, + i965_stream_t *stream, + uint32_t offset) +{ + int n; + + intel_bo_write (&device->intel, bo, + offset, stream->used, + stream->data); + + for (n = 0; n < stream->num_pending_relocations; n++) { + struct i965_pending_relocation *p = &stream->pending_relocations[n]; + + i965_emit_relocation (device, &device->batch, bo, + p->delta + offset, + p->read_domains, + p->write_domain, + p->offset); + + if (bo->offset) { + *(uint32_t *) (device->batch.data + p->offset) = + bo->offset + p->delta + offset; + } + } +} + +cairo_status_t +i965_device_flush (i965_device_t *device) +{ + cairo_status_t status; + uint32_t aligned, max; + intel_bo_t *bo; + int n; + + if (device->batch.used == 0) + return CAIRO_STATUS_SUCCESS; + + i965_flush_vertices (device); + + OUT_BATCH (MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned.
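next_bo_size() rounds a request up to a power-of-two number of 8 KiB pages: divide by 8192 rounding up, round the page count up to the next power of two with the classic bit-smearing sequence, then scale back. For example 20000 bytes -> 3 pages -> 4 pages -> 32768 bytes. A standalone copy with a few checks:

#include <assert.h>
#include <stdint.h>

static uint32_t
next_bo_size (uint32_t v)
{
    v = (v + 8191) / 8192;   /* pages, rounded up */

    v--;                     /* round the page count up to a power of two */
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;

    return v * 8192;
}

int main (void)
{
    assert (next_bo_size (1) == 8192);
    assert (next_bo_size (8192) == 8192);
    assert (next_bo_size (8193) == 16384);
    assert (next_bo_size (20000) == 32768);  /* 3 pages -> 4 pages */
    return 0;
}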
*/ + if (device->batch.used & 4) + OUT_BATCH (MI_NOOP); + +#if 0 + printf ("device flush: vertex=%d, constant=%d, surface=%d, general=%d, batch=%d\n", + device->vertex.used, + device->constant.used, + device->surface.used, + device->general.used, + device->batch.used); +#endif + + /* can we pack the surface state into the tail of the general state? */ + if (device->general.used == device->general.committed) { + if (device->general.used) { + assert (device->general.num_pending_relocations == 1); + assert (device->general_state != NULL); + i965_emit_relocation (device, &device->batch, + device->general_state, + device->general.pending_relocations[0].delta, + device->general.pending_relocations[0].read_domains, + device->general.pending_relocations[0].write_domain, + device->general.pending_relocations[0].offset); + + if (device->general_state->offset) { + *(uint32_t *) (device->batch.data + + device->general.pending_relocations[0].offset) = + device->general_state->offset + + device->general.pending_relocations[0].delta; + } + } + } else { + assert (device->general.num_pending_relocations == 1); + if (device->general_state != NULL) { + intel_bo_destroy (&device->intel, device->general_state); + device->general_state = NULL; + } + + bo = intel_bo_create (&device->intel, + device->general.used, + FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + aligned = (device->general.used + 31) & -32; + if (device->surface.used && + aligned + device->surface.used <= bo->base.size) + { + _copy_to_bo_and_apply_relocations (device, bo, &device->general, 0); + _copy_to_bo_and_apply_relocations (device, bo, &device->surface, aligned); + + if (device->surface.num_relocations) { + for (n = 0; n < device->surface.num_relocations; n++) + device->surface.relocations[n].offset += aligned; + + assert (bo->exec != NULL); + bo->exec->relocs_ptr = (uintptr_t) device->surface.relocations; + bo->exec->relocation_count = device->surface.num_relocations; + } + + i965_stream_reset (&device->surface); + } + else + { + _copy_to_bo_and_apply_relocations (device, bo, &device->general, 0); + } + + /* Note we don't reset the general state, just mark what data we've committed. */ + device->general.committed = device->general.used; + device->general_state = bo; + } + device->general.num_pending_relocations = 0; + + /* Combine vertex+constant+surface+batch streams? 
*/ + max = aligned = device->vertex.used; + if (device->constant.used) { + aligned = (aligned + 63) & -64; + aligned += device->constant.used; + if (device->constant.used > max) + max = device->constant.used; + } + if (device->surface.used) { + aligned = (aligned + 31) & -32; + aligned += device->surface.used; + if (device->surface.used > max) + max = device->surface.used; + } + aligned = (aligned + 63) & -64; + aligned += device->batch.used; + if (device->batch.used > max) + max = device->batch.used; + if (aligned <= next_bo_size (max)) { + int batch_num_relocations; + + if (aligned <= 8192) + max = aligned; + + bo = intel_bo_create (&device->intel, max, FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + assert (aligned <= bo->base.size); + + if (device->vertex.used) + _copy_to_bo_and_apply_relocations (device, bo, &device->vertex, 0); + + aligned = device->vertex.used; + if (device->constant.used) { + aligned = (aligned + 63) & -64; + _copy_to_bo_and_apply_relocations (device, bo, &device->constant, aligned); + aligned += device->constant.used; + } + + batch_num_relocations = device->batch.num_relocations; + if (device->surface.used) { + aligned = (aligned + 31) & -32; + _copy_to_bo_and_apply_relocations (device, bo, &device->surface, aligned); + + batch_num_relocations = device->batch.num_relocations; + if (device->surface.num_relocations) { + assert (device->batch.num_relocations + device->surface.num_relocations < device->batch.max_relocations); + + memcpy (device->batch.relocations + device->batch.num_relocations, + device->surface.relocations, + sizeof (device->surface.relocations[0]) * device->surface.num_relocations); + + for (n = 0; n < device->surface.num_relocations; n++) + device->batch.relocations[device->batch.num_relocations + n].offset += aligned; + + device->batch.num_relocations += device->surface.num_relocations; + } + + aligned += device->surface.used; + } + + aligned = (aligned + 63) & -64; + intel_bo_write (&device->intel, bo, + aligned, device->batch.used, + device->batch.data); + + for (n = 0; n < batch_num_relocations; n++) + device->batch.relocations[n].offset += aligned; + + if (device->exec.bo[device->exec.count-1] == bo) { + assert (bo->exec == &device->exec.exec[device->exec.count-1]); + + bo->exec->relocation_count = device->batch.num_relocations; + bo->exec->relocs_ptr = (uintptr_t) device->batch.relocations; + intel_bo_destroy (&device->intel, bo); + } else { + assert (bo->exec == NULL); + + n = device->exec.count++; + device->exec.exec[n].handle = bo->base.handle; + device->exec.exec[n].relocation_count = device->batch.num_relocations; + device->exec.exec[n].relocs_ptr = (uintptr_t) device->batch.relocations; + device->exec.exec[n].alignment = 0; + device->exec.exec[n].offset = 0; + device->exec.exec[n].flags = 0; + device->exec.exec[n].rsvd1 = 0; + device->exec.exec[n].rsvd2 = 0; + + /* transfer ownership to the exec */ + device->exec.bo[n] = bo; + } + } else { + i965_stream_commit (device, &device->vertex); + + if (device->constant.used && device->surface.used){ + aligned = (device->constant.used + 31) & -32; + aligned += device->surface.used; + + max = MAX (device->constant.used, device->surface.used); + if (aligned <= next_bo_size (max)) { + if (aligned <= 8192) + max = aligned; + + bo = intel_bo_create (&device->intel, max, FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + assert (aligned <= bo->base.size); + + _copy_to_bo_and_apply_relocations (device, bo, 
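The stream-packing logic above leans on the idiom (x + a - 1) & -a to round x up to an a-byte boundary, a being a power of two: adding a-1 carries any unaligned value past the next boundary and the mask clears the low bits. A quick verification for the 32- and 64-byte cases used here:

#include <assert.h>
#include <stdint.h>

static uint32_t
align_up (uint32_t x, uint32_t a)   /* a must be a power of two */
{
    return (x + a - 1) & -a;
}

int main (void)
{
    assert (align_up (0, 64) == 0);
    assert (align_up (1, 64) == 64);
    assert (align_up (64, 64) == 64);
    assert (align_up (100, 32) == 128);
    /* the in-tree spelling, e.g. (used + 63) & -64, is the same thing */
    assert (((100 + 63) & -64) == align_up (100, 64));
    return 0;
}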
&device->constant, 0); + + aligned = (device->constant.used + 31) & -32; + + _copy_to_bo_and_apply_relocations (device, bo, &device->surface, aligned); + + if (device->surface.num_relocations) { + assert (bo->exec != NULL); + + for (n = 0; n < device->surface.num_relocations; n++) + device->surface.relocations[n].offset += aligned; + + bo->exec->relocs_ptr = (uintptr_t) device->surface.relocations; + bo->exec->relocation_count = device->surface.num_relocations; + } + + i965_stream_reset (&device->surface); + i965_stream_reset (&device->constant); + + intel_bo_destroy (&device->intel, bo); + } + } else { + if (device->constant.used) + i965_stream_commit (device, &device->constant); + if (device->surface.used) + i965_stream_commit (device, &device->surface); + } + + bo = intel_bo_create (&device->intel, device->batch.used, FALSE); + if (unlikely (bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + intel_bo_write (&device->intel, bo, + 0, device->batch.used, + device->batch.data); + + n = device->exec.count++; + device->exec.exec[n].handle = bo->base.handle; + device->exec.exec[n].relocation_count = device->batch.num_relocations; + device->exec.exec[n].relocs_ptr = (uintptr_t) device->batch.relocations; + device->exec.exec[n].alignment = 0; + device->exec.exec[n].offset = 0; + device->exec.exec[n].flags = 0; + device->exec.exec[n].rsvd1 = 0; + device->exec.exec[n].rsvd2 = 0; + + /* transfer ownership to the exec */ + device->exec.bo[n] = bo; + aligned = 0; + } + + intel_glyph_cache_unmap (&device->intel); + + status = i965_exec (device, aligned); + + i965_stream_reset (&device->vertex); + i965_stream_reset (&device->surface); + i965_stream_reset (&device->constant); + i965_stream_reset (&device->batch); + + intel_glyph_cache_unpin (&device->intel); + intel_snapshot_cache_thaw (&device->intel); + + i965_device_reset (device); + + return status; +} + +static cairo_status_t +i965_surface_finish (void *abstract_surface) +{ + i965_surface_t *surface = abstract_surface; + + return intel_surface_finish (&surface->intel); +} + +static cairo_status_t +i965_surface_flush (void *abstract_surface) +{ + i965_surface_t *surface = abstract_surface; + cairo_status_t status = CAIRO_STATUS_SUCCESS; + + if (surface->intel.drm.fallback != NULL) + return intel_surface_flush (abstract_surface); + + /* Forgo flushing on finish as the user cannot access the surface directly. */ + if (! 
surface->intel.drm.base.finished && + to_intel_bo (surface->intel.drm.bo)->exec != NULL) + { + status = cairo_device_acquire (surface->intel.drm.base.device); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + i965_device_t *device; + + device = i965_device (surface); + status = i965_device_flush (device); + cairo_device_release (&device->intel.base.base); + } + } + + return status; +} + +/* rasterisation */ + +static cairo_status_t +_composite_boxes_spans (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents) +{ + cairo_boxes_t *boxes = closure; + cairo_rectangular_scan_converter_t converter; + struct _cairo_boxes_chunk *chunk; + cairo_status_t status; + + _cairo_rectangular_scan_converter_init (&converter, extents); + for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) { + cairo_box_t *box = chunk->base; + int i; + + for (i = 0; i < chunk->count; i++) { + status = _cairo_rectangular_scan_converter_add_box (&converter, &box[i], 1); + if (unlikely (status)) + goto CLEANUP; + } + } + + status = converter.base.generate (&converter.base, renderer); + + CLEANUP: + converter.base.destroy (&converter.base); + return status; +} + +cairo_status_t +i965_fixup_unbounded (i965_surface_t *dst, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip) +{ + i965_shader_t shader; + cairo_status_t status; + + i965_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR); + + if (clip != NULL) { + cairo_region_t *clip_region = NULL; + + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + assert (clip_region == NULL); + + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&shader, clip); + } else { + if (extents->bounded.width == extents->unbounded.width && + extents->bounded.height == extents->unbounded.height) + { + return CAIRO_STATUS_SUCCESS; + } + } + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + &_cairo_pattern_clear.base, + &extents->unbounded); + if (unlikely (status)) { + i965_shader_fini (&shader); + return status; + } + + status = i965_shader_commit (&shader, i965_device (dst)); + if (unlikely (status)) { + i965_shader_fini (&shader); + return status; + } + + /* top */ + if (extents->bounded.y != extents->unbounded.y) { + cairo_rectangle_int_t rect; + + rect.x = extents->unbounded.x; + rect.y = extents->unbounded.y; + rect.width = extents->unbounded.width; + rect.height = extents->bounded.y - rect.y; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + /* left */ + if (extents->bounded.x != extents->unbounded.x) { + cairo_rectangle_int_t rect; + + rect.x = extents->unbounded.x; + rect.y = extents->bounded.y; + rect.width = extents->bounded.x - extents->unbounded.x; + rect.height = extents->bounded.height; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + /* right */ + if (extents->bounded.x + extents->bounded.width != extents->unbounded.x + extents->unbounded.width) { + cairo_rectangle_int_t rect; + + rect.x = extents->bounded.x + extents->bounded.width; + rect.y = extents->bounded.y; + rect.width = extents->unbounded.x + extents->unbounded.width - rect.x; + rect.height = extents->bounded.height; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + /* bottom */ + if (extents->bounded.y + extents->bounded.height != extents->unbounded.y + extents->unbounded.height) { + cairo_rectangle_int_t rect; + + rect.x 
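/* Together the four rectangles form a frame covering the difference
 * between the unbounded and bounded extents; for an unbounded operator
 * only that border remains to be cleared. */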
= extents->unbounded.x; + rect.y = extents->bounded.y + extents->bounded.height; + rect.width = extents->unbounded.width; + rect.height = extents->unbounded.y + extents->unbounded.height - rect.y; + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + + i965_shader_fini (&shader); + return CAIRO_STATUS_SUCCESS; +} + +static cairo_status_t +i965_fixup_unbounded_boxes (i965_surface_t *dst, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip, + cairo_boxes_t *boxes) +{ + cairo_boxes_t clear; + cairo_box_t box; + cairo_region_t *clip_region = NULL; + cairo_status_t status; + struct _cairo_boxes_chunk *chunk; + i965_shader_t shader; + int i; + + if (boxes->num_boxes <= 1) + return i965_fixup_unbounded (dst, extents, clip); + + i965_shader_init (&shader, dst, CAIRO_OPERATOR_CLEAR); + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + if (status == CAIRO_INT_STATUS_UNSUPPORTED) + i965_shader_set_clip (&shader, clip); + } + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + &_cairo_pattern_clear.base, + &extents->unbounded); + if (unlikely (status)) { + i965_shader_fini (&shader); + return status; + } + + _cairo_boxes_init (&clear); + + box.p1.x = _cairo_fixed_from_int (extents->unbounded.x + extents->unbounded.width); + box.p1.y = _cairo_fixed_from_int (extents->unbounded.y); + box.p2.x = _cairo_fixed_from_int (extents->unbounded.x); + box.p2.y = _cairo_fixed_from_int (extents->unbounded.y + extents->unbounded.height); + + if (clip_region == NULL) { + cairo_boxes_t tmp; + + _cairo_boxes_init (&tmp); + + status = _cairo_boxes_add (&tmp, &box); + assert (status == CAIRO_STATUS_SUCCESS); + + tmp.chunks.next = &boxes->chunks; + tmp.num_boxes += boxes->num_boxes; + + status = _cairo_bentley_ottmann_tessellate_boxes (&tmp, + CAIRO_FILL_RULE_WINDING, + &clear); + + tmp.chunks.next = NULL; + } else { + pixman_box32_t *pbox; + + pbox = pixman_region32_rectangles (&clip_region->rgn, &i); + _cairo_boxes_limit (&clear, (cairo_box_t *) pbox, i); + + status = _cairo_boxes_add (&clear, &box); + assert (status == CAIRO_STATUS_SUCCESS); + + for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) { + for (i = 0; i < chunk->count; i++) { + status = _cairo_boxes_add (&clear, &chunk->base[i]); + if (unlikely (status)) { + _cairo_boxes_fini (&clear); + return status; + } + } + } + + status = _cairo_bentley_ottmann_tessellate_boxes (&clear, + CAIRO_FILL_RULE_WINDING, + &clear); + } + + if (likely (status == CAIRO_STATUS_SUCCESS && clear.num_boxes)) { + status = i965_shader_commit (&shader, i965_device (dst)); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + for (chunk = &clear.chunks; chunk != NULL; chunk = chunk->next) { + for (i = 0; i < chunk->count; i++) { + int x1 = _cairo_fixed_integer_part (chunk->base[i].p1.x); + int y1 = _cairo_fixed_integer_part (chunk->base[i].p1.y); + int x2 = _cairo_fixed_integer_part (chunk->base[i].p2.x); + int y2 = _cairo_fixed_integer_part (chunk->base[i].p2.y); + + i965_shader_add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1); + } + } + } + i965_shader_fini (&shader); + } + + _cairo_boxes_fini (&clear); + + return status; +} + +static cairo_status_t +_composite_boxes (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *pattern, + cairo_boxes_t *boxes, + cairo_antialias_t antialias, + cairo_clip_t *clip, + const cairo_composite_rectangles_t *extents) +{ + cairo_bool_t need_clip_surface = 
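/* Note the trick used in i965_fixup_unbounded_boxes above: the unbounded
 * extents are inserted as a box with reversed orientation (p1.x > p2.x),
 * so tessellating it together with the freshly drawn boxes under
 * CAIRO_FILL_RULE_WINDING cancels the covered areas and yields exactly
 * the uncovered remainder that needs to be cleared. */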
FALSE; + cairo_region_t *clip_region = NULL; + const struct _cairo_boxes_chunk *chunk; + cairo_status_t status; + i965_shader_t shader; + int i; + + /* If the boxes are not pixel-aligned, we will need to compute a real mask */ + if (antialias != CAIRO_ANTIALIAS_NONE) { + if (! boxes->is_pixel_aligned) + return CAIRO_INT_STATUS_UNSUPPORTED; + } + + i965_shader_init (&shader, dst, op); + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + pattern, + &extents->bounded); + if (unlikely (status)) + return status; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + if (need_clip_surface) + i965_shader_set_clip (&shader, clip); + } + + status = i965_shader_commit (&shader, i965_device (dst)); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + for (chunk = &boxes->chunks; chunk != NULL; chunk = chunk->next) { + cairo_box_t *box = chunk->base; + for (i = 0; i < chunk->count; i++) { + int x1 = _cairo_fixed_integer_round (box[i].p1.x); + int y1 = _cairo_fixed_integer_round (box[i].p1.y); + int x2 = _cairo_fixed_integer_round (box[i].p2.x); + int y2 = _cairo_fixed_integer_round (box[i].p2.y); + + if (x2 > x1 && y2 > y1) + i965_shader_add_rectangle (&shader, x1, y1, x2 - x1, y2 - y1); + } + } + } + i965_shader_fini (&shader); + + if (status == CAIRO_STATUS_SUCCESS && ! extents->is_bounded) + status = i965_fixup_unbounded_boxes (dst, extents, clip, boxes); + + return status; +} + +static cairo_status_t +_clip_and_composite_boxes (i965_surface_t *dst, + cairo_operator_t op, + const cairo_pattern_t *src, + cairo_boxes_t *boxes, + cairo_antialias_t antialias, + const cairo_composite_rectangles_t *extents, + cairo_clip_t *clip) +{ + cairo_status_t status; + + if (boxes->num_boxes == 0) { + if (extents->is_bounded) + return CAIRO_STATUS_SUCCESS; + + return i965_fixup_unbounded (dst, extents, clip); + } + + /* Use a fast path if the boxes are pixel aligned */ + status = _composite_boxes (dst, op, src, boxes, antialias, clip, extents); + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + return status; + + /* Otherwise render the boxes via an implicit mask and composite in the usual + * fashion. + */ + return i965_clip_and_composite_spans (dst, op, src, antialias, + _composite_boxes_spans, boxes, + extents, clip); +} + +static cairo_bool_t +box_is_aligned (const cairo_box_t *box) +{ + return + _cairo_fixed_is_integer (box->p1.x) && + _cairo_fixed_is_integer (box->p1.y) && + _cairo_fixed_is_integer (box->p2.x) && + _cairo_fixed_is_integer (box->p2.y); +} + +static inline cairo_status_t +_clip_to_boxes (cairo_clip_t **clip, + const cairo_composite_rectangles_t *extents, + cairo_box_t **boxes, + int *num_boxes) +{ + cairo_status_t status; + const cairo_rectangle_int_t *rect; + + rect = extents->is_bounded ? 
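/* This helper converts the clip into a list of boxes whenever it can:
 * if the operation is bounded, or the clip reduces to a single
 * pixel-aligned box, the boxes carry all of the clipping and the clip
 * itself is dropped; otherwise the caller keeps the clip and we simply
 * start from the relevant extents rectangle. */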
&extents->bounded: &extents->unbounded; + + if (*clip == NULL) + goto EXTENTS; + + status = _cairo_clip_rectangle (*clip, rect); + if (unlikely (status)) + return status; + + status = _cairo_clip_get_boxes (*clip, boxes, num_boxes); + if (status != CAIRO_INT_STATUS_UNSUPPORTED) { + if (extents->is_bounded || (*num_boxes == 1 && box_is_aligned (*boxes))) + *clip = NULL; + return status; + } + + EXTENTS: + _cairo_box_from_rectangle (&(*boxes)[0], rect); + *num_boxes = 1; + return CAIRO_STATUS_SUCCESS; +} + +static cairo_int_status_t +i965_surface_paint (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + cairo_boxes_t boxes; + cairo_box_t *clip_boxes = boxes.boxes_embedded; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + int num_boxes = ARRAY_LENGTH (boxes.boxes_embedded); + cairo_status_t status; + + /* XXX unsupported operators? use pixel shader blending, eventually */ + + status = _cairo_composite_rectangles_init_for_paint (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, + clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + _cairo_boxes_init_for_array (&boxes, clip_boxes, num_boxes); + status = _clip_and_composite_boxes (dst, op, source, + &boxes, CAIRO_ANTIALIAS_DEFAULT, + &extents, clip); + if (clip_boxes != boxes.boxes_embedded) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static cairo_int_status_t +i965_surface_mask (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + const cairo_pattern_t *mask, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + i965_shader_t shader; + cairo_clip_t local_clip; + cairo_region_t *clip_region = NULL; + cairo_bool_t need_clip_surface = FALSE; + cairo_bool_t have_clip = FALSE; + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_mask (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, mask, clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL && extents.is_bounded) { + clip = _cairo_clip_init_copy (&local_clip, clip); + status = _cairo_clip_rectangle (clip, &extents.bounded); + if (unlikely (status)) { + _cairo_clip_fini (&local_clip); + return status; + } + + have_clip = TRUE; + } + + i965_shader_init (&shader, dst, op); + + status = i965_shader_acquire_pattern (&shader, + &shader.source, + source, + &extents.bounded); + if (unlikely (status)) + goto BAIL; + + status = i965_shader_acquire_pattern (&shader, + &shader.mask, + mask, + &extents.bounded); + if (unlikely (status)) + goto BAIL; + + if (clip != NULL) { + status = _cairo_clip_get_region (clip, &clip_region); + assert (status == CAIRO_STATUS_SUCCESS || CAIRO_INT_STATUS_UNSUPPORTED); + need_clip_surface = status == CAIRO_INT_STATUS_UNSUPPORTED; + if (need_clip_surface) + i965_shader_set_clip (&shader, clip); + } + + status = i965_shader_commit (&shader, i965_device (dst)); + if (unlikely (status)) + 
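/* With source and mask bound, emission is pure geometry: one rectangle
 * per clip-region rectangle when the clip reduces to a region, otherwise
 * a single rectangle covering the bounded extents (a more complex clip
 * has already been folded into the shader via i965_shader_set_clip). */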
goto BAIL; + + if (clip_region != NULL) { + unsigned int n, num_rectangles; + + num_rectangles = cairo_region_num_rectangles (clip_region); + for (n = 0; n < num_rectangles; n++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip_region, n, &rect); + + i965_shader_add_rectangle (&shader, + rect.x, rect.y, + rect.width, rect.height); + } + } else { + i965_shader_add_rectangle (&shader, + extents.bounded.x, + extents.bounded.y, + extents.bounded.width, + extents.bounded.height); + } + + if (! extents.is_bounded) + status = i965_fixup_unbounded (dst, &extents, clip); + + BAIL: + i965_shader_fini (&shader); + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +typedef struct { + cairo_polygon_t polygon; + cairo_fill_rule_t fill_rule; + cairo_antialias_t antialias; +} composite_polygon_info_t; + +static cairo_status_t +_composite_polygon_spans (void *closure, + cairo_span_renderer_t *renderer, + const cairo_rectangle_int_t *extents) +{ + composite_polygon_info_t *info = closure; + cairo_botor_scan_converter_t converter; + cairo_status_t status; + cairo_box_t box; + + box.p1.x = _cairo_fixed_from_int (extents->x); + box.p1.y = _cairo_fixed_from_int (extents->y); + box.p2.x = _cairo_fixed_from_int (extents->x + extents->width); + box.p2.y = _cairo_fixed_from_int (extents->y + extents->height); + + _cairo_botor_scan_converter_init (&converter, &box, info->fill_rule); + + status = converter.base.add_polygon (&converter.base, &info->polygon); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = converter.base.generate (&converter.base, renderer); + + converter.base.destroy (&converter.base); + + return status; +} + +static cairo_int_status_t +i965_surface_stroke (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_stroke (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, + path, stroke_style, ctm, + clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + if (path->is_rectilinear) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_stroke_rectilinear_to_boxes (path, + stroke_style, + ctm, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_stroke_to_polygon (path, + 
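/* Non-rectilinear strokes end up here: the stroke outline is flattened
 * into a polygon within the given tolerance and rendered through the
 * span machinery (_composite_polygon_spans) with the WINDING fill rule,
 * limited to the clip boxes computed above. */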
stroke_style, + ctm, ctm_inverse, + tolerance, + &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! extents.is_bounded) + status = i965_fixup_unbounded (dst, &extents, clip); + } else { + info.fill_rule = CAIRO_FILL_RULE_WINDING; + info.antialias = antialias; + status = i965_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + } + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static cairo_int_status_t +i965_surface_fill (void *abstract_dst, + cairo_operator_t op, + const cairo_pattern_t*source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + i965_surface_t *dst = abstract_dst; + cairo_composite_rectangles_t extents; + composite_polygon_info_t info; + cairo_box_t boxes_stack[32], *clip_boxes = boxes_stack; + cairo_clip_t local_clip; + cairo_bool_t have_clip = FALSE; + int num_boxes = ARRAY_LENGTH (boxes_stack); + cairo_status_t status; + + status = _cairo_composite_rectangles_init_for_fill (&extents, + dst->intel.drm.width, + dst->intel.drm.height, + op, source, path, + clip); + if (unlikely (status)) + return status; + + if (clip != NULL && _cairo_clip_contains_rectangle (clip, &extents)) + clip = NULL; + + if (clip != NULL) { + clip = _cairo_clip_init_copy (&local_clip, clip); + have_clip = TRUE; + } + + status = _clip_to_boxes (&clip, &extents, &clip_boxes, &num_boxes); + if (unlikely (status)) { + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; + } + + assert (! path->is_empty_fill); + + if (_cairo_path_fixed_is_rectilinear_fill (path)) { + cairo_boxes_t boxes; + + _cairo_boxes_init (&boxes); + _cairo_boxes_limit (&boxes, clip_boxes, num_boxes); + status = _cairo_path_fixed_fill_rectilinear_to_boxes (path, + fill_rule, + &boxes); + if (likely (status == CAIRO_STATUS_SUCCESS)) { + status = _clip_and_composite_boxes (dst, op, source, + &boxes, antialias, + &extents, clip); + } + + _cairo_boxes_fini (&boxes); + + if (status != CAIRO_INT_STATUS_UNSUPPORTED) + goto CLEANUP_BOXES; + } + + _cairo_polygon_init (&info.polygon); + _cairo_polygon_limit (&info.polygon, clip_boxes, num_boxes); + + status = _cairo_path_fixed_fill_to_polygon (path, tolerance, &info.polygon); + if (unlikely (status)) + goto CLEANUP_POLYGON; + + if (extents.is_bounded) { + cairo_rectangle_int_t rect; + + _cairo_box_round_to_rectangle (&info.polygon.extents, &rect); + if (! _cairo_rectangle_intersect (&extents.bounded, &rect)) + goto CLEANUP_POLYGON; + } + + if (info.polygon.num_edges == 0) { + if (! 
extents.is_bounded) + status = i965_fixup_unbounded (dst, &extents, clip); + } else { + info.fill_rule = fill_rule; + info.antialias = antialias; + status = i965_clip_and_composite_spans (dst, op, source, antialias, + _composite_polygon_spans, &info, + &extents, clip); + } + +CLEANUP_POLYGON: + _cairo_polygon_fini (&info.polygon); + +CLEANUP_BOXES: + if (clip_boxes != boxes_stack) + free (clip_boxes); + + if (have_clip) + _cairo_clip_fini (&local_clip); + + return status; +} + +static const cairo_surface_backend_t i965_surface_backend = { + CAIRO_SURFACE_TYPE_DRM, + + _cairo_drm_surface_create_similar, + i965_surface_finish, + intel_surface_acquire_source_image, + intel_surface_release_source_image, + + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + + NULL, /* copy_page */ + NULL, /* show_page */ + _cairo_drm_surface_get_extents, + NULL, /* old-glyphs */ + _cairo_drm_surface_get_font_options, + + i965_surface_flush, + NULL, /* mark_dirty */ + intel_scaled_font_fini, + intel_scaled_glyph_fini, + + i965_surface_paint, + i965_surface_mask, + i965_surface_stroke, + i965_surface_fill, + i965_surface_glyphs, +}; + +static void +i965_surface_init (i965_surface_t *surface, + cairo_content_t content, + cairo_drm_device_t *device) +{ + intel_surface_init (&surface->intel, &i965_surface_backend, device, content); + surface->stream = 0; +} + +static inline int cairo_const +i965_tiling_stride (uint32_t tiling, int stride) +{ + if (tiling == I915_TILING_NONE) + return stride; + + return (stride + 127) & -128; +} + +static inline int cairo_const +i965_tiling_height (uint32_t tiling, int height) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return (height + 1) & -2; + case I915_TILING_X: return (height + 7) & -8; + case I915_TILING_Y: return (height + 31) & -32; + } +} + +cairo_surface_t * +i965_surface_create_internal (cairo_drm_device_t *base_dev, + cairo_content_t content, + int width, int height, + uint32_t tiling, + cairo_bool_t gpu_target) +{ + i965_surface_t *surface; + cairo_status_t status_ignored; + + surface = malloc (sizeof (i965_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i965_surface_init (surface, content, base_dev); + + if (width && height) { + uint32_t size; + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + + width = (width + 3) & -4; + surface->intel.drm.stride = cairo_format_stride_for_width (surface->intel.drm.format, + width); + surface->intel.drm.stride = (surface->intel.drm.stride + 63) & ~63; + +#if 0 + /* check for tiny surfaces for which tiling is irrelevant */ + if (height * surface->intel.drm.stride < 4096) + tiling = I915_TILING_NONE; +#endif + surface->intel.drm.stride = i965_tiling_stride (tiling, + surface->intel.drm.stride); + + height = i965_tiling_height (tiling, height); + assert (height <= I965_MAX_SIZE); + + size = surface->intel.drm.stride * height; + if (tiling != I915_TILING_NONE) + size = (size + 4095) & -4096; + + surface->intel.drm.bo = &intel_bo_create (to_intel_device (&base_dev->base), + size, gpu_target)->base; + if (surface->intel.drm.bo == NULL) { + status_ignored = _cairo_drm_surface_finish (&surface->intel.drm); + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + intel_bo_set_tiling (to_intel_device (&base_dev->base), + to_intel_bo (surface->intel.drm.bo), + tiling, 
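/* A worked example of the sizing above: a 100x100 ARGB32 surface has a
 * natural stride of 400 bytes, padded to 448 by the 64-byte alignment
 * and to 512 by i965_tiling_stride; with Y-tiling the height rounds up
 * to 128 rows, giving a page-aligned allocation of 512 * 128 = 65536
 * bytes. */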
surface->intel.drm.stride); + + assert (surface->intel.drm.bo->size >= (size_t) surface->intel.drm.stride*height); + } + + return &surface->intel.drm.base; +} + +static cairo_surface_t * +i965_surface_create (cairo_drm_device_t *device, + cairo_content_t content, int width, int height) +{ + return i965_surface_create_internal (device, content, width, height, + I965_TILING_DEFAULT, TRUE); +} + +static cairo_surface_t * +i965_surface_create_for_name (cairo_drm_device_t *base_dev, + unsigned int name, + cairo_format_t format, + int width, int height, int stride) +{ + i965_device_t *device; + i965_surface_t *surface; + cairo_content_t content; + cairo_status_t status_ignored; + int min_stride; + + min_stride = cairo_format_stride_for_width (format, (width + 3) & -4); + if (stride < min_stride || stride & 63) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_STRIDE)); + + if (format == CAIRO_FORMAT_A1) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + + switch (format) { + case CAIRO_FORMAT_ARGB32: + content = CAIRO_CONTENT_COLOR_ALPHA; + break; + case CAIRO_FORMAT_RGB24: + content = CAIRO_CONTENT_COLOR; + break; + case CAIRO_FORMAT_A8: + content = CAIRO_CONTENT_ALPHA; + break; + default: + case CAIRO_FORMAT_A1: + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + } + + surface = malloc (sizeof (i965_surface_t)); + if (unlikely (surface == NULL)) + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + + i965_surface_init (surface, content, base_dev); + + device = (i965_device_t *) base_dev; + surface->intel.drm.bo = &intel_bo_create_for_name (&device->intel, name)->base; + if (unlikely (surface->intel.drm.bo == NULL)) { + status_ignored = _cairo_drm_surface_finish (&surface->intel.drm); + free (surface); + return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); + } + + surface->intel.drm.width = width; + surface->intel.drm.height = height; + surface->intel.drm.stride = stride; + + return &surface->intel.drm.base; +} + +static cairo_status_t +i965_surface_enable_scan_out (void *abstract_surface) +{ + i965_surface_t *surface = abstract_surface; + intel_bo_t *bo; + + if (unlikely (surface->intel.drm.bo == NULL)) + return _cairo_error (CAIRO_STATUS_INVALID_SIZE); + + bo = to_intel_bo (surface->intel.drm.bo); + if (bo->tiling != I915_TILING_X) { + i965_device_t *device = i965_device (surface); + cairo_surface_pattern_t pattern; + cairo_surface_t *clone; + cairo_status_t status; + + clone = i965_surface_create_internal (&device->intel.base, + surface->intel.drm.base.content, + surface->intel.drm.width, + surface->intel.drm.height, + I915_TILING_X, + TRUE); + if (unlikely (clone->status)) + return clone->status; + + /* 2D blit? 
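 * This backend insists on X-tiling for scan-out: a surface that is not
 * already X-tiled is painted with CAIRO_OPERATOR_SOURCE into an X-tiled
 * clone -- in effect a blit through the 3D pipeline -- and the two
 * buffer objects are then swapped, so the surface keeps its identity
 * while taking ownership of the scan-out capable bo.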
*/ + _cairo_pattern_init_for_surface (&pattern, &surface->intel.drm.base); + pattern.base.filter = CAIRO_FILTER_NEAREST; + + status = _cairo_surface_paint (clone, + CAIRO_OPERATOR_SOURCE, + &pattern.base, + NULL); + + _cairo_pattern_fini (&pattern.base); + + if (unlikely (status)) { + cairo_surface_destroy (clone); + return status; + } + + /* swap buffer objects */ + surface->intel.drm.bo = ((cairo_drm_surface_t *) clone)->bo; + ((cairo_drm_surface_t *) clone)->bo = &bo->base; + bo = to_intel_bo (surface->intel.drm.bo); + + cairo_surface_destroy (clone); + } + + if (unlikely (bo->tiling == I915_TILING_Y)) + return _cairo_error (CAIRO_STATUS_INVALID_FORMAT); /* XXX */ + + return CAIRO_STATUS_SUCCESS; +} + +static cairo_int_status_t +_i965_device_flush (cairo_drm_device_t *device) +{ + cairo_status_t status; + + status = cairo_device_acquire (&device->base); + if (likely (status == CAIRO_STATUS_SUCCESS)) + status = i965_device_flush ((i965_device_t *) device); + + cairo_device_release (&device->base); + + return status; +} + +static cairo_int_status_t +_i965_device_throttle (cairo_drm_device_t *device) +{ + cairo_status_t status; + + status = cairo_device_acquire (&device->base); + if (unlikely (status)) + return status; + + status = i965_device_flush ((i965_device_t *) device); + intel_throttle ((intel_device_t *) device); + + cairo_device_release (&device->base); + + return status; +} + +static void +_i965_device_destroy (void *base) +{ + i965_device_t *device = base; + + i965_device_reset (device); + i965_general_state_reset (device); + + _cairo_hash_table_destroy (device->sf_states); + _cairo_hash_table_destroy (device->samplers); + _cairo_hash_table_destroy (device->cc_states); + _cairo_hash_table_destroy (device->wm_kernels); + _cairo_hash_table_destroy (device->wm_states); + _cairo_hash_table_destroy (device->wm_bindings); + + _cairo_freelist_fini (&device->sf_freelist); + _cairo_freelist_fini (&device->cc_freelist); + _cairo_freelist_fini (&device->wm_kernel_freelist); + _cairo_freelist_fini (&device->wm_state_freelist); + _cairo_freelist_fini (&device->wm_binding_freelist); + _cairo_freelist_fini (&device->sampler_freelist); + + intel_device_fini (&device->intel); + free (device); +} + +static cairo_bool_t +hash_equal (const void *A, const void *B) +{ + const cairo_hash_entry_t *a = A, *b = B; + return a->hash == b->hash; +} + +cairo_drm_device_t * +_cairo_drm_i965_device_create (int fd, dev_t dev, int vendor_id, int chip_id) +{ + i965_device_t *device; + uint64_t gtt_size; + cairo_status_t status; + + if (! 
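/* Device creation below wires the generic DRM vtable to the i965
 * implementations and then builds a hash table plus freelist for each
 * class of cached pipeline state (SF, CC, WM kernels/states/bindings,
 * samplers), so that identical state emitted twice can be reused rather
 * than re-uploaded. */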
intel_info (fd, &gtt_size)) + return NULL; + + device = malloc (sizeof (i965_device_t)); + if (unlikely (device == NULL)) + return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); + + status = intel_device_init (&device->intel, fd); + if (unlikely (status)) + goto CLEANUP; + + device->is_g4x = IS_G4X (chip_id); + //device->is_g5x = IS_G5X (chip_id); + + device->intel.base.surface.create = i965_surface_create; + device->intel.base.surface.create_for_name = i965_surface_create_for_name; + device->intel.base.surface.create_from_cacheable_image = NULL; + device->intel.base.surface.enable_scan_out = i965_surface_enable_scan_out; + + device->intel.base.device.flush = _i965_device_flush; + device->intel.base.device.throttle = _i965_device_throttle; + device->intel.base.device.destroy = _i965_device_destroy; + + device->sf_states = _cairo_hash_table_create (i965_sf_state_equal); + if (unlikely (device->sf_states == NULL)) + goto CLEANUP_INTEL; + + _cairo_freelist_init (&device->sf_freelist, + sizeof (struct i965_sf_state)); + + + device->cc_states = _cairo_hash_table_create (i965_cc_state_equal); + if (unlikely (device->cc_states == NULL)) + goto CLEANUP_SF; + + _cairo_freelist_init (&device->cc_freelist, + sizeof (struct i965_cc_state)); + + + device->wm_kernels = _cairo_hash_table_create (hash_equal); + if (unlikely (device->wm_kernels == NULL)) + goto CLEANUP_CC; + + _cairo_freelist_init (&device->wm_kernel_freelist, + sizeof (struct i965_wm_kernel)); + + device->wm_states = _cairo_hash_table_create (i965_wm_state_equal); + if (unlikely (device->wm_states == NULL)) + goto CLEANUP_WM_KERNEL; + + _cairo_freelist_init (&device->wm_state_freelist, + sizeof (struct i965_wm_state)); + + + device->wm_bindings = _cairo_hash_table_create (i965_wm_binding_equal); + if (unlikely (device->wm_bindings == NULL)) + goto CLEANUP_WM_STATE; + + _cairo_freelist_init (&device->wm_binding_freelist, + sizeof (struct i965_wm_binding)); + + device->samplers = _cairo_hash_table_create (hash_equal); + if (unlikely (device->samplers == NULL)) + goto CLEANUP_WM_BINDING; + + _cairo_freelist_init (&device->sampler_freelist, + sizeof (struct i965_sampler)); + + i965_stream_init (&device->batch, + device->batch_base, sizeof (device->batch_base), + NULL, 0, + device->batch_relocations, + ARRAY_LENGTH (device->batch_relocations)); + + i965_stream_init (&device->surface, + device->surface_base, sizeof (device->surface_base), + device->surface_pending_relocations, + ARRAY_LENGTH (device->surface_pending_relocations), + device->surface_relocations, + ARRAY_LENGTH (device->surface_relocations)); + + i965_stream_init (&device->general, + device->general_base, sizeof (device->general_base), + device->general_pending_relocations, + ARRAY_LENGTH (device->general_pending_relocations), + NULL, 0); + + i965_stream_init (&device->vertex, + device->vertex_base, sizeof (device->vertex_base), + device->vertex_pending_relocations, + ARRAY_LENGTH (device->vertex_pending_relocations), + NULL, 0); + + i965_stream_init (&device->constant, + device->constant_base, sizeof (device->constant_base), + device->constant_pending_relocations, + ARRAY_LENGTH (device->constant_pending_relocations), + NULL, 0); + + cairo_list_init (&device->flush); + i965_device_reset (device); + device->vs_offset = (uint32_t) -1; + device->border_color_offset = (uint32_t) -1; + device->general_state = NULL; + + return _cairo_drm_device_init (&device->intel.base, + fd, dev, vendor_id, chip_id, + I965_MAX_SIZE); + + CLEANUP_WM_BINDING: + 
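/* Unwind in reverse order of construction: each CLEANUP_* label releases
 * only what had been successfully created before the corresponding
 * failure point, ending with the device allocation itself. */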
_cairo_hash_table_destroy (device->wm_bindings); + CLEANUP_WM_STATE: + _cairo_hash_table_destroy (device->wm_states); + CLEANUP_WM_KERNEL: + _cairo_hash_table_destroy (device->wm_kernels); + CLEANUP_CC: + _cairo_hash_table_destroy (device->cc_states); + CLEANUP_SF: + _cairo_hash_table_destroy (device->sf_states); + CLEANUP_INTEL: + intel_device_fini (&device->intel); + CLEANUP: + free (device); + return (cairo_drm_device_t *) _cairo_device_create_in_error (status); +} diff --git a/src/drm/cairo-drm-intel-brw-defines.h b/src/drm/cairo-drm-intel-brw-defines.h new file mode 100644 index 00000000..b2be36f1 --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-defines.h @@ -0,0 +1,824 @@ +/************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef CAIRO_DRM_INTEL_BRW_DEFINES_H +#define CAIRO_DRM_INTEL_BRW_DEFINES_H + +/* 3D state: */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define BRW_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define BRW_PIPE_CONTROL BRW_3D(3, 2, 0) +#define BRW_PIPE_CONTROL_NOWRITE (0 << 14) +#define BRW_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define BRW_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define BRW_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define BRW_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define BRW_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define BRW_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2) + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define 
BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 
+#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define 
BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define 
BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_BORDER_COLOR_MODE_DEFAULT 0 +#define BRW_BORDER_COLOR_MODE_LEGACY 1 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 
+#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + 
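/* The groups that follow parameterize the EU `send' instruction: the
 * message target selects the shared-function unit that receives the
 * message (sampler, dataport, URB, thread spawner, ...), and the
 * BRW_SAMPLER_MESSAGE_* / BRW_DATAPORT_* values select the operation
 * performed within that unit. */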
+#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define 
BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+/* media pipeline */
+
+#define BRW_VFE_MODE_GENERIC 0x0
+#define BRW_VFE_MODE_VLD_MPEG2 0x1
+#define BRW_VFE_MODE_IS 0x2
+#define BRW_VFE_MODE_AVC_MC 0x4
+#define BRW_VFE_MODE_AVC_IT 0x7
+#define BRW_VFE_MODE_VC1_IT 0xB
+
+#define BRW_VFE_DEBUG_COUNTER_FREE 0
+#define BRW_VFE_DEBUG_COUNTER_FROZEN 1
+#define BRW_VFE_DEBUG_COUNTER_ONCE 2
+#define BRW_VFE_DEBUG_COUNTER_ALWAYS 3
+
+/* VLD_STATE */
+#define BRW_MPEG_TOP_FIELD 1
+#define BRW_MPEG_BOTTOM_FIELD 2
+#define BRW_MPEG_FRAME 3
+#define BRW_MPEG_QSCALE_LINEAR 0
+#define BRW_MPEG_QSCALE_NONLINEAR 1
+#define BRW_MPEG_ZIGZAG_SCAN 0
+#define BRW_MPEG_ALTER_VERTICAL_SCAN 1
+#define BRW_MPEG_I_PICTURE 1
+#define BRW_MPEG_P_PICTURE 2
+#define BRW_MPEG_B_PICTURE 3
+
+#endif
diff --git a/src/drm/cairo-drm-intel-brw-eu-emit.c b/src/drm/cairo-drm-intel-brw-eu-emit.c
new file mode 100644
index 00000000..05bac0a9
--- /dev/null
+++ b/src/drm/cairo-drm-intel-brw-eu-emit.c
@@ -0,0 +1,1089 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "cairoint.h" +#include "cairo-drm-intel-brw-eu.h" + +#include <string.h> + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +void +brw_instruction_set_destination (struct brw_instruction *insn, + struct brw_reg dest) +{ + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +void +brw_instruction_set_source0 (struct brw_instruction *insn, + struct brw_reg reg) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } else { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = 
reg.dw1.bits.indirect_offset; + } else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_instruction *insn, + uint32_t msg_length, + uint32_t response_length, + uint32_t function, + uint32_t integer_type, + int low_precision, + int saturate, + uint32_t dataType ) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + 
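+    /* The message descriptor is carried in the src1 slot (bits3), so
+     * it is zeroed via an immediate src1 before the individual fields
+     * are filled in below.
+     */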
insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; +} + +static void brw_set_urb_message( struct brw_instruction *insn, + int allocate, + int used, + uint32_t msg_length, + uint32_t response_length, + int end_of_thread, + int complete, + uint32_t offset, + uint32_t swizzle_control ) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +void +brw_instruction_set_dp_write_message (struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t msg_length, + uint32_t pixel_scoreboard_clear, + uint32_t response_length, + uint32_t end_of_thread) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_read_message( struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t target_cache, + uint32_t msg_length, + uint32_t response_length, + uint32_t end_of_thread ) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + insn->bits3.dp_read.binding_table_index = binding_table_index; + insn->bits3.dp_read.msg_control = msg_control; + insn->bits3.dp_read.msg_type = msg_type; + insn->bits3.dp_read.target_cache = target_cache; + insn->bits3.dp_read.response_length = response_length; + insn->bits3.dp_read.msg_length = msg_length; + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits3.dp_read.end_of_thread = end_of_thread; +} + +static void +brw_set_sampler_message (struct brw_instruction *insn, + cairo_bool_t is_g4x, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + cairo_bool_t eot) +{ + brw_set_src1 (insn, brw_imm_d (0)); + + if (is_g4x) { + /* XXX presume the driver is sane! 
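+     * (On g4x the sampler message descriptor uses a different layout,
+     * so the same fields are written through the sampler_g4x view of
+     * bits3 rather than the original sampler one.)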
*/ + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + insn->bits3.sampler_g4x.response_length = response_length; + insn->bits3.sampler_g4x.msg_length = msg_length; + insn->bits3.sampler_g4x.end_of_thread = eot; + insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } +} + +struct brw_instruction * +brw_next_instruction (struct brw_compile *p, + uint32_t opcode) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: */ + if (p->current->header.destreg__conditonalmod) { + p->current->header.destreg__conditonalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + +static struct brw_instruction *brw_alu1( struct brw_compile *p, + uint32_t opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = brw_next_instruction(p, opcode); + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src); + return insn; +} + +static struct brw_instruction *brw_alu2(struct brw_compile *p, + uint32_t opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = brw_next_instruction(p, opcode); + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. 
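+ *
+ * The ALU1/ALU2 macros below expand into thin wrappers around
+ * brw_alu1()/brw_alu2(), so that, for example, brw_ADD (p, dest,
+ * src0, src1) appends a single BRW_OPCODE_ADD instruction to the
+ * stream being compiled.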
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+                                 struct brw_reg dest, \
+                                 struct brw_reg src0) \
+{ \
+    return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+                                 struct brw_reg dest, \
+                                 struct brw_reg src0, \
+                                 struct brw_reg src1) \
+{ \
+    return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+void brw_NOP(struct brw_compile *p)
+{
+    struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_NOP);
+    brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+    brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+    brw_set_src1(insn, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+                                 struct brw_reg dest,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+    p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+    return insn;
+}
+
+/* The EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, e.g. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
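+ *
+ * As a rough usage sketch (assuming the flag register has already
+ * been loaded, e.g. by brw_CMP() below), a predicated block is
+ * built as:
+ *
+ *     struct brw_instruction *if_insn, *else_insn;
+ *     if_insn = brw_IF (p, BRW_EXECUTE_16);
+ *     ...                     (emitted for channels passing the test)
+ *     else_insn = brw_ELSE (p, if_insn);
+ *     ...                     (emitted for the remaining channels)
+ *     brw_ENDIF (p, else_insn);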
+ */ +struct brw_instruction *brw_IF(struct brw_compile *p, uint32_t execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = brw_next_instruction(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = brw_next_instruction(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_instruction_set_destination (insn, brw_ip_reg ()); + brw_instruction_set_source0 (insn, brw_ip_reg ()); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = brw_next_instruction(p, BRW_OPCODE_ADD); + } else { + insn = brw_next_instruction(p, BRW_OPCODE_ELSE); + } + + brw_instruction_set_destination (insn, brw_ip_reg ()); + brw_instruction_set_source0 (insn, brw_ip_reg ()); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_ENDIF); + + brw_instruction_set_destination(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_instruction_set_source0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. 
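+ *
+ * (In the single_program_flow variants the branch is just an ADD to
+ * the IP register, so the patched immediate is a byte offset: each EU
+ * instruction is 16 bytes, hence the "* 16" scaling.  The real
+ * IF/ELSE path below instead patches jump_count in whole-instruction
+ * units.)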
+ */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = brw_next_instruction(p, BRW_OPCODE_BREAK); + brw_instruction_set_destination(insn, brw_ip_reg()); + brw_instruction_set_source0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d (0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = brw_next_instruction(p, BRW_OPCODE_CONTINUE); + brw_instruction_set_destination(insn, brw_ip_reg()); + brw_instruction_set_source0(insn, brw_ip_reg()); + brw_set_src1 (insn, brw_imm_d (0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: +*/ +struct brw_instruction *brw_DO(struct brw_compile *p, uint32_t execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_instruction_set_destination(insn, brw_null_reg()); + brw_instruction_set_source0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = brw_next_instruction(p, BRW_OPCODE_ADD); + else + insn = brw_next_instruction(p, BRW_OPCODE_WHILE); + + brw_instruction_set_destination(insn, brw_ip_reg()); + brw_instruction_set_source0(insn, brw_ip_reg()); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn + 1; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: +*/ +void 
brw_land_fwd_jump(struct brw_compile *p,
+                  struct brw_instruction *jmp_insn)
+{
+    struct brw_instruction *landing = &p->store[p->nr_insn];
+
+    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+    assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+    jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             uint32_t conditional,
+             struct brw_reg src0,
+             struct brw_reg src1)
+{
+    struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_CMP);
+
+    insn->header.destreg__conditonalmod = conditional;
+    brw_instruction_set_destination(insn, dest);
+    brw_instruction_set_source0(insn, src0);
+    brw_set_src1(insn, src1);
+
+    /* guess_execution_size(insn, src0); */
+
+
+    /* Make it so that future instructions will use the computed flag
+     * value until brw_set_predicate_control_flag_value() is called
+     * again.
+     */
+    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+        dest.nr == 0) {
+        p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+        p->flag_value = 0xff;
+    }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/* Invert 8 values
+ */
+void brw_math( struct brw_compile *p,
+               struct brw_reg dest,
+               uint32_t function,
+               uint32_t saturate,
+               uint32_t msg_reg_nr,
+               struct brw_reg src,
+               uint32_t data_type,
+               uint32_t precision )
+{
+    struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND);
+    uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+    uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+    /* Example code doesn't set predicate_control for send
+     * instructions.
+     */
+    insn->header.predicate_control = 0;
+    insn->header.destreg__conditonalmod = msg_reg_nr;
+
+    response_length = 1;
+
+    brw_instruction_set_destination(insn, dest);
+    brw_instruction_set_source0(insn, src);
+    brw_set_math_message(insn,
+                         msg_length, response_length,
+                         function,
+                         BRW_MATH_INTEGER_UNSIGNED,
+                         precision,
+                         saturate,
+                         data_type);
+}
+
+/* Use 2 send instructions to invert 16 elements
+ */
+void brw_math_16( struct brw_compile *p,
+                  struct brw_reg dest,
+                  uint32_t function,
+                  uint32_t saturate,
+                  uint32_t msg_reg_nr,
+                  struct brw_reg src,
+                  uint32_t precision )
+{
+    struct brw_instruction *insn;
+    uint32_t msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+    uint32_t response_length = (function == BRW_MATH_FUNCTION_SINCOS) ?
2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = brw_next_instruction(p, BRW_OPCODE_SEND); + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = brw_next_instruction(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditonalmod = msg_reg_nr+1; + + brw_instruction_set_destination(insn, offset(dest,1)); + brw_instruction_set_source0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + + + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + uint32_t msg_reg_nr, + uint32_t scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV (p, + retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d (scratch_offset)); + + brw_pop_insn_state(p); + } + + { + uint32_t msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src); + + brw_instruction_set_dp_write_message(insn, + 255, /* bti */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } + +} + + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + uint32_t scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV (p, + retype (brw_vec1_grf (0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d (scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); /* UW? 
*/ + brw_instruction_set_source0(insn, retype(brw_vec8_grf(0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(insn, + 255, /* bti */ + 3, /* msg_control */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t msg_length, + uint32_t response_length, + int eot) +{ + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src0); + brw_instruction_set_dp_write_message(insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + + +void brw_SAMPLE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t writemask, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + cairo_bool_t eot) +{ + int need_stall = 0; + + if(writemask == 0) { + /* printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. 
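+ *
+ * For example, under this scheme a writemask of XY (a contiguous run
+ * starting at X) is handled by rewriting the message header and
+ * shrinking the response, whereas a writemask of XZ is not contiguous
+ * and falls back to emitting a dummy dependent MOV after the send.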
+ */ + if (writemask != WRITEMASK_XYZW) { + uint32_t dst_offset = 0; + uint32_t i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { + need_stall = 1; + /* printf("need stall %x %x\n", newmask , writemask); */ + } + else { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + newmask = ~newmask & WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, m1, brw_vec8_grf(0)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + response_length = len * 2; + } + } + + { + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_instruction_set_destination(insn, dest); + brw_instruction_set_source0(insn, src0); + brw_set_sampler_message (insn, p->is_g4x, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot); + } + + if (need_stall) + { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, 0); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + int allocate, + int used, + uint32_t msg_length, + uint32_t response_length, + int eot, + int writes_complete, + uint32_t offset, + uint32_t swizzle) +{ + struct brw_instruction *insn = brw_next_instruction(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_instruction_set_destination (insn, dest); + brw_instruction_set_source0 (insn, src0); + brw_set_src1 (insn, brw_imm_d (0)); + + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_urb_message (insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} diff --git a/src/drm/cairo-drm-intel-brw-eu-util.c b/src/drm/cairo-drm-intel-brw-eu-util.c new file mode 100644 index 00000000..592235b1 --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-eu-util.c @@ -0,0 +1,121 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "cairoint.h"
+#include "cairo-drm-intel-brw-eu.h"
+
+
+void brw_math_invert( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src)
+{
+    brw_math( p,
+              dst,
+              BRW_MATH_FUNCTION_INV,
+              BRW_MATH_SATURATE_NONE,
+              0,
+              src,
+              BRW_MATH_DATA_VECTOR,
+              BRW_MATH_PRECISION_FULL );
+}
+
+
+
+void brw_copy4(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               uint32_t count)
+{
+    uint32_t i;
+
+    dst = vec4(dst);
+    src = vec4(src);
+
+    for (i = 0; i < count; i++)
+    {
+        uint32_t delta = i*32;
+        brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+        brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+    }
+}
+
+
+void brw_copy8(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               uint32_t count)
+{
+    uint32_t i;
+
+    dst = vec8(dst);
+    src = vec8(src);
+
+    for (i = 0; i < count; i++)
+    {
+        uint32_t delta = i*32;
+        brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+    }
+}
+
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+                                   struct brw_indirect dst_ptr,
+                                   struct brw_indirect src_ptr,
+                                   uint32_t count)
+{
+    uint32_t i;
+
+    for (i = 0; i < count; i++)
+    {
+        uint32_t delta = i*32;
+        brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
+        brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+    }
+}
+
+
+void brw_copy_from_indirect(struct brw_compile *p,
+                            struct brw_reg dst,
+                            struct brw_indirect ptr,
+                            uint32_t count)
+{
+    uint32_t i;
+
+    dst = vec4(dst);
+
+    for (i = 0; i < count; i++)
+    {
+        uint32_t delta = i*32;
+        brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
+        brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+    }
+}
diff --git a/src/drm/cairo-drm-intel-brw-eu.c b/src/drm/cairo-drm-intel-brw-eu.c
new file mode 100644
index 00000000..51c3de4f
--- /dev/null
+++ b/src/drm/cairo-drm-intel-brw-eu.c
@@ -0,0 +1,250 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "cairoint.h" +#include "cairo-drm-intel-brw-eu.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, uint32_t value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, uint32_t pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, uint32_t conditional ) +{ + p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, uint32_t access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, int compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, uint32_t value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, uint32_t value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + +/************************************************************************/ +void +brw_compile_init (struct brw_compile *p, + cairo_bool_t is_g4x) +{ + p->nr_insn = 0; + p->current = p->stack; + memset (p->current, 0, sizeof (p->current[0])); + + p->is_g4x = is_g4x; + + /* Some defaults? */ + brw_set_mask_control (p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate (p, 0); + brw_set_compression_control (p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value (p, 0xff); +} + +const uint32_t * +brw_get_program (struct brw_compile *p, + uint32_t *sz) +{ + *sz = p->nr_insn * sizeof (struct brw_instruction); + return (const uint32_t *)p->store; +} + + + +/** + * Subroutine calls require special attention. + * Mesa instructions may be expanded into multiple hardware instructions + * so the prog_instruction::BranchTarget field can't be used as an index + * into the hardware instructions. + * + * The BranchTarget field isn't needed, however. Mesa's GLSL compiler + * emits CAL and BGNSUB instructions with labels that can be used to map + * subroutine calls to actual subroutine code blocks. + * + * The structures and function here implement patching of CAL instructions + * so they jump to the right subroutine code... + */ + + +/** + * For each OPCODE_BGNSUB we create one of these. 
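+ *
+ * As a sketch of the intended flow (using the helpers defined below):
+ * the compiler calls brw_save_label (c, name, pos) when it emits an
+ * OPCODE_BGNSUB, brw_save_call (c, name, pos) when it emits an
+ * OPCODE_CAL, and finally brw_resolve_cals (c), which patches each
+ * CAL's src1 immediate with the byte offset to its subroutine.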
+ */ +struct brw_glsl_label +{ + const char *name; /**< the label string */ + uint32_t position; /**< the position of the brw instruction for this label */ + struct brw_glsl_label *next; /**< next in linked list */ +}; + + +/** + * For each OPCODE_CAL we create one of these. + */ +struct brw_glsl_call +{ + uint32_t call_inst_pos; /**< location of the CAL instruction */ + const char *sub_name; /**< name of subroutine to call */ + struct brw_glsl_call *next; /**< next in linked list */ +}; + + +/** + * Called for each OPCODE_BGNSUB. + */ + void +brw_save_label(struct brw_compile *c, const char *name, uint32_t position) +{ + struct brw_glsl_label *label = calloc(1, sizeof *label); + label->name = name; + label->position = position; + label->next = c->first_label; + c->first_label = label; +} + + +/** + * Called for each OPCODE_CAL. + */ + void +brw_save_call(struct brw_compile *c, const char *name, uint32_t call_pos) +{ + struct brw_glsl_call *call = calloc(1, sizeof *call); + call->call_inst_pos = call_pos; + call->sub_name = name; + call->next = c->first_call; + c->first_call = call; +} + + +/** + * Lookup a label, return label's position/offset. + */ + static uint32_t +brw_lookup_label(struct brw_compile *c, const char *name) +{ + const struct brw_glsl_label *label; + for (label = c->first_label; label; label = label->next) { + if (strcmp(name, label->name) == 0) { + return label->position; + } + } + abort(); /* should never happen */ + return ~0; +} + + +/** + * When we're done generating code, this function is called to resolve + * subroutine calls. + */ + void +brw_resolve_cals(struct brw_compile *c) +{ + const struct brw_glsl_call *call; + + for (call = c->first_call; call; call = call->next) { + const uint32_t sub_loc = brw_lookup_label(c, call->sub_name); + struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; + struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; + int32_t offset = brw_sub_inst - brw_call_inst; + + /* patch brw_inst1 to point to brw_inst2 */ + brw_set_src1(brw_call_inst, brw_imm_d(offset * 16)); + } + + /* free linked list of calls */ + { + struct brw_glsl_call *call, *next; + for (call = c->first_call; call; call = next) { + next = call->next; + free(call); + } + c->first_call = NULL; + } + + /* free linked list of labels */ + { + struct brw_glsl_label *label, *next; + for (label = c->first_label; label; label = next) { + next = label->next; + free(label); + } + c->first_label = NULL; + } +} diff --git a/src/drm/cairo-drm-intel-brw-eu.h b/src/drm/cairo-drm-intel-brw-eu.h new file mode 100644 index 00000000..7a2a65ce --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-eu.h @@ -0,0 +1,1043 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef CAIRO_DRM_INTEL_BRW_EU_H +#define CAIRO_DRM_INTEL_BRW_EU_H + +#include "cairo-drm-intel-brw-structs.h" +#include "cairo-drm-intel-brw-defines.h" + +#include <assert.h> + + +/** + * Writemask values, 1 bit per component. + */ +#define WRITEMASK_X 0x1 +#define WRITEMASK_Y 0x2 +#define WRITEMASK_Z 0x4 +#define WRITEMASK_W 0x8 +#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y) +#define WRITEMASK_XZ (WRITEMASK_X | WRITEMASK_Z) +#define WRITEMASK_YZ (WRITEMASK_Y | WRITEMASK_Z) +#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z) +#define WRITEMASK_XW (WRITEMASK_X | WRITEMASK_W) +#define WRITEMASK_YW (WRITEMASK_Y | WRITEMASK_W) +#define WRITEMASK_XYW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_W) +#define WRITEMASK_ZW (WRITEMASK_Z | WRITEMASK_W) +#define WRITEMASK_XZW (WRITEMASK_X | WRITEMASK_Z | WRITEMASK_W) +#define WRITEMASK_YZW (WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W) +#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W) + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4 (0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4 (0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4 (0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4 (0,1,0,1) + +#define REG_SIZE (8*4) + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg { + uint32_t type:4; + uint32_t file:2; + uint32_t nr:8; + uint32_t subnr:5; /* :1 in align16 */ + uint32_t negate:1; /* source only */ + uint32_t abs:1; /* source only */ + uint32_t vstride:4; /* source only */ + uint32_t width:3; /* src only, align1 only */ + uint32_t hstride:2; /* align1 only */ + uint32_t address_mode:1; /* relative addressing, hopefully! 
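+ * (set for BRW_ADDRESS_REGISTER_INDIRECT_REGISTER, clear for
+ * BRW_ADDRESS_DIRECT)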
*/
+    uint32_t pad0:1;
+
+    union {
+        struct {
+            uint32_t swizzle:8;          /* src only, align16 only */
+            uint32_t writemask:4;        /* dest only, align16 only */
+            int32_t indirect_offset:10;  /* relative addressing offset */
+            uint32_t pad1:10;            /* two dwords total */
+        } bits;
+
+        float f;
+        int32_t d;
+        uint32_t ud;
+    } dw1;
+};
+
+struct brw_indirect {
+    uint32_t addr_subnr:4;
+    int32_t addr_offset:10;
+    uint32_t pad:18;
+};
+
+struct brw_glsl_label;
+struct brw_glsl_call;
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 200
+
+struct brw_compile {
+    struct brw_instruction store[BRW_EU_MAX_INSN];
+    uint32_t nr_insn;
+
+    cairo_bool_t is_g4x;
+
+    /* Allow clients to push/pop instruction state:
+     */
+    struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+    struct brw_instruction *current;
+
+    uint32_t flag_value;
+    int single_program_flow;
+    struct brw_context *brw;
+
+    struct brw_glsl_label *first_label;  /**< linked list of labels */
+    struct brw_glsl_call *first_call;    /**< linked list of CALs */
+};
+
+cairo_private void
+brw_save_label (struct brw_compile *c,
+                const char *name,
+                uint32_t position);
+
+cairo_private void
+brw_save_call (struct brw_compile *c,
+               const char *name,
+               uint32_t call_pos);
+
+cairo_private void
+brw_resolve_cals (struct brw_compile *c);
+
+static always_inline int
+type_sz (uint32_t type)
+{
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+    case BRW_REGISTER_TYPE_D:
+    case BRW_REGISTER_TYPE_F:
+        return 4;
+    case BRW_REGISTER_TYPE_HF:
+    case BRW_REGISTER_TYPE_UW:
+    case BRW_REGISTER_TYPE_W:
+        return 2;
+    case BRW_REGISTER_TYPE_UB:
+    case BRW_REGISTER_TYPE_B:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/**
+ * Construct a brw_reg.
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr register sub number
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask WRITEMASK_X/Y/Z/W bitfield
+ */
+static always_inline struct brw_reg
+brw_reg (uint32_t file,
+         uint32_t nr,
+         uint32_t subnr,
+         uint32_t type,
+         uint32_t vstride,
+         uint32_t width,
+         uint32_t hstride,
+         uint32_t swizzle,
+         uint32_t writemask)
+{
+    struct brw_reg reg;
+
+    if (file == BRW_GENERAL_REGISTER_FILE)
+        assert(nr < 128);
+    else if (file == BRW_MESSAGE_REGISTER_FILE)
+        assert(nr < 9);
+    else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
+        assert(nr <= BRW_ARF_IP);
+
+    reg.type = type;
+    reg.file = file;
+    reg.nr = nr;
+    reg.subnr = subnr * type_sz(type);
+    reg.negate = 0;
+    reg.abs = 0;
+    reg.vstride = vstride;
+    reg.width = width;
+    reg.hstride = hstride;
+    reg.address_mode = BRW_ADDRESS_DIRECT;
+    reg.pad0 = 0;
+
+    /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+     * set swizzle and writemask to W, as the lower bits of subnr will
+     * be lost when converted to align16. This is probably too much to
+     * keep track of as you'd want it adjusted by suboffset(), etc.
+     * Perhaps fix up when converting to align16?
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + + return reg; +} + +/** Construct float[16] register */ +static always_inline struct brw_reg +brw_vec16_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static always_inline struct brw_reg +brw_vec8_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static always_inline struct brw_reg +brw_vec4_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static always_inline struct brw_reg +brw_vec2_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); +} + +/** Construct float[1] register */ +static always_inline struct brw_reg +brw_vec1_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return brw_reg (file, nr, subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + +static always_inline struct brw_reg +retype (struct brw_reg reg, + uint32_t type) +{ + reg.type = type; + return reg; +} + +static always_inline struct brw_reg +suboffset (struct brw_reg reg, + uint32_t delta) +{ + reg.subnr += delta * type_sz (reg.type); + return reg; +} + +static always_inline struct brw_reg +offset (struct brw_reg reg, + uint32_t delta) +{ + reg.nr += delta; + return reg; +} + +static always_inline struct brw_reg +byte_offset (struct brw_reg reg, + uint32_t bytes) +{ + uint32_t newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + +/** Construct unsigned word[16] register */ +static always_inline struct brw_reg +brw_uw16_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec16_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static always_inline struct brw_reg +brw_uw8_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec8_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[2] register */ +static always_inline struct brw_reg +brw_uw2_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec2_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static always_inline struct brw_reg +brw_uw1_reg (uint32_t file, + uint32_t nr, + uint32_t subnr) +{ + return suboffset (retype (brw_vec1_reg (file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static always_inline struct brw_reg +brw_imm_reg (uint32_t type) +{ + return brw_reg (BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ 
+static always_inline struct brw_reg brw_imm_f( float f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static always_inline struct brw_reg brw_imm_d( int32_t d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static always_inline struct brw_reg brw_imm_ud( uint32_t ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static always_inline struct brw_reg brw_imm_uw( uint16_t uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static always_inline struct brw_reg brw_imm_w( int16_t w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static always_inline +struct brw_reg brw_imm_v (uint32_t v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static always_inline struct brw_reg +brw_imm_vf (uint32_t v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static always_inline struct brw_reg +brw_imm_vf4 (uint32_t v0, + uint32_t v1, + uint32_t v2, + uint32_t v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + +static always_inline struct brw_reg +brw_address (struct brw_reg reg) +{ + return brw_imm_uw (reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static always_inline struct brw_reg +brw_vec1_grf (uint32_t nr, uint32_t subnr) +{ + return brw_vec1_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static always_inline struct brw_reg +brw_vec2_grf (uint32_t nr, uint32_t subnr) +{ + return brw_vec2_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static always_inline struct brw_reg +brw_vec4_grf (uint32_t nr, uint32_t subnr) +{ + return brw_vec4_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static always_inline struct brw_reg +brw_vec8_grf (uint32_t nr) +{ + return brw_vec8_reg (BRW_GENERAL_REGISTER_FILE, nr, 0); +} + +static always_inline struct brw_reg +brw_uw8_grf (uint32_t nr, uint32_t subnr) +{ + return brw_uw8_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static always_inline struct brw_reg +brw_uw16_grf (uint32_t nr, uint32_t subnr) +{ + return brw_uw16_reg (BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct null register (usually used for setting condition codes) */ +static always_inline struct brw_reg +brw_null_reg (void) +{ + return 
brw_vec8_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static always_inline struct brw_reg +brw_address_reg (uint32_t subnr) +{ + return brw_uw1_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static always_inline struct brw_reg +brw_ip_reg (void) +{ + return brw_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +static always_inline struct brw_reg +brw_acc_reg (void) +{ + return brw_vec8_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + +static always_inline struct brw_reg +brw_flag_reg (void) +{ + return brw_uw1_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + +static always_inline struct brw_reg +brw_mask_reg (uint32_t subnr) +{ + return brw_uw1_reg (BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static always_inline struct brw_reg +brw_message4_reg (uint32_t nr) +{ + return brw_vec4_reg (BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + +static always_inline struct brw_reg +brw_message_reg (uint32_t nr) +{ + return brw_vec8_reg (BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static always_inline uint32_t +cvt (uint32_t val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static always_inline struct brw_reg +stride (struct brw_reg reg, + uint32_t vstride, + uint32_t width, + uint32_t hstride) +{ + reg.vstride = cvt (vstride); + reg.width = cvt (width) - 1; + reg.hstride = cvt (hstride); + return reg; +} + +static always_inline struct brw_reg +vec16 (struct brw_reg reg) +{ + return stride (reg, 16,16,1); +} + +static always_inline struct brw_reg +vec8 (struct brw_reg reg) +{ + return stride (reg, 8,8,1); +} + +static always_inline struct brw_reg +vec4 (struct brw_reg reg) +{ + return stride (reg, 4,4,1); +} + +static always_inline struct brw_reg +vec2 (struct brw_reg reg) +{ + return stride (reg, 2,2,1); +} + +static always_inline struct brw_reg +vec1 (struct brw_reg reg) +{ + return stride (reg, 0,1,0); +} + +static always_inline struct brw_reg +get_element (struct brw_reg reg, uint32_t elt) +{ + return vec1 (suboffset (reg, elt)); +} + +static always_inline struct brw_reg +get_element_ud (struct brw_reg reg, uint32_t elt) +{ + return vec1 (suboffset (retype (reg, BRW_REGISTER_TYPE_UD), elt)); +} + +static always_inline struct brw_reg +brw_swizzle (struct brw_reg reg, + uint32_t x, + uint32_t y, + uint32_t z, + uint32_t w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4 (BRW_GET_SWZ (reg.dw1.bits.swizzle, x), + BRW_GET_SWZ (reg.dw1.bits.swizzle, y), + BRW_GET_SWZ (reg.dw1.bits.swizzle, z), + BRW_GET_SWZ (reg.dw1.bits.swizzle, w)); + return reg; +} + +static always_inline struct brw_reg +brw_swizzle1 (struct brw_reg reg, + uint32_t x) +{ + return brw_swizzle (reg, x, x, x, x); +} + +static always_inline struct brw_reg +brw_writemask (struct brw_reg reg, + uint32_t mask) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static always_inline struct brw_reg +brw_set_writemask (struct brw_reg reg, + uint32_t mask) +{ + reg.dw1.bits.writemask = mask; + 
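/* Note: unlike brw_writemask() above, which ANDs the new mask into the + * existing one, this replaces the writemask outright. + */ +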
return reg; +} + +static always_inline struct brw_reg +negate (struct brw_reg reg) +{ + reg.negate ^= 1; + return reg; +} + +static always_inline struct brw_reg +brw_abs (struct brw_reg reg) +{ + reg.abs = 1; + return reg; +} + +static always_inline struct brw_reg +brw_vec4_indirect (uint32_t subnr, + int32_t offset) +{ + struct brw_reg reg = brw_vec4_grf (0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static always_inline struct brw_reg +brw_vec1_indirect (uint32_t subnr, + int32_t offset) +{ + struct brw_reg reg = brw_vec1_grf (0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static always_inline struct brw_reg +deref_4f (struct brw_indirect ptr, int32_t offset) +{ + return brw_vec4_indirect (ptr.addr_subnr, ptr.addr_offset + offset); +} + +static always_inline struct brw_reg +deref_1f(struct brw_indirect ptr, int32_t offset) +{ + return brw_vec1_indirect (ptr.addr_subnr, ptr.addr_offset + offset); +} + +static always_inline struct brw_reg +deref_4b(struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_4f (ptr, offset), BRW_REGISTER_TYPE_B); +} + +static always_inline struct brw_reg +deref_1uw(struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_1f (ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static always_inline struct brw_reg +deref_1d (struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_1f (ptr, offset), BRW_REGISTER_TYPE_D); +} + +static always_inline struct brw_reg +deref_1ud (struct brw_indirect ptr, int32_t offset) +{ + return retype (deref_1f (ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static always_inline struct brw_reg +get_addr_reg (struct brw_indirect ptr) +{ + return brw_address_reg (ptr.addr_subnr); +} + +static always_inline struct brw_indirect +brw_indirect_offset (struct brw_indirect ptr, int32_t offset) +{ + ptr.addr_offset += offset; + return ptr; +} + +static always_inline struct brw_indirect +brw_indirect (uint32_t addr_subnr, int32_t offset) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +static always_inline struct brw_instruction * +current_insn (struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +cairo_private void brw_pop_insn_state (struct brw_compile *p); +cairo_private void brw_push_insn_state (struct brw_compile *p); +cairo_private void brw_set_mask_control (struct brw_compile *p, uint32_t value); +cairo_private void brw_set_saturate (struct brw_compile *p, uint32_t value); +cairo_private void brw_set_access_mode (struct brw_compile *p, uint32_t access_mode); +cairo_private void brw_set_compression_control (struct brw_compile *p, int control); +cairo_private void brw_set_predicate_control_flag_value (struct brw_compile *p, uint32_t value); +cairo_private void brw_set_predicate_control (struct brw_compile *p, uint32_t pc); +cairo_private void brw_set_conditionalmod (struct brw_compile *p, uint32_t conditional); + +cairo_private void +brw_compile_init (struct brw_compile *p, + cairo_bool_t is_g4x); +cairo_private const uint32_t *brw_get_program (struct brw_compile *p, uint32_t *sz); + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +cairo_private_no_warn struct brw_instruction * \ +brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +cairo_private_no_warn struct 
brw_instruction * \ +brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + +/* Helpers for SEND instruction: */ +cairo_private void +brw_urb_WRITE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + int allocate, + int used, + uint32_t msg_length, + uint32_t response_length, + int eot, + int writes_complete, + uint32_t offset, + uint32_t swizzle); + +cairo_private void +brw_fb_WRITE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t msg_length, + uint32_t response_length, + int eot); + +cairo_private void +brw_SAMPLE (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + struct brw_reg src0, + uint32_t binding_table_index, + uint32_t sampler, + uint32_t writemask, + uint32_t msg_type, + uint32_t response_length, + uint32_t msg_length, + cairo_bool_t eot); + +cairo_private void +brw_math_16 (struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t precision); + +cairo_private void +brw_math (struct brw_compile *p, + struct brw_reg dest, + uint32_t function, + uint32_t saturate, + uint32_t msg_reg_nr, + struct brw_reg src, + uint32_t data_type, + uint32_t precision); + +cairo_private void +brw_dp_READ_16 (struct brw_compile *p, + struct brw_reg dest, + uint32_t msg_reg_nr, + uint32_t scratch_offset); + +cairo_private void +brw_dp_WRITE_16 (struct brw_compile *p, + struct brw_reg src, + uint32_t msg_reg_nr, + uint32_t scratch_offset); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. 
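+ * + * A rough usage sketch (hypothetical fragment; BRW_EXECUTE_8 is assumed from the brw-defines header): + * + * struct brw_instruction *insn = brw_IF (p, BRW_EXECUTE_8); + * ... emit then-clause ... + * insn = brw_ELSE (p, insn); + * ... emit else-clause ... + * brw_ENDIF (p, insn);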
+ */ +cairo_private struct brw_instruction * +brw_IF (struct brw_compile *p, + uint32_t execute_size); + +cairo_private struct brw_instruction * +brw_ELSE (struct brw_compile *p, + struct brw_instruction *if_insn); + +cairo_private void +brw_ENDIF (struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: */ +cairo_private struct brw_instruction * +brw_DO (struct brw_compile *p, + uint32_t execute_size); + +cairo_private struct brw_instruction * +brw_WHILE (struct brw_compile *p, + struct brw_instruction *patch_insn); + +cairo_private struct brw_instruction * +brw_BREAK (struct brw_compile *p); + +cairo_private struct brw_instruction * +brw_CONT (struct brw_compile *p); + +/* Forward jumps: */ +cairo_private void +brw_land_fwd_jump (struct brw_compile *p, + struct brw_instruction *jmp_insn); + +cairo_private void +brw_NOP (struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +cairo_private void +brw_CMP (struct brw_compile *p, + struct brw_reg dest, + uint32_t conditional, + struct brw_reg src0, + struct brw_reg src1); + +cairo_private void +brw_print_reg (struct brw_reg reg); + +cairo_private struct brw_instruction * +brw_next_instruction (struct brw_compile *p, + uint32_t opcode); + +cairo_private void +brw_instruction_set_destination (struct brw_instruction *insn, + struct brw_reg dest); + +cairo_private void +brw_instruction_set_source0 (struct brw_instruction *insn, + struct brw_reg reg); + +cairo_private void +brw_instruction_set_dp_write_message (struct brw_instruction *insn, + uint32_t binding_table_index, + uint32_t msg_control, + uint32_t msg_type, + uint32_t msg_length, + uint32_t pixel_scoreboard_clear, + uint32_t response_length, + uint32_t end_of_thread); + +/*********************************************************************** + * brw_eu_util.c: + */ + +cairo_private void +brw_copy_indirect_to_indirect (struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + uint32_t count); + +cairo_private void +brw_copy_from_indirect (struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + uint32_t count); + +cairo_private void +brw_copy4 (struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + uint32_t count); + +cairo_private void +brw_copy8 (struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + uint32_t count); + +cairo_private void +brw_math_invert (struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +cairo_private void +brw_set_src1 (struct brw_instruction *insn, + struct brw_reg reg); + +#endif diff --git a/src/drm/cairo-drm-intel-brw-structs.h b/src/drm/cairo-drm-intel-brw-structs.h new file mode 100644 index 00000000..f42483ed --- /dev/null +++ b/src/drm/cairo-drm-intel-brw-structs.h @@ -0,0 +1,1328 @@ +/************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CAIRO_DRM_INTEL_BRW_STRUCTS_H +#define CAIRO_DRM_INTEL_BRW_STRUCTS_H + +#include "cairo-types-private.h" + +/* Command packets: +*/ +struct header { + unsigned int length:16; + unsigned int opcode:16; +}; + +union header_union { + struct header bits; + unsigned int dword; +}; + +struct brw_3d_control { + struct { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:3; + unsigned int wc_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int operation:2; + unsigned int opcode:16; + } header; + + struct { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } dest; + + unsigned int dword2; + unsigned int dword3; +}; + + +struct brw_3d_primitive { + struct { + unsigned int length:8; + unsigned int pad:2; + unsigned int topology:5; + unsigned int indexed:1; + unsigned int opcode:16; + } header; + + unsigned int verts_per_instance; + unsigned int start_vert_location; + unsigned int instance_count; + unsigned int start_instance_location; + unsigned int base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush { + unsigned int flags:4; + unsigned int pad:12; + unsigned int opcode:16; +}; + +struct brw_vf_statistics { + unsigned int statistics_enable:1; + unsigned int pad:15; + unsigned int opcode:16; +}; + + +struct brw_binding_table_pointers { + struct header header; + unsigned int vs; + unsigned int gs; + unsigned int clp; + unsigned int sf; + unsigned int wm; +}; + +struct brw_blend_constant_color { + struct header header; + float blend_constant_color[4]; +}; + +struct brw_depthbuffer { + union header_union header; + + union { + struct { + unsigned int pitch:18; + unsigned int format:3; + unsigned int pad:4; + unsigned int depth_offset_disable:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad2:1; + unsigned int surface_type:3; + } bits; + unsigned int dword; + } dword1; + + unsigned int dword2_base_addr; + + union { + struct { + unsigned int pad:1; + unsigned int mipmap_layout:1; + unsigned int lod:4; + unsigned int width:13; + 
unsigned int height:13; + } bits; + unsigned int dword; + } dword3; + + union { + struct { + unsigned int pad:12; + unsigned int min_array_element:9; + unsigned int depth:11; + } bits; + unsigned int dword; + } dword4; +}; + +struct brw_drawrect { + struct header header; + unsigned int xmin:16; + unsigned int ymin:16; + unsigned int xmax:16; + unsigned int ymax:16; + unsigned int xorg:16; + unsigned int yorg:16; +}; + +struct brw_global_depth_offset_clamp { + struct header header; + float depth_offset_clamp; +}; + +struct brw_indexbuffer { + union { + struct { + unsigned int length:8; + unsigned int index_format:2; + unsigned int cut_index_enable:1; + unsigned int pad:5; + unsigned int opcode:16; + } bits; + unsigned int dword; + } header; + unsigned int buffer_start; + unsigned int buffer_end; +}; + + +struct brw_line_stipple { + struct header header; + + struct { + unsigned int pattern:16; + unsigned int pad:16; + } bits0; + + struct { + unsigned int repeat_count:9; + unsigned int pad:7; + unsigned int inverse_repeat_count:16; + } bits1; +}; + +struct brw_pipelined_state_pointers { + struct header header; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } vs; + + struct { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } gs; + + struct { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } clp; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } sf; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } wm; + + struct { + unsigned int pad:6; + unsigned int offset:26; + } cc; +}; + +struct brw_polygon_stipple_offset { + struct header header; + + struct { + unsigned int y_offset:5; + unsigned int pad:3; + unsigned int x_offset:5; + unsigned int pad0:19; + } bits0; +}; + +struct brw_polygon_stipple { + struct header header; + unsigned int stipple[32]; +}; + +struct brw_pipeline_select { + struct { + unsigned int pipeline_select:1; + unsigned int pad:15; + unsigned int opcode:16; + } header; +}; + +struct brw_pipe_control { + struct { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:2; + unsigned int instruction_state_cache_flush_enable:1; + unsigned int write_cache_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int post_sync_operation:2; + + unsigned int opcode:16; + } header; + + struct { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } bits1; + + unsigned int data0; + unsigned int data1; +}; + + +struct brw_urb_fence { + struct { + unsigned int length:8; + unsigned int vs_realloc:1; + unsigned int gs_realloc:1; + unsigned int clp_realloc:1; + unsigned int sf_realloc:1; + unsigned int vfe_realloc:1; + unsigned int cs_realloc:1; + unsigned int pad:2; + unsigned int opcode:16; + } header; + + struct { + unsigned int vs_fence:10; + unsigned int gs_fence:10; + unsigned int clp_fence:10; + unsigned int pad:2; + } bits0; + + struct { + unsigned int sf_fence:10; + unsigned int vf_fence:10; + unsigned int cs_fence:10; + unsigned int pad:2; + } bits1; +}; + +struct brw_constant_buffer_state { + struct header header; + + struct { + unsigned int nr_urb_entries:3; + unsigned int pad:1; + unsigned int urb_entry_size:5; + unsigned int pad0:23; + } bits0; +}; + +struct brw_constant_buffer { + struct { + unsigned int length:8; + unsigned int valid:1; + unsigned int pad:7; + unsigned int opcode:16; + } header; + + struct { + unsigned int buffer_length:6; + unsigned int buffer_address:26; + } bits0; +}; + +struct brw_state_base_address { + 
struct header header; + + struct { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int general_state_address:27; + } bits0; + + struct { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int surface_state_address:27; + } bits1; + + struct { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int indirect_object_state_address:27; + } bits2; + + struct { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int general_state_upper_bound:20; + } bits3; + + struct { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch { + struct header header; + + struct { + unsigned int prefetch_count:3; + unsigned int pad:3; + unsigned int prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer { + struct header header; + + struct { + unsigned int pad:4; + unsigned int system_instruction_pointer:28; + } bits0; +}; + + +/* State structs for the various fixed function units: +*/ + +struct thread0 { + unsigned int pad0:1; + unsigned int grf_reg_count:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer:26; +}; + +struct thread1 { + unsigned int ext_halt_exception_enable:1; + unsigned int sw_exception_enable:1; + unsigned int mask_stack_exception_enable:1; + unsigned int timeout_exception_enable:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad0:3; + unsigned int depth_coef_urb_read_offset:6; /* WM only */ + unsigned int pad1:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad3:5; + unsigned int single_program_flow:1; +}; + +struct thread2 { + unsigned int per_thread_scratch_space:4; + unsigned int pad0:6; + unsigned int scratch_space_base_pointer:22; +}; + +struct thread3 { + unsigned int dispatch_grf_start_reg:4; + unsigned int urb_entry_read_offset:6; + unsigned int pad0:1; + unsigned int urb_entry_read_length:6; + unsigned int pad1:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int pad2:1; + unsigned int const_urb_entry_read_length:6; + unsigned int pad3:1; +}; + +struct brw_clip_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:9; + unsigned int gs_output_stats:1; /* not always */ + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; /* may be less */ + unsigned int pad3:1; + } thread4; + + struct { + unsigned int pad0:13; + unsigned int clip_mode:3; + unsigned int userclip_enable_flags:8; + unsigned int userclip_must_clip:1; + unsigned int pad1:1; + unsigned int guard_band_enable:1; + unsigned int viewport_z_clip_enable:1; + unsigned int viewport_xy_clip_enable:1; + unsigned int vertex_position_space:1; + unsigned int api_mode:1; + unsigned int pad2:1; + } clip5; + + struct { + unsigned int pad0:5; + unsigned int clipper_viewport_state_ptr:27; + } clip6; + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + +struct brw_cc_unit_state { + struct { + unsigned int pad0:3; + unsigned int bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned 
int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } cc0; + + struct { + unsigned int bf_stencil_ref:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + unsigned int stencil_ref:8; + } cc1; + + struct { + unsigned int logicop_enable:1; + unsigned int pad0:10; + unsigned int depth_write_enable:1; + unsigned int depth_test_function:3; + unsigned int depth_test:1; + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + } cc2; + + struct { + unsigned int pad0:8; + unsigned int alpha_test_func:3; + unsigned int alpha_test:1; + unsigned int blend_enable:1; + unsigned int ia_blend_enable:1; + unsigned int pad1:1; + unsigned int alpha_test_format:1; + unsigned int pad2:16; + } cc3; + + struct { + unsigned int pad0:5; + unsigned int cc_viewport_state_offset:27; + } cc4; + + struct { + unsigned int pad0:2; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_src_blend_factor:5; + unsigned int ia_blend_function:3; + unsigned int statistics_enable:1; + unsigned int logicop_func:4; + unsigned int pad1:11; + unsigned int dither_enable:1; + } cc5; + + struct { + unsigned int clamp_post_alpha_blend:1; + unsigned int clamp_pre_alpha_blend:1; + unsigned int clamp_range:2; + unsigned int pad0:11; + unsigned int y_dither_offset:2; + unsigned int x_dither_offset:2; + unsigned int dest_blend_factor:5; + unsigned int src_blend_factor:5; + unsigned int blend_function:3; + } cc6; + + struct { + union { + float f; + unsigned char ub[4]; + } alpha_ref; + } cc7; +}; + +struct brw_sf_unit_state { + struct thread0 thread0; + struct { + unsigned int pad0:7; + unsigned int sw_exception_enable:1; + unsigned int pad1:3; + unsigned int mask_stack_exception_enable:1; + unsigned int pad2:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad3:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad4:5; + unsigned int single_program_flow:1; + } sf1; + + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; + unsigned int pad3:1; + } thread4; + + struct { + unsigned int front_winding:1; + unsigned int viewport_transform:1; + unsigned int pad0:3; + unsigned int sf_viewport_state_offset:27; + } sf5; + + struct { + unsigned int pad0:9; + unsigned int dest_org_vbias:4; + unsigned int dest_org_hbias:4; + unsigned int scissor:1; + unsigned int disable_2x2_trifilter:1; + unsigned int disable_zero_pix_trifilter:1; + unsigned int point_rast_rule:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int line_width:4; + unsigned int fast_scissor_disable:1; + unsigned int cull_mode:2; + unsigned int aa_enable:1; + } sf6; + + struct { + unsigned int point_size:11; + unsigned int use_point_size_state:1; + unsigned int subpixel_precision:1; + unsigned int sprite_point:1; + unsigned int pad0:11; + unsigned int trifan_pv:2; + unsigned int linestrip_pv:2; + unsigned int tristrip_pv:2; + unsigned int line_last_pixel_enable:1; + } sf7; +}; + +struct brw_gs_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int 
nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:1; + unsigned int pad3:6; + } thread4; + + struct { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } gs5; + + struct { + unsigned int max_vp_index:4; + unsigned int pad0:26; + unsigned int reorder_enable:1; + unsigned int pad1:1; + } gs6; +}; + +struct brw_vs_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:4; + unsigned int pad3:3; + } thread4; + + struct { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } vs5; + + struct { + unsigned int vs_enable:1; + unsigned int vert_cache_disable:1; + unsigned int pad0:30; + } vs6; +}; + +struct brw_wm_unit_state { + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int stats_enable:1; + unsigned int pad0:1; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } wm4; + + struct { + unsigned int enable_8_pix:1; + unsigned int enable_16_pix:1; + unsigned int enable_32_pix:1; + unsigned int pad0:7; + unsigned int legacy_global_depth_bias:1; + unsigned int line_stipple:1; + unsigned int depth_offset:1; + unsigned int polygon_stipple:1; + unsigned int line_aa_region_width:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int early_depth_test:1; + unsigned int thread_dispatch_enable:1; + unsigned int program_uses_depth:1; + unsigned int program_computes_depth:1; + unsigned int program_uses_killpixel:1; + unsigned int legacy_line_rast: 1; + unsigned int transposed_urb_read:1; + unsigned int max_threads:7; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; +}; + +/* The hardware supports two different modes for border color. The + * default (OpenGL) mode uses floating-point color channels, while the + * legacy mode uses 4 bytes. + * + * More significantly, the legacy mode respects the components of the + * border color for channels not present in the source, (whereas the + * default mode will ignore the border color's alpha channel and use + * alpha==1 for an RGB source, for example). + * + * The legacy mode matches the semantics specified by the Render + * extension. 
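+ * + * For example, with an RGB source and a fully transparent border color, the legacy mode yields border alpha == 0, whereas the default mode substitutes alpha == 1.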
+ */ +struct brw_sampler_default_border_color { + float color[4]; +}; + +struct brw_sampler_legacy_border_color { + uint8_t color[4]; +}; + +struct brw_sampler_state { + struct { + unsigned int shadow_function:3; + unsigned int lod_bias:11; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; + unsigned int pad:1; + unsigned int lod_preclamp:1; + unsigned int border_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad:3; + unsigned int max_lod:10; + unsigned int min_lod:10; + } ss1; + + struct { + unsigned int pad:5; + unsigned int border_color_pointer:27; + } ss2; + + struct { + unsigned int pad:19; + unsigned int max_aniso:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int monochrome_filter_width:3; + unsigned int monochrome_filter_height:3; + } ss3; +}; + +struct brw_clipper_viewport { + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct brw_cc_viewport { + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport { + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... +*/ +struct brw_surface_state { + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int pad:3; + unsigned int render_cache_read_mode:1; + unsigned int mipmap_layout_mode:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int color_blend:1; + unsigned int writedisable_blue:1; + unsigned int writedisable_green:1; + unsigned int writedisable_red:1; + unsigned int writedisable_alpha:1; + unsigned int surface_format:9; + unsigned int data_return_format:1; + unsigned int pad0:1; + unsigned int surface_type:3; + } ss0; + + struct { + unsigned int base_addr; + } ss1; + + struct { + unsigned int render_target_rotation:2; + unsigned int mip_count:4; + unsigned int width:13; + unsigned int height:13; + } ss2; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad:1; + unsigned int pitch:18; + unsigned int depth:11; + } ss3; + + struct { + unsigned int pad:19; + unsigned int min_array_elt:9; + unsigned int min_lod:4; + } ss4; + + struct { + unsigned int pad:20; + unsigned int y_offset:4; + unsigned int pad2:1; + unsigned int x_offset:7; + } ss5; +}; + +struct brw_vertex_buffer_state { + struct { + unsigned int pitch:11; + unsigned int pad:15; + unsigned int access_type:1; + unsigned int vb_index:5; + } vb0; + + unsigned int start_addr; + unsigned int max_index; +#if 1 + unsigned int instance_data_step_rate; /* not included for sequential/random vertices? 
*/ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + +struct brw_vertex_element_state { + struct { + unsigned int src_offset:11; + unsigned int pad:5; + unsigned int src_format:9; + unsigned int pad0:1; + unsigned int valid:1; + unsigned int vertex_buffer_index:5; + } ve0; + + struct { + unsigned int dst_offset:8; + unsigned int pad:8; + unsigned int vfcomponent3:4; + unsigned int vfcomponent2:4; + unsigned int vfcomponent1:4; + unsigned int vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; +}; + +struct brw_urb_immediate { + unsigned int opcode:4; + unsigned int offset:6; + unsigned int swizzle_control:2; + unsigned int pad:1; + unsigned int allocate:1; + unsigned int used:1; + unsigned int complete:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; +}; + +/* Instruction format for the execution units: */ + +struct brw_instruction { + struct { + unsigned int opcode:7; + unsigned int pad:1; + unsigned int access_mode:1; + unsigned int mask_control:1; + unsigned int dependency_control:2; + unsigned int compression_control:2; + unsigned int thread_control:2; + unsigned int predicate_control:4; + unsigned int predicate_inverse:1; + unsigned int execution_size:3; + unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned int pad0:2; + unsigned int debug_control:1; + unsigned int saturate:1; + } header; + + union { + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad:1; + unsigned int dest_subreg_nr:5; + unsigned int dest_reg_nr:8; + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } da1; + + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } ia1; + + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad0:1; + unsigned int dest_writemask:4; + unsigned int dest_subreg_nr:1; + unsigned int dest_reg_nr:8; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } da16; + + struct { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad0:6; + unsigned int dest_writemask:4; + int dest_indirect_offset:6; + unsigned int dest_subreg_nr:3; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct { + unsigned int src0_subreg_nr:5; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } da1; + + struct { + int 
src0_indirect_offset:10; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } ia1; + + struct { + unsigned int src0_swz_x:2; + unsigned int src0_swz_y:2; + unsigned int src0_subreg_nr:1; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } da16; + + struct { + unsigned int src0_swz_x:2; + unsigned int src0_swz_y:2; + int src0_indirect_offset:6; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia16; + + } bits2; + + union { + struct { + unsigned int src1_subreg_nr:5; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int pad0:7; + } da1; + + struct { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + unsigned int src1_subreg_nr:1; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int pad2:7; + } da16; + + struct { + int src1_indirect_offset:10; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia1; + + struct { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + int src1_indirect_offset:6; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad2:6; + } ia16; + + struct { + int jump_count:16; /* note: signed */ + unsigned int pop_count:4; + unsigned int pad0:12; + } if_else; + + struct { + unsigned int function:4; + unsigned int int_type:1; + unsigned int precision:1; + unsigned int saturate:1; + unsigned int data_type:1; + unsigned int pad0:8; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } math; + + struct { + unsigned int binding_table_index:8; + unsigned int sampler:4; + unsigned int return_format:2; + unsigned int msg_type:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } sampler; + + struct { + uint32_t binding_table_index:8; + uint32_t sampler:4; + uint32_t msg_type:4; + uint32_t response_length:4; + uint32_t msg_length:4; + uint32_t msg_target:4; + uint32_t pad1:3; + uint32_t end_of_thread:1; + } sampler_g4x; + + struct brw_urb_immediate urb; + + struct { + unsigned int 
binding_table_index:8; + unsigned int msg_control:4; + unsigned int msg_type:2; + unsigned int target_cache:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_read; + + struct { + unsigned int binding_table_index:8; + unsigned int msg_control:3; + unsigned int pixel_scoreboard_clear:1; + unsigned int msg_type:3; + unsigned int send_commit_msg:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_write; + + struct { + unsigned int pad:16; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } generic; + + uint32_t ud; + int32_t d; + } bits3; +}; + +/* media pipeline */ + +struct brw_vfe_state { + struct { + unsigned int per_thread_scratch_space:4; + unsigned int pad3:3; + unsigned int extend_vfe_state_present:1; + unsigned int pad2:2; + unsigned int scratch_base:22; + } vfe0; + + struct { + unsigned int debug_counter_control:2; + unsigned int children_present:1; + unsigned int vfe_mode:4; + unsigned int pad2:2; + unsigned int num_urb_entries:7; + unsigned int urb_entry_alloc_size:9; + unsigned int max_threads:7; + } vfe1; + + struct { + unsigned int pad4:4; + unsigned int interface_descriptor_base:28; + } vfe2; +}; + +struct brw_vld_state { + struct { + unsigned int pad6:6; + unsigned int scan_order:1; + unsigned int intra_vlc_format:1; + unsigned int quantizer_scale_type:1; + unsigned int concealment_motion_vector:1; + unsigned int frame_predict_frame_dct:1; + unsigned int top_field_first:1; + unsigned int picture_structure:2; + unsigned int intra_dc_precision:2; + unsigned int f_code_0_0:4; + unsigned int f_code_0_1:4; + unsigned int f_code_1_0:4; + unsigned int f_code_1_1:4; + } vld0; + + struct { + unsigned int pad2:9; + unsigned int picture_coding_type:2; + unsigned int pad:21; + } vld1; + + struct { + unsigned int index_0:4; + unsigned int index_1:4; + unsigned int index_2:4; + unsigned int index_3:4; + unsigned int index_4:4; + unsigned int index_5:4; + unsigned int index_6:4; + unsigned int index_7:4; + } desc_remap_table0; + + struct { + unsigned int index_8:4; + unsigned int index_9:4; + unsigned int index_10:4; + unsigned int index_11:4; + unsigned int index_12:4; + unsigned int index_13:4; + unsigned int index_14:4; + unsigned int index_15:4; + } desc_remap_table1; +}; + +struct brw_interface_descriptor { + struct { + unsigned int grf_reg_blocks:4; + unsigned int pad:2; + unsigned int kernel_start_pointer:26; + } desc0; + + struct { + unsigned int pad:7; + unsigned int software_exception:1; + unsigned int pad2:3; + unsigned int maskstack_exception:1; + unsigned int pad3:1; + unsigned int illegal_opcode_exception:1; + unsigned int pad4:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int single_program_flow:1; + unsigned int pad5:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int const_urb_entry_read_len:6; + } desc1; + + struct { + unsigned int pad:2; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } desc2; + + struct { + unsigned int binding_table_entry_count:5; + unsigned int binding_table_pointer:27; + } desc3; +}; + +#endif diff --git a/src/drm/cairo-drm-intel-command-private.h b/src/drm/cairo-drm-intel-command-private.h new file mode 100644 index 00000000..3860c3f2 --- /dev/null +++ 
b/src/drm/cairo-drm-intel-command-private.h @@ -0,0 +1,909 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CAIRO_DRM_INTEL_COMMAND_PRIVATE_H +#define CAIRO_DRM_INTEL_COMMAND_PRIVATE_H + +#include "cairo-types-private.h" + +#define CMD_MI (0x0 << 29) +#define CMD_misc (0x1 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) +/* 4-7 reserved */ + +#define MI_NOOP (CMD_MI | 0) +/* Batch */ +#define MI_BATCH_BUFFER (CMD_MI | (0x30 << 23) | 1) +#define MI_BATCH_BUFFER_START (CMD_MI | (0x31 << 23)) +#define MI_BATCH_BUFFER_END (CMD_MI | (0x0a << 23)) +#define MI_BATCH_NON_SECURE (1) +#define MI_BATCH_NON_SECURE_I965 (1 << 8) +/* Flush */ +#define MI_FLUSH (CMD_MI | (0x04 << 23)) +#define MI_WRITE_DIRTY_STATE (1<<4) +#define MI_END_SCENE (1<<3) +#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) +#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2) +#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1) +#define MI_INVALIDATE_MAP_CACHE (1<<0) + +#define PRIM3D (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (PRIM3D | (0x0<<18)) +#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18)) +#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18)) +#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18)) +#define PRIM3D_POLY (PRIM3D | (0x4<<18)) +#define PRIM3D_LINELIST (PRIM3D | (0x5<<18)) +#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18)) +#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18)) +#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18)) +#define PRIM3D_DIB (PRIM3D | (0x9<<18)) +#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18)) +#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18)) +#define PRIM3D_MASK (0x1f<<18) +#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17)) +#define PRIM3D_INDIRECT_ELTS ((1<<23) | (1<<17)) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define 
_3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (x) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + +/* The depth subrectangle is not supported, but must be disabled. 
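+ * (That is, the _3DSTATE_DEPTH_SUBRECT_DISABLE packet defined just below still has to be emitted during state setup.)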
*/ +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0)) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define COLR_BUF_ARGB2AAA (0xa<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define 
BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0U) + +#define TEXCOORD_WRAP_SHORTEST_TCX 8 +#define TEXCOORD_WRAP_SHORTEST_TCY 4 +#define TEXCOORD_WRAP_SHORTEST_TCZ 2 +#define TEXCOORD_PERSPECTIVE_DISABLE 1 + +#define S3_WRAP_SHORTEST_TCX(unit) 
(TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4)) +#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4)) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define 
S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK (0xf<<18) +#define LOGICOP_COPY 0xc +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE (1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16) + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n) (1<<(n)) +#define PS2_CONST_X(n) (n) +#define PS3_CONST_Y(n) (n) +#define PS4_CONST_Z(n) (n) +#define PS5_CONST_W(n) (n) + +/* p222 */ + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? 
+ */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 
1.0 : 0.0 */
+#define A0_DEST_SATURATE (1<<22)
+#define A0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X (1<<10)
+#define A0_DEST_CHANNEL_Y (2<<10)
+#define A0_DEST_CHANNEL_Z (4<<10)
+#define A0_DEST_CHANNEL_W (8<<10)
+#define A0_DEST_CHANNEL_ALL (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT 10
+#define A0_SRC0_TYPE_SHIFT 7
+#define A0_SRC0_NR_SHIFT 2
+
+#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+#define SRC_X 0
+#define SRC_Y 1
+#define SRC_Z 2
+#define SRC_W 3
+#define SRC_ZERO 4
+#define SRC_ONE 5
+
+#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT 28
+#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT 24
+#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT 20
+#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT 16
+#define A1_SRC1_TYPE_SHIFT 13
+#define A1_SRC1_NR_SHIFT 8
+#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT 4
+#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT 0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT 28
+#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT 24
+#define A2_SRC2_TYPE_SHIFT 21
+#define A2_SRC2_NR_SHIFT 16
+#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT 12
+#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT 8
+#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT 4
+#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT 0
+
+/* Texture instructions */
+#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
+ * sampler and address, and output
+ * filtered texel data to destination
+ * register */
+#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
+ * perspective divide of the texture
+ * coordinate .xyz values by .w before
+ * sampling. */
+#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
+ * computed LOD by w. Only S4.6 two's
+ * comp is used. This implies that a
+ * float to fixed conversion is
+ * done. */
+#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
+ * operation. Simply kills the pixel
+ * if any channel of the address
+ * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT 17
+#define T2_MBZ 0
+
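As an illustration (not part of this patch), the fields above together with the declaration macros just below are enough to hand-assemble a complete fragment program. The sketch encodes the smallest useful shader, "oC = T_DIFFUSE", as one DCL plus one MOV, three dwords each; OUT() is an assumed batch-emission helper, and the header's count field holds the total dword count minus two, matching the (cmd & 0x1ff) + 2 decode in cairo-drm-intel-debug.c further down.

    OUT (_3DSTATE_PIXEL_SHADER_PROGRAM | (7 - 2)); /* 7 dwords in total */

    /* DCL T_DIFFUSE.xyzw */
    OUT (D0_DCL |
         (REG_TYPE_T << D0_TYPE_SHIFT) |
         (T_DIFFUSE << D0_NR_SHIFT) |
         D0_CHANNEL_ALL);
    OUT (D1_MBZ);
    OUT (D2_MBZ);

    /* MOV oC.xyzw, T_DIFFUSE.xyzw */
    OUT (A0_MOV |
         (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
         (0 << A0_DEST_NR_SHIFT) |
         A0_DEST_CHANNEL_ALL |
         (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) |
         (T_DIFFUSE << A0_SRC0_NR_SHIFT));
    OUT ((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |
         (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |
         (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |
         (SRC_W << A1_SRC0_CHANNEL_W_SHIFT));
    OUT (0); /* no second or third source operand */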
+/* Declaration instructions */
+#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
+ * register or an s (sampler)
+ * register. */
+#define D0_SAMPLE_TYPE_SHIFT 22
+#define D0_SAMPLE_TYPE_2D (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK (0x3<<22)
+
+#define D0_TYPE_SHIFT 19
+/* Allow: T, S */
+#define D0_NR_SHIFT 14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X (1<<10)
+#define D0_CHANNEL_Y (2<<10)
+#define D0_CHANNEL_Z (4<<10)
+#define D0_CHANNEL_W (8<<10)
+#define D0_CHANNEL_ALL (0xf<<10)
+#define D0_CHANNEL_NONE (0<<10)
+
+#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ 0
+#define D2_MBZ 0
+
+/* p207.
+ * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK
+ */
+#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16))
+
+#define MS1_MAPMASK_SHIFT 0
+#define MS1_MAPMASK_MASK (0x8fff<<0)
+
+#define MS2_UNTRUSTED_SURFACE (1<<31)
+#define MS2_ADDRESS_MASK 0xfffffffc
+#define MS2_VERTICAL_LINE_STRIDE (1<<1)
+#define MS2_VERTICAL_OFFSET (1<<0)
+
+#define MS3_HEIGHT_SHIFT 21
+#define MS3_WIDTH_SHIFT 10
+#define MS3_PALETTE_SELECT (1<<9)
+#define MS3_MAPSURF_FORMAT_SHIFT 7
+#define MS3_MAPSURF_FORMAT_MASK (0x7<<7)
+#define MAPSURF_8BIT (1<<7)
+#define MAPSURF_16BIT (2<<7)
+#define MAPSURF_32BIT (3<<7)
+#define MAPSURF_422 (5<<7)
+#define MAPSURF_COMPRESSED (6<<7)
+#define MAPSURF_4BIT_INDEXED (7<<7)
+#define MS3_MT_FORMAT_MASK (0x7 << 3)
+#define MS3_MT_FORMAT_SHIFT 3
+#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
+#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
+#define MT_8BIT_L8 (1<<3)
+#define MT_8BIT_A8 (4<<3)
+#define MT_8BIT_MONO8 (5<<3)
+#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
+#define MT_16BIT_ARGB1555 (1<<3)
+#define MT_16BIT_ARGB4444 (2<<3)
+#define MT_16BIT_AY88 (3<<3)
+#define MT_16BIT_88DVDU (5<<3)
+#define MT_16BIT_BUMP_655LDVDU (6<<3)
+#define MT_16BIT_I16 (7<<3)
+#define MT_16BIT_L16 (8<<3)
+#define MT_16BIT_A16 (9<<3)
+#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
+#define MT_32BIT_ABGR8888 (1<<3)
+#define MT_32BIT_XRGB8888 (2<<3)
+#define MT_32BIT_XBGR8888 (3<<3)
+#define MT_32BIT_QWVU8888 (4<<3)
+#define MT_32BIT_AXVU8888 (5<<3)
+#define MT_32BIT_LXVU8888 (6<<3)
+#define MT_32BIT_XLVU8888 (7<<3)
+#define MT_32BIT_ARGB2101010 (8<<3)
+#define MT_32BIT_ABGR2101010 (9<<3)
+#define MT_32BIT_AWVU2101010 (0xA<<3)
+#define MT_32BIT_GR1616 (0xB<<3)
+#define MT_32BIT_VU1616 (0xC<<3)
+#define MT_32BIT_xI824 (0xD<<3)
+#define MT_32BIT_xA824 (0xE<<3)
+#define MT_32BIT_xL824 (0xF<<3)
+#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
+#define MT_422_YCRCB_NORMAL (1<<3)
+#define MT_422_YCRCB_SWAPUV (2<<3)
+#define MT_422_YCRCB_SWAPUVY (3<<3)
+#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
+#define MT_COMPRESS_DXT2_3 (1<<3)
+#define MT_COMPRESS_DXT4_5 (2<<3)
+#define MT_COMPRESS_FXT1 (3<<3)
+#define MT_COMPRESS_DXT1_RGB (4<<3)
+#define MS3_USE_FENCE_REGS (1<<2)
+#define MS3_TILED_SURFACE (1<<1)
+#define MS3_TILE_WALK (1<<0)
+
+/* The pitch is the pitch measured in DWORDS, minus 1 */
+#define MS4_PITCH_SHIFT 21
+#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
+#define MS4_CUBE_FACE_ENA_POSX (1<<19)
+#define MS4_CUBE_FACE_ENA_NEGY (1<<18)
+#define MS4_CUBE_FACE_ENA_POSY (1<<17)
+#define MS4_CUBE_FACE_ENA_NEGZ (1<<16)
+#define MS4_CUBE_FACE_ENA_POSZ (1<<15)
+#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15)
+#define 
MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244. + * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK. + */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define FLUSH_MAP_CACHE (1<<0) +#define FLUSH_RENDER_CACHE (1<<1) + +/* BLT commands */ +#define COLOR_BLT_CMD (CMD_2D | (0x40 << 22) | 3) +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) +#define XY_SETUP_CLIP_BLT_CMD (CMD_2D | (0x03 << 22) | 1) +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) +#define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22) | 4) + +#define XY_MONO_PAT_BLT_CMD (CMD_2D | (0x52<<22)|0x7) +#define XY_MONO_PAT_VERT_SEED ((1<<10) | (1<<9)|(1<<8)) +#define XY_MONO_PAT_HORT_SEED ((1<<14) | (1<<13)|(1<<12)) +#define XY_MONO_SRC_BLT_CMD (CMD_2D | (0x54<<22)|(0x6)) + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) +#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) +#define XY_TEXT_BYTE_PACKED (1 << 16) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 
24) +#define BR13_8888 (0x3 << 24) + +#endif /* CAIRO_DRM_INTEL_COMMAND_PRIVATE_H */ diff --git a/src/drm/cairo-drm-intel-debug.c b/src/drm/cairo-drm-intel-debug.c new file mode 100644 index 00000000..cc2e47a1 --- /dev/null +++ b/src/drm/cairo-drm-intel-debug.c @@ -0,0 +1,1208 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "cairoint.h" +#include "cairo-drm-intel-private.h" + +struct debug_stream { + unsigned offset; /* current gtt offset */ + const char *ptr; /* pointer to gtt offset zero */ + const char *end; /* pointer to gtt offset zero */ +}; + +static cairo_bool_t +debug (struct debug_stream *stream, const char *name, uint32_t len) +{ + uint32_t i; + const uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + + if (len == 0) { + fprintf (stderr, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + ASSERT_NOT_REACHED; + return FALSE; + } + + fprintf (stderr, "%04x: ", stream->offset); + + fprintf (stderr, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + fprintf (stderr, "\t0x%08x\n", ptr[i]); + fprintf (stderr, "\n"); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + + +static const char * +get_prim_name (uint32_t val) +{ + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; + case PRIM3D_TRISTRIP: return "TRISTRIP"; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; + case PRIM3D_TRIFAN: return "TRIFAN"; + case PRIM3D_POLY: return "POLY"; + case PRIM3D_LINELIST: return "LINELIST"; + case PRIM3D_LINESTRIP: return "LINESTRIP"; + case PRIM3D_RECTLIST: return "RECTLIST"; + case PRIM3D_POINTLIST: return "POINTLIST"; + case PRIM3D_DIB: return "DIB"; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; + default: return "????"; + } +} + +static cairo_bool_t +debug_prim (struct debug_stream *stream, + const char *name, + cairo_bool_t dump_floats, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + uint32_t i; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s %s (%d dwords):\n", name, prim, len); + fprintf (stderr, "\t0x%08x\n", ptr[0]); + for (i = 1; i < len; 
i++) {
+        if (dump_floats)
+            fprintf (stderr, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]);
+        else
+            fprintf (stderr, "\t0x%08x\n", ptr[i]);
+    }
+
+    fprintf (stderr, "\n");
+
+    stream->offset += len * sizeof(uint32_t);
+    return TRUE;
+}
+
+static const char *opcodes[] = {
+    "NOP",
+    "ADD",
+    "MOV",
+    "MUL",
+    "MAD",
+    "DP2ADD",
+    "DP3",
+    "DP4",
+    "FRC",
+    "RCP",
+    "RSQ",
+    "EXP",
+    "LOG",
+    "CMP",
+    "MIN",
+    "MAX",
+    "FLR",
+    "MOD",
+    "TRC",
+    "SGE",
+    "SLT",
+    "TEXLD",
+    "TEXLDP",
+    "TEXLDB",
+    "TEXKILL",
+    "DCL",
+    "0x1a",
+    "0x1b",
+    "0x1c",
+    "0x1d",
+    "0x1e",
+    "0x1f",
+};
+
+static const int args[] = {
+    0, /* 0 nop */
+    2, /* 1 add */
+    1, /* 2 mov */
+    2, /* 3 mul */
+    3, /* 4 mad */
+    3, /* 5 dp2add */
+    2, /* 6 dp3 */
+    2, /* 7 dp4 */
+    1, /* 8 frc */
+    1, /* 9 rcp */
+    1, /* a rsq */
+    1, /* b exp */
+    1, /* c log */
+    3, /* d cmp */
+    2, /* e min */
+    2, /* f max */
+    1, /* 10 flr */
+    1, /* 11 mod */
+    1, /* 12 trc */
+    2, /* 13 sge */
+    2, /* 14 slt */
+    1,
+    1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+};
+
+static const char *regname[] = {
+    "R",
+    "T",
+    "CONST",
+    "S",
+    "OC",
+    "OD",
+    "U",
+    "UNKNOWN",
+};
+
+static void
+print_reg_type_nr(uint32_t type, uint32_t nr)
+{
+    switch (type) {
+    case REG_TYPE_T:
+        switch (nr) {
+        case T_DIFFUSE:
+            fprintf (stderr, "T_DIFFUSE");
+            return;
+        case T_SPECULAR:
+            fprintf (stderr, "T_SPECULAR");
+            return;
+        case T_FOG_W:
+            fprintf (stderr, "T_FOG_W");
+            return;
+        default:
+            fprintf (stderr, "T_TEX%d", nr);
+            return;
+        }
+    case REG_TYPE_OC:
+        if (nr == 0) {
+            fprintf (stderr, "oC");
+            return;
+        }
+        break;
+    case REG_TYPE_OD:
+        if (nr == 0) {
+            fprintf (stderr, "oD");
+            return;
+        }
+        break;
+    default:
+        break;
+    }
+
+    fprintf (stderr, "%s[%d]", regname[type], nr);
+}
+
+#define REG_SWIZZLE_MASK 0x7777
+#define REG_NEGATE_MASK 0x8888
+
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \
+ (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \
+ (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \
+ (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+static void
+print_reg_neg_swizzle(uint32_t reg)
+{
+    int i;
+
+    if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW &&
+        (reg & REG_NEGATE_MASK) == 0)
+        return;
+
+    fprintf (stderr, ".");
+
+    for (i = 12; i >= 0; i -= 4) {
+        if (reg & (8 << i))
+            fprintf (stderr, "-");
+
+        switch ((reg >> i) & 0x7) {
+        case 0:
+            fprintf (stderr, "x");
+            break;
+        case 1:
+            fprintf (stderr, "y");
+            break;
+        case 2:
+            fprintf (stderr, "z");
+            break;
+        case 3:
+            fprintf (stderr, "w");
+            break;
+        case 4:
+            fprintf (stderr, "0");
+            break;
+        case 5:
+            fprintf (stderr, "1");
+            break;
+        default:
+            fprintf (stderr, "?");
+            break;
+        }
+    }
+}
+
+static void
+print_src_reg(uint32_t dword)
+{
+    uint32_t nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK;
+    uint32_t type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK;
+    print_reg_type_nr(type, nr);
+    print_reg_neg_swizzle(dword);
+}
+
+static void
+print_dest_reg(uint32_t dword)
+{
+    uint32_t nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK;
+    uint32_t type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK;
+    print_reg_type_nr(type, nr);
+    if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL)
+        return;
+    fprintf (stderr, ".");
+    if (dword & A0_DEST_CHANNEL_X)
+        fprintf (stderr, "x");
+    if (dword & A0_DEST_CHANNEL_Y)
+        fprintf (stderr, "y");
+    if (dword & A0_DEST_CHANNEL_Z)
+        fprintf (stderr, "z");
+    if (dword & A0_DEST_CHANNEL_W)
+        fprintf (stderr, "w");
+}
+
+#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT))
+#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT))
+#define GET_SRC2_REG(r) (r) + +static void +print_arith_op(uint32_t opcode, const uint32_t * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(program[0]); + if (program[0] & A0_DEST_SATURATE) + fprintf (stderr, " = SATURATE "); + else + fprintf (stderr, " = "); + } + + fprintf (stderr, "%s ", opcodes[opcode]); + + print_src_reg(GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + fprintf (stderr, "\n"); + return; + } + + fprintf (stderr, ", "); + print_src_reg(GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + fprintf (stderr, "\n"); + return; + } + + fprintf (stderr, ", "); + print_src_reg(GET_SRC2_REG(program[2])); + fprintf (stderr, "\n"); + return; +} + +static void +print_tex_op(uint32_t opcode, const uint32_t * program) +{ + print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); + fprintf (stderr, " = "); + + fprintf (stderr, "%s ", opcodes[opcode]); + + fprintf (stderr, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + fprintf (stderr, "\n"); +} + +static void +print_dcl_op(uint32_t opcode, const uint32_t * program) +{ + fprintf (stderr, "%s ", opcodes[opcode]); + print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL); + fprintf (stderr, "\n"); +} + +static void +i915_disassemble_program (const uint32_t * program, uint32_t sz) +{ + uint32_t size = program[0] & 0x1ff; + uint32_t i; + + fprintf (stderr, "\tPROGRAM\n"); + + assert(size + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + uint32_t opcode = program[0] & (0x1f << 24); + + fprintf (stderr, "\t\t"); + + if ((int) opcode >= A0_NOP && opcode <= A0_SLT) + print_arith_op(opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL) + print_tex_op(opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(opcode >> 24, program); + else + fprintf (stderr, "Unknown opcode 0x%x\n", opcode); + } + + fprintf (stderr, "\tEND-PROGRAM\n\n"); +} + +static cairo_bool_t +debug_program (struct debug_stream *stream, const char *name, uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + + if (len == 0) { + fprintf (stderr, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + ASSERT_NOT_REACHED; + return FALSE; + } + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + i915_disassemble_program (ptr, len); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +static cairo_bool_t +debug_chain (struct debug_stream *stream, const char *name, uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t old_offset = stream->offset + len * sizeof(uint32_t); + uint32_t i; + + fprintf (stderr, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + fprintf (stderr, "\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + if (stream->offset < old_offset) + fprintf (stderr, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + fprintf (stderr, "\n... 
skipping from 0x%x --> 0x%x ...\n\n",
+                 old_offset, stream->offset );
+
+    return TRUE;
+}
+
+static cairo_bool_t
+debug_variable_length_prim (struct debug_stream *stream)
+{
+    uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset);
+    const char *prim = get_prim_name( ptr[0] );
+    uint32_t i, len;
+
+    uint16_t *idx = (uint16_t *)(ptr+1);
+    for (i = 0; idx[i] != 0xffff; i++)
+        ;
+
+    len = 1+(i+2)/2;
+
+    fprintf (stderr, "%04x: ", stream->offset);
+    fprintf (stderr, "3DPRIM, %s variable length %d indices (%d dwords):\n", prim, i, len);
+    for (i = 0; i < len; i++)
+        fprintf (stderr, "\t0x%08x\n", ptr[i]);
+    fprintf (stderr, "\n");
+
+    stream->offset += len * sizeof(uint32_t);
+    return TRUE;
+}
+
+#define BITS(dw, hi, lo, ...) \
+ do { \
+ unsigned himask = 0xffffffffU >> (31 - (hi)); \
+ fprintf (stderr, "\t\t "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, ": 0x%x\n", ((dw) & himask) >> (lo)); \
+ } while (0)
+
+#define MBZ(dw, hi, lo) do { \
+ unsigned x = (dw) >> (lo); \
+ unsigned lomask = (1 << (lo)) - 1; \
+ unsigned himask; \
+ himask = (1UL << (hi)) - 1; \
+ assert ((x & himask & ~lomask) == 0); \
+} while (0)
+
+#define FLAG(dw, bit, ... ) \
+ do { \
+ if (((dw) >> (bit)) & 1) { \
+ fprintf (stderr, "\t\t "); \
+ fprintf (stderr, __VA_ARGS__); \
+ fprintf (stderr, "\n"); \
+ } \
+ } while (0)
+
+static cairo_bool_t
+debug_load_immediate (struct debug_stream *stream,
+                      const char *name,
+                      uint32_t len)
+{
+    uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset);
+    uint32_t bits = (ptr[0] >> 4) & 0xff;
+    uint32_t j = 0;
+
+    fprintf (stderr, "%04x: ", stream->offset);
+    fprintf (stderr, "%s (%d dwords, flags: %x):\n", name, len, bits);
+    fprintf (stderr, "\t0x%08x\n", ptr[j++]);
+
+    if (bits & (1<<0)) {
+        fprintf (stderr, "\t LIS0: 0x%08x\n", ptr[j]);
+        fprintf (stderr, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3));
+        BITS (ptr[j], 0, 0, "vb invalidate disable");
+        j++;
+    }
+    if (bits & (1<<1)) {
+        fprintf (stderr, "\t LIS1: 0x%08x\n", ptr[j]);
+        BITS (ptr[j], 29, 24, "vb dword width");
+        BITS (ptr[j], 21, 16, "vb dword pitch");
+        BITS (ptr[j], 15, 0, "vb max index");
+        j++;
+    }
+    if (bits & (1<<2)) {
+        int i;
+        fprintf (stderr, "\t LIS2: 0x%08x\n", ptr[j]);
+        for (i = 0; i < 8; i++) {
+            unsigned tc = (ptr[j] >> (i * 4)) & 0xf;
+            if (tc != 0xf)
+                BITS (tc, 3, 0, "tex coord %d", i);
+        }
+        j++;
+    }
+    if (bits & (1<<3)) {
+        fprintf (stderr, "\t LIS3: 0x%08x\n", ptr[j]);
+        j++;
+    }
+    if (bits & (1<<4)) {
+        fprintf (stderr, "\t LIS4: 0x%08x\n", ptr[j]);
+        BITS (ptr[j], 31, 23, "point width");
+        BITS (ptr[j], 22, 19, "line width");
+        FLAG (ptr[j], 18, "alpha flatshade");
+        FLAG (ptr[j], 17, "fog flatshade");
+        FLAG (ptr[j], 16, "spec flatshade");
+        FLAG (ptr[j], 15, "rgb flatshade");
+        BITS (ptr[j], 14, 13, "cull mode");
+        FLAG (ptr[j], 12, "vfmt: point width");
+        FLAG (ptr[j], 11, "vfmt: specular/fog");
+        FLAG (ptr[j], 10, "vfmt: rgba");
+        FLAG (ptr[j], 9, "vfmt: depth offset");
+        BITS (ptr[j], 8, 6, "vfmt: position (2==xyzw)");
+        FLAG (ptr[j], 5, "force dflt diffuse");
+        FLAG (ptr[j], 4, "force dflt specular");
+        FLAG (ptr[j], 3, "local depth offset enable");
+        FLAG (ptr[j], 2, "vfmt: fp32 fog coord");
+        FLAG (ptr[j], 1, "sprite point");
+        FLAG (ptr[j], 0, "antialiasing");
+        j++;
+    }
+    if (bits & (1<<5)) {
+        fprintf (stderr, "\t LIS5: 0x%08x\n", ptr[j]);
+        BITS (ptr[j], 31, 28, "rgba write disables");
+        FLAG (ptr[j], 27, "force dflt point width");
+        FLAG (ptr[j], 26, "last pixel enable");
+        FLAG (ptr[j], 25, "global z offset enable");
+        FLAG (ptr[j], 24, "fog enable");
+ 
BITS (ptr[j], 23, 16, "stencil ref"); + BITS (ptr[j], 15, 13, "stencil test"); + BITS (ptr[j], 12, 10, "stencil fail op"); + BITS (ptr[j], 9, 7, "stencil pass z fail op"); + BITS (ptr[j], 6, 4, "stencil pass z pass op"); + FLAG (ptr[j], 3, "stencil write enable"); + FLAG (ptr[j], 2, "stencil test enable"); + FLAG (ptr[j], 1, "color dither enable"); + FLAG (ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + fprintf (stderr, "\t LIS6: 0x%08x\n", ptr[j]); + FLAG (ptr[j], 31, "alpha test enable"); + BITS (ptr[j], 30, 28, "alpha func"); + BITS (ptr[j], 27, 20, "alpha ref"); + FLAG (ptr[j], 19, "depth test enable"); + BITS (ptr[j], 18, 16, "depth func"); + FLAG (ptr[j], 15, "blend enable"); + BITS (ptr[j], 14, 12, "blend func"); + BITS (ptr[j], 11, 8, "blend src factor"); + BITS (ptr[j], 7, 4, "blend dst factor"); + FLAG (ptr[j], 3, "depth write enable"); + FLAG (ptr[j], 2, "color write enable"); + BITS (ptr[j], 1, 0, "provoking vertex"); + j++; + } + + fprintf (stderr, "\n"); + + assert(j == len); + + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +static cairo_bool_t +debug_load_indirect (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t bits = (ptr[0] >> 8) & 0x3f; + uint32_t i, j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + for (i = 0; i < 6; i++) { + if (bits & (1<<i)) { + switch (1<<(8+i)) { + case LI0_STATE_STATIC_INDIRECT: + fprintf (stderr, " STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_DYNAMIC_INDIRECT: + fprintf (stderr, " DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + break; + case LI0_STATE_SAMPLER: + fprintf (stderr, " SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_MAP: + fprintf (stderr, " MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_PROGRAM: + fprintf (stderr, " PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_CONSTANTS: + fprintf (stderr, " CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + fprintf (stderr, " 0x%08x\n", ptr[j++]); + break; + default: + ASSERT_NOT_REACHED; + break; + } + } + } + + if (bits == 0) { + fprintf (stderr, "\t DUMMY: 0x%08x\n", ptr[j++]); + } + + fprintf (stderr, "\n"); + + assert(j == len); + stream->offset += len * sizeof(uint32_t); + return TRUE; +} + +static void +BR13 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x\n", val); + FLAG (val, 30, "clipping enable"); + BITS (val, 25, 24, "color depth (3==32bpp)"); + BITS (val, 23, 16, "raster op"); + BITS (val, 15, 0, "dest pitch"); +} + +static void +BR2223 (struct debug_stream *stream, + uint32_t val22, uint32_t val23) +{ + union { uint32_t val; short field[2]; } BR22, BR23; + + BR22.val = val22; + BR23.val = val23; + + fprintf (stderr, "\t0x%08x\n", val22); + BITS (val22, 31, 16, "dest y1"); + BITS (val22, 15, 0, "dest x1"); + + fprintf (stderr, "\t0x%08x\n", val23); + BITS (val23, 31, 16, "dest y2"); + BITS (val23, 15, 0, "dest x2"); + + /* The blit engine may produce unexpected results when these aren't met */ + assert(BR22.field[0] < BR23.field[0]); + assert(BR22.field[1] < BR23.field[1]); +} + +static void +BR09 (struct debug_stream *stream, + 
uint32_t val) +{ + fprintf (stderr, "\t0x%08x -- dest address\n", val); +} + +static void +BR26 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x\n", val); + BITS (val, 31, 16, "src y1"); + BITS (val, 15, 0, "src x1"); +} + +static void +BR11 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x\n", val); + BITS (val, 15, 0, "src pitch"); +} + +static void +BR12 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x -- src address\n", val); +} + +static void +BR16 (struct debug_stream *stream, + uint32_t val) +{ + fprintf (stderr, "\t0x%08x -- color\n", val); +} + +static cairo_bool_t +debug_copy_blit (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR2223(stream, ptr[j], ptr[j+1]); + j += 2; + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_color_blit (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR2223(stream, ptr[j], ptr[j+1]); + j += 2; + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_modes4 (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 21, 18, "logicop func"); + FLAG (ptr[j], 17, "stencil test mask modify-enable"); + FLAG (ptr[j], 16, "stencil write mask modify-enable"); + BITS (ptr[j], 15, 8, "stencil test mask"); + BITS (ptr[j], 7, 0, "stencil write mask"); + fprintf (stderr, "\n"); + j++; + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_map_state (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + fprintf (stderr, "\t TMn.0: 0x%08x\n", ptr[j]); + fprintf (stderr, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG (ptr[j], 1, "vertical line stride"); + FLAG (ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + fprintf (stderr, "\t TMn.1: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 21, "height"); + BITS (ptr[j], 20, 10, "width"); + BITS (ptr[j], 9, 7, "surface format"); + BITS (ptr[j], 6, 3, "texel format"); + FLAG (ptr[j], 2, "use fence regs"); + FLAG (ptr[j], 1, "tiled surface"); + FLAG (ptr[j], 0, "tile walk ymajor"); + j++; + } + { + fprintf (stderr, "\t TMn.2: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 21, "dword pitch"); + BITS (ptr[j], 20, 15, 
"cube face enables"); + BITS (ptr[j], 14, 9, "max lod"); + FLAG (ptr[j], 8, "mip layout right"); + BITS (ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_sampler_state (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + fprintf (stderr, "\t TSn.0: 0x%08x\n", ptr[j]); + FLAG (ptr[j], 31, "reverse gamma"); + FLAG (ptr[j], 30, "planar to packed"); + FLAG (ptr[j], 29, "yuv->rgb"); + BITS (ptr[j], 28, 27, "chromakey index"); + BITS (ptr[j], 26, 22, "base mip level"); + BITS (ptr[j], 21, 20, "mip mode filter"); + BITS (ptr[j], 19, 17, "mag mode filter"); + BITS (ptr[j], 16, 14, "min mode filter"); + BITS (ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG (ptr[j], 4, "shadow enable"); + FLAG (ptr[j], 3, "max-aniso-4"); + BITS (ptr[j], 2, 0, "shadow func"); + j++; + } + + { + fprintf (stderr, "\t TSn.1: 0x%08x\n", ptr[j]); + BITS (ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG (ptr[j], 17, "kill pixel enable"); + FLAG (ptr[j], 16, "keyed tex filter mode"); + FLAG (ptr[j], 15, "chromakey enable"); + BITS (ptr[j], 14, 12, "tcx wrap mode"); + BITS (ptr[j], 11, 9, "tcy wrap mode"); + BITS (ptr[j], 8, 6, "tcz wrap mode"); + FLAG (ptr[j], 5, "normalized coords"); + BITS (ptr[j], 4, 1, "map (surface) index"); + FLAG (ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + fprintf (stderr, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +debug_dest_vars (struct debug_stream *stream, + const char *name, + uint32_t len) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + FLAG (ptr[j], 31, "early classic ztest"); + FLAG (ptr[j], 30, "opengl tex default color"); + FLAG (ptr[j], 29, "bypass iz"); + FLAG (ptr[j], 28, "lod preclamp"); + BITS (ptr[j], 27, 26, "dither pattern"); + FLAG (ptr[j], 25, "linear gamma blend"); + FLAG (ptr[j], 24, "debug dither"); + BITS (ptr[j], 23, 20, "dstorg x"); + BITS (ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS (ptr[j], 14, 12, "422 write select"); + BITS (ptr[j], 11, 8, "cbuf format"); + BITS (ptr[j], 3, 2, "zbuf format"); + FLAG (ptr[j], 1, "vert line stride"); + FLAG (ptr[j], 1, "vert line stride offset"); + j++; + } + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t debug_buf_info( struct debug_stream *stream, + const char *name, + uint32_t len ) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t j = 0; + + fprintf (stderr, "%04x: ", stream->offset); + fprintf (stderr, "%s (%d dwords):\n", name, len); + fprintf (stderr, "\t0x%08x\n", ptr[j++]); + + { + fprintf (stderr, "\t0x%08x\n", ptr[j]); + BITS (ptr[j], 28, 28, "aux buffer id"); + BITS (ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG (ptr[j], 23, "use fence regs"); + FLAG (ptr[j], 22, "tiled surface"); + FLAG 
(ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS (ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; + } + + fprintf (stderr, "\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(uint32_t); + assert(j == len); + return TRUE; +} + +static cairo_bool_t +decode_3d_i915 (struct debug_stream *stream) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t cmd = *ptr; + + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug (stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug (stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug (stream, "3DSTATE_BACKFACE_STENCIL_OPS", 1); + case 0x9: + return debug (stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug (stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug (stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug (stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug (stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug (stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug (stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug (stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug (stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug (stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug (stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug (stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug (stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug (stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug (stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug (stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug (stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug (stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + ASSERT_NOT_REACHED; + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug (stream, "???", (cmd & 0xffff) + 1); + else + return debug (stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) { + return debug_prim (stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + } else if (cmd & (1 << 17)) { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim (stream); + else + return debug_prim (stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } else + return debug_prim (stream, "3DPRIM (indirect 
sequential)", 0, 2); + break; + default: + return debug (stream, "", 0); + } + + return FALSE; +} + +static cairo_bool_t +decode_3d_i965 (struct debug_stream *stream) +{ + const uint32_t *data = (uint32_t *) (stream->ptr + stream->offset); + const uint32_t opcode = (data[0] & 0xffff0000) >> 16; + unsigned int idx; + const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes_3d[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_STATE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + }, *opcode_3d; + + for (idx = 0; idx < ARRAY_LENGTH (opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if (opcode == opcode_3d->opcode) { + unsigned int len = 1; + if (opcode_3d->max_len != 1) + len = (data[0] & 0x000000ff) + 2; + return debug (stream, opcode_3d->name, len); + } + } + + return FALSE; +} + +static cairo_bool_t +decode_3d_i830 (struct debug_stream *stream) +{ + ASSERT_NOT_REACHED; + return FALSE; +} + +static cairo_bool_t +i915_debug_packet (struct debug_stream *stream, + int devid) +{ + uint32_t *ptr = (uint32_t *)(stream->ptr + stream->offset); + uint32_t cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug (stream, "MI_NOOP", 1); + case 0x3: + return debug (stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug (stream, "MI_FLUSH", 1); + case 0xA: + debug (stream, "MI_BATCH_BUFFER_END", 1); + return FALSE; + case 0x22: + return debug (stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + break; + } + break; + case 0x1: + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug (stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + if (IS_965(devid)) + return decode_3d_i965 (stream); + else if (IS_9XX(devid)) + return decode_3d_i915 (stream); + else + return decode_3d_i830 (stream); + default: + break; + } + + fprintf (stderr, "Bogus cmd: %x [%x]\n", (cmd >> 29) & 7, cmd); + ASSERT_NOT_REACHED; + return 0; +} + +void +intel_dump_batchbuffer (const void *batch, + uint32_t length, + int devid) +{ + struct debug_stream stream; + cairo_bool_t done = FALSE; + + fprintf (stderr, "\nBATCH: (%d dwords)\n", length / 4); + + stream.offset = 0; + stream.ptr = batch; + + while (! done && stream.offset < length) { + if (! 
i915_debug_packet (&stream, devid)) + break; + + assert (stream.offset <= length); + } + + fprintf (stderr, "END-BATCH\n\n"); + fflush (stderr); +} diff --git a/src/drm/cairo-drm-intel-ioctl-private.h b/src/drm/cairo-drm-intel-ioctl-private.h new file mode 100644 index 00000000..74d76b9a --- /dev/null +++ b/src/drm/cairo-drm-intel-ioctl-private.h @@ -0,0 +1,417 @@ +/* Cairo - a vector graphics library with display and print output + * + * Copyright © 2009 Chris Wilson + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + */ + +#ifndef CAIRO_DRM_INTEL_IOCTL_PRIVATE_H +#define CAIRO_DRM_INTEL_IOCTL_PRIVATE_H + +#include "cairo-drm-intel-command-private.h" + +/** @{ + * Intel memory domains + * + * Most of these just align with the various caches in + * the system and are used to flush and invalidate as + * objects end up cached in different domains. + */ +/** CPU cache */ +#define I915_GEM_DOMAIN_CPU 0x00000001 +/** Render cache, used by 2D and 3D drawing */ +#define I915_GEM_DOMAIN_RENDER 0x00000002 +/** Sampler cache, used by texture engine */ +#define I915_GEM_DOMAIN_SAMPLER 0x00000004 +/** Command queue, used to load batch buffers */ +#define I915_GEM_DOMAIN_COMMAND 0x00000008 +/** Instruction cache, used by shader programs */ +#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 +/** Vertex address cache */ +#define I915_GEM_DOMAIN_VERTEX 0x00000020 +/** GTT domain - aperture and scanout */ +#define I915_GEM_DOMAIN_GTT 0x00000040 +/** @} */ + +#define I915_TILING_NONE 0 +#define I915_TILING_X 1 +#define I915_TILING_Y 2 + +#define I915_BIT_6_SWIZZLE_NONE 0 +#define I915_BIT_6_SWIZZLE_9 1 +#define I915_BIT_6_SWIZZLE_9_10 2 +#define I915_BIT_6_SWIZZLE_9_11 3 +#define I915_BIT_6_SWIZZLE_9_10_11 4 + +#define DRM_I915_GEM_EXECBUFFER 0x14 +#define DRM_I915_GEM_BUSY 0x17 +#define DRM_I915_GEM_THROTTLE 0x18 +#define DRM_I915_GEM_CREATE 0x1b +#define DRM_I915_GEM_PREAD 0x1c +#define DRM_I915_GEM_PWRITE 0x1d +#define DRM_I915_GEM_MMAP 0x1e +#define DRM_I915_GEM_SET_DOMAIN 0x1f +#define DRM_I915_GEM_SET_TILING 0x21 +#define DRM_I915_GEM_GET_TILING 0x22 +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define DRM_I915_GEM_MMAP_GTT 0x24 + +struct drm_i915_gem_create { + /** + * Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + */ + uint64_t size; + /** + * Returned handle for the object. 
+     *
+     * Object handles are nonzero.
+     */
+    uint32_t handle;
+    uint32_t pad;
+};
+
+struct drm_i915_gem_pread {
+    /** Handle for the object being read. */
+    uint32_t handle;
+    uint32_t pad;
+    /** Offset into the object to read from */
+    uint64_t offset;
+    /** Length of data to read */
+    uint64_t size;
+    /**
+     * Pointer to write the data into.
+     *
+     * This is a fixed-size type for 32/64 compatibility.
+     */
+    uint64_t data_ptr;
+};
+
+struct drm_i915_gem_pwrite {
+    /** Handle for the object being written to. */
+    uint32_t handle;
+    uint32_t pad;
+    /** Offset into the object to write to */
+    uint64_t offset;
+    /** Length of data to write */
+    uint64_t size;
+    /**
+     * Pointer to read the data from.
+     *
+     * This is a fixed-size type for 32/64 compatibility.
+     */
+    uint64_t data_ptr;
+};
+
+struct drm_i915_gem_mmap {
+    /** Handle for the object being mapped. */
+    uint32_t handle;
+    uint32_t pad;
+    /** Offset in the object to map. */
+    uint64_t offset;
+    /**
+     * Length of data to map.
+     *
+     * The value will be page-aligned.
+     */
+    uint64_t size;
+    /**
+     * Returned pointer the data was mapped at.
+     *
+     * This is a fixed-size type for 32/64 compatibility.
+     */
+    uint64_t addr_ptr;
+};
+
+struct drm_i915_gem_mmap_gtt {
+    /** Handle for the object being mapped. */
+    uint32_t handle;
+    uint32_t pad;
+    /**
+     * Fake offset to use for subsequent mmap call
+     *
+     * This is a fixed-size type for 32/64 compatibility.
+     */
+    uint64_t offset;
+};
+
+struct drm_i915_gem_set_domain {
+    /** Handle for the object */
+    uint32_t handle;
+
+    /** New read domains */
+    uint32_t read_domains;
+
+    /** New write domain */
+    uint32_t write_domain;
+};
+
+struct drm_i915_gem_relocation_entry {
+    /**
+     * Handle of the buffer being pointed to by this relocation entry.
+     *
+     * It's appealing to make this be an index into the mm_validate_entry
+     * list to refer to the buffer, but this allows the driver to create
+     * a relocation list for state buffers and not re-write it per
+     * exec using the buffer.
+     */
+    uint32_t target_handle;
+
+    /**
+     * Value to be added to the offset of the target buffer to make up
+     * the relocation entry.
+     */
+    uint32_t delta;
+
+    /** Offset in the buffer the relocation entry will be written into */
+    uint64_t offset;
+
+    /**
+     * Offset value of the target buffer that the relocation entry was last
+     * written as.
+     *
+     * If the buffer has the same offset as last time, we can skip syncing
+     * and writing the relocation. This value is written back out by
+     * the execbuffer ioctl when the relocation is written.
+     */
+    uint64_t presumed_offset;
+
+    /**
+     * Target memory domains read by this operation.
+     */
+    uint32_t read_domains;
+
+    /**
+     * Target memory domains written by this operation.
+     *
+     * Note that only one domain may be written by the whole
+     * execbuffer operation, so that where there are conflicts,
+     * the application will get -EINVAL back.
+     */
+    uint32_t write_domain;
+};
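As an illustration (not part of this patch), a driver might fill one relocation entry for a dword it has just written into its batch roughly as follows; target_bo_handle, target_presumed_offset and batch_byte_offset are assumed names rather than fields from this header, and I915_GEM_DOMAIN_RENDER comes from the domain list above:

    struct drm_i915_gem_relocation_entry reloc;

    reloc.target_handle   = target_bo_handle;       /* bo the patched dword points at */
    reloc.delta           = 0;                      /* byte offset inside the target */
    reloc.offset          = batch_byte_offset;      /* where in the batch to patch */
    reloc.presumed_offset = target_presumed_offset; /* offset last returned by the kernel */
    reloc.read_domains    = I915_GEM_DOMAIN_RENDER;
    reloc.write_domain    = 0;                      /* read-only use, e.g. a texture */

The batch itself carries presumed_offset + delta at that location; if the kernel places the target elsewhere it rewrites the dword, otherwise it can skip the write entirely, which is exactly what presumed_offset exists for.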
+
+struct drm_i915_gem_exec_object {
+    /**
+     * User's handle for a buffer to be bound into the GTT for this
+     * operation.
+     */
+    uint32_t handle;
+
+    /** Number of relocations to be performed on this buffer */
+    uint32_t relocation_count;
+    /**
+     * Pointer to array of struct drm_i915_gem_relocation_entry containing
+     * the relocations to be performed in this buffer.
+     */
+    uint64_t relocs_ptr;
+
+    /** Required alignment in graphics aperture */
+    uint64_t alignment;
+
+    /**
+     * Returned value of the updated offset of the object, for future
+     * presumed_offset writes.
+     */
+    uint64_t offset;
+};
+
+struct drm_i915_gem_execbuffer {
+    /**
+     * List of buffers to be validated with their relocations to be
+     * performed on them.
+     *
+     * This is a pointer to an array of struct drm_i915_gem_validate_entry.
+     *
+     * These buffers must be listed in an order such that all relocations
+     * a buffer is performing refer to buffers that have already appeared
+     * in the validate list.
+     */
+    uint64_t buffers_ptr;
+    uint32_t buffer_count;
+
+    /** Offset in the batchbuffer to start execution from. */
+    uint32_t batch_start_offset;
+    /** Bytes used in batchbuffer from batch_start_offset */
+    uint32_t batch_len;
+    uint32_t DR1;
+    uint32_t DR4;
+    uint32_t num_cliprects;
+    /** This is a struct drm_clip_rect *cliprects */
+    uint64_t cliprects_ptr;
+};
+
+struct drm_i915_gem_busy {
+    /** Handle of the buffer to check for busy */
+    uint32_t handle;
+
+    /** Return busy status (1 if busy, 0 if idle) */
+    uint32_t busy;
+};
+
+struct drm_i915_gem_set_tiling {
+    /** Handle of the buffer to have its tiling state updated */
+    uint32_t handle;
+
+    /**
+     * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
+     * I915_TILING_Y).
+     *
+     * This value is to be set on request, and will be updated by the
+     * kernel on successful return with the actual chosen tiling layout.
+     *
+     * The tiling mode may be demoted to I915_TILING_NONE when the system
+     * has bit 6 swizzling that can't be managed correctly by GEM.
+     *
+     * Buffer contents become undefined when changing tiling_mode.
+     */
+    uint32_t tiling_mode;
+
+    /**
+     * Stride in bytes for the object when in I915_TILING_X or
+     * I915_TILING_Y.
+     */
+    uint32_t stride;
+
+    /**
+     * Returned address bit 6 swizzling required for CPU access through
+     * mmap mapping.
+     */
+    uint32_t swizzle_mode;
+};
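As an illustration (not part of this patch), requesting a tiling mode is a plain ioctl on the DRM file descriptor; DRM_IOCTL_I915_GEM_SET_TILING is defined a little further down, while fd, handle and stride are assumed locals and <sys/ioctl.h> plus <errno.h> are assumed includes:

    struct drm_i915_gem_set_tiling set_tiling;
    int ret;

    set_tiling.handle = handle;
    set_tiling.tiling_mode = I915_TILING_X;
    set_tiling.stride = stride;    /* bytes per row */

    do {
        ret = ioctl (fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
    } while (ret == -1 && errno == EINTR);

Because the kernel may demote the request (for example to I915_TILING_NONE when bit-6 swizzling cannot be managed), callers should read back set_tiling.tiling_mode and set_tiling.swizzle_mode on success rather than assume the request was honoured.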
+ */ + uint32_t swizzle_mode; +}; + +struct drm_i915_gem_get_aperture { + /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ + uint64_t aper_size; + + /** + * Available space in the aperture used by i915_gem_execbuffer, in + * bytes + */ + uint64_t aper_available_size; +}; + + +#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) +#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) +#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) +#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) +#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) +#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) +#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) +#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) +#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) + +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 + +struct drm_i915_gem_madvise { + uint32_t handle; + uint32_t madv; + uint32_t retained; +}; +#define DRM_I915_GEM_MADVISE 0x26 +#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) + + +/* XXX execbuffer2 */ +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + uint32_t handle; + + /** Number of relocations to be performed on this buffer */ + uint32_t relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + uint64_t relocs_ptr; + + /** Required alignment in graphics aperture */ + uint64_t alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. + */ + uint64_t offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) + uint64_t flags; + uint64_t rsvd1; + uint64_t rsvd2; +}; + +struct drm_i915_gem_execbuffer2 { + /** + * List of gem_exec_object2 structs + */ + uint64_t buffers_ptr; + uint32_t buffer_count; + + /** Offset in the batchbuffer to start execution from. 
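+	 * (Typically 0; together with batch_len below it delimits the byte
+	 * range of the batch that the GPU actually executes.)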
*/ + uint32_t batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + uint32_t batch_len; + uint32_t DR1; + uint32_t DR4; + uint32_t num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + uint64_t cliprects_ptr; + uint64_t flags; + uint64_t rsvd1; + uint64_t rsvd2; +}; + +#define I915_GEM_3D_PIPELINE 0x1 +#define I915_GEM_MEDIA_PIPELINE 0x2 +#define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) + +#endif /* CAIRO_DRM_INTEL_IOCTL_PRIVATE_H */ diff --git a/src/drm/cairo-drm-intel-private.h b/src/drm/cairo-drm-intel-private.h index f5791f3c..eedb700b 100644 --- a/src/drm/cairo-drm-intel-private.h +++ b/src/drm/cairo-drm-intel-private.h @@ -30,51 +30,30 @@ #ifndef CAIRO_DRM_INTEL_PRIVATE_H #define CAIRO_DRM_INTEL_PRIVATE_H +#include "cairoint.h" +#include "cairo-cache-private.h" #include "cairo-compiler-private.h" -#include "cairo-types-private.h" #include "cairo-drm-private.h" -#include "cairo-list-private.h" #include "cairo-freelist-private.h" +#include "cairo-list-private.h" #include "cairo-mutex-private.h" +#include "cairo-rtree-private.h" +#include "cairo-types-private.h" -/** @{ - * Intel memory domains - * - * Most of these just align with the various caches in - * the system and are used to flush and invalidate as - * objects end up cached in different domains. - */ -/** CPU cache */ -#define I915_GEM_DOMAIN_CPU 0x00000001 -/** Render cache, used by 2D and 3D drawing */ -#define I915_GEM_DOMAIN_RENDER 0x00000002 -/** Sampler cache, used by texture engine */ -#define I915_GEM_DOMAIN_SAMPLER 0x00000004 -/** Command queue, used to load batch buffers */ -#define I915_GEM_DOMAIN_COMMAND 0x00000008 -/** Instruction cache, used by shader programs */ -#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 -/** Vertex address cache */ -#define I915_GEM_DOMAIN_VERTEX 0x00000020 -/** GTT domain - aperture and scanout */ -#define I915_GEM_DOMAIN_GTT 0x00000040 -/** @} */ - -#define I915_TILING_NONE 0 -#define I915_TILING_X 1 -#define I915_TILING_Y 2 - -#define I915_BIT_6_SWIZZLE_NONE 0 -#define I915_BIT_6_SWIZZLE_9 1 -#define I915_BIT_6_SWIZZLE_9_10 2 -#define I915_BIT_6_SWIZZLE_9_11 3 -#define I915_BIT_6_SWIZZLE_9_10_11 4 +#include "cairo-drm-intel-ioctl-private.h" + +#define NEAREST_BIAS (-.375) #define INTEL_TILING_DEFAULT I915_TILING_Y #define INTEL_BO_CACHE_BUCKETS 12 /* cache surfaces up to 16 MiB */ +#define INTEL_GLYPH_CACHE_WIDTH 1024 +#define INTEL_GLYPH_CACHE_HEIGHT 1024 +#define INTEL_GLYPH_CACHE_MIN_SIZE 1 +#define INTEL_GLYPH_CACHE_MAX_SIZE 128 + typedef struct _intel_bo { cairo_drm_bo_t base; @@ -86,12 +65,125 @@ typedef struct _intel_bo { uint32_t tiling; uint32_t swizzle; uint32_t stride; + cairo_bool_t purgeable; + + uint32_t opaque0; + uint32_t opaque1; - cairo_bool_t in_batch; - uint32_t read_domains; - uint32_t write_domain; + struct drm_i915_gem_exec_object2 *exec; + uint32_t batch_read_domains; + uint32_t batch_write_domain; + + cairo_list_t link; } intel_bo_t; +#define INTEL_BATCH_SIZE (64*1024) +#define INTEL_VERTEX_BUFFER_SIZE (512*1024) +#define INTEL_MAX_RELOCS 2048 + +static inline void +intel_bo_mark_purgeable (intel_bo_t *bo, + cairo_bool_t purgeable) +{ + if (bo->base.name == 0) + bo->purgeable = purgeable; +} + +typedef struct _intel_vertex_buffer intel_vertex_buffer_t; + +typedef void (*intel_vertex_buffer_new_func_t) (intel_vertex_buffer_t *vertex_buffer); +typedef void (*intel_vertex_buffer_start_rectangles_func_t) 
(intel_vertex_buffer_t *vertex_buffer, + uint32_t floats_per_vertex); +typedef void (*intel_vertex_buffer_flush_func_t) (intel_vertex_buffer_t *vertex_buffer); +typedef void (*intel_vertex_buffer_finish_func_t) (intel_vertex_buffer_t *vertex_buffer); + +struct _intel_vertex_buffer { + uint32_t vbo_batch; /* reloc position in batch, 0 -> not yet allocated */ + uint32_t vbo_offset; + uint32_t vbo_used; + + uint32_t vertex_index; + uint32_t vertex_count; + + uint32_t floats_per_vertex; + uint32_t rectangle_size; + + intel_bo_t *last_vbo; + uint32_t last_vbo_offset; + uint32_t last_vbo_space; + + intel_vertex_buffer_new_func_t new; + intel_vertex_buffer_start_rectangles_func_t start_rectangles; + intel_vertex_buffer_flush_func_t flush; + intel_vertex_buffer_finish_func_t finish; + + uint32_t base[INTEL_VERTEX_BUFFER_SIZE / sizeof (uint32_t)]; +}; + +typedef struct _intel_batch intel_batch_t; + +typedef void (*intel_batch_commit_func_t) (intel_batch_t *batch); +typedef void (*intel_batch_reset_func_t) (intel_batch_t *batch); + +struct _intel_batch { + size_t gtt_size; + size_t gtt_avail_size; + + intel_batch_commit_func_t commit; + intel_batch_reset_func_t reset; + + uint16_t exec_count; + uint16_t reloc_count; + uint16_t used; + uint16_t header; + + intel_bo_t *target_bo[INTEL_MAX_RELOCS]; + struct drm_i915_gem_exec_object2 exec[INTEL_MAX_RELOCS]; + struct drm_i915_gem_relocation_entry reloc[INTEL_MAX_RELOCS]; + + uint32_t base[INTEL_BATCH_SIZE / sizeof (uint32_t)]; + + intel_vertex_buffer_t vertex_buffer; +}; + +typedef struct _intel_buffer { + intel_bo_t *bo; + uint32_t offset; + cairo_format_t format; + uint32_t map0, map1; + uint32_t width; + uint32_t height; + uint32_t stride; +} intel_buffer_t; + +typedef struct _intel_buffer_cache { + int ref_count; + intel_buffer_t buffer; + cairo_rtree_t rtree; + cairo_list_t link; +} intel_buffer_cache_t; + +typedef struct _intel_glyph { + cairo_rtree_node_t node; + intel_buffer_cache_t *cache; + void **owner; + float texcoord[3]; +} intel_glyph_t; + +typedef struct _intel_gradient_cache { + cairo_pattern_union_t pattern; + intel_buffer_t buffer; +} intel_gradient_cache_t; +#define GRADIENT_CACHE_SIZE 16 + +typedef struct _intel_surface { + cairo_drm_surface_t drm; + + cairo_cache_entry_t snapshot_cache_entry; +} intel_surface_t; + +typedef void (*intel_reset_context_func_t) (void *device); + typedef struct _intel_device { cairo_drm_device_t base; @@ -108,8 +200,54 @@ typedef struct _intel_device { size_t bo_cache_size; size_t bo_max_cache_size_high; size_t bo_max_cache_size_low; + + cairo_mutex_t mutex; + intel_batch_t batch; + + cairo_bool_t glyph_cache_mapped; + intel_buffer_cache_t glyph_cache[2]; + + struct { + intel_gradient_cache_t cache[GRADIENT_CACHE_SIZE]; + unsigned int size; + } gradient_cache; + + cairo_cache_t snapshot_cache; + size_t snapshot_cache_max_size; + + intel_reset_context_func_t reset_context; + + cairo_status_t (*flush) (struct _intel_device *); } intel_device_t; +static inline intel_device_t * +to_intel_device (cairo_device_t *base) +{ + return (intel_device_t *) base; +} + +static inline intel_bo_t * +to_intel_bo (cairo_drm_bo_t *base) +{ + return (intel_bo_t *) base; +} + +static inline intel_bo_t * +intel_bo_reference (intel_bo_t *bo) +{ + return to_intel_bo (cairo_drm_bo_reference (&bo->base)); +} + +cairo_private cairo_bool_t +intel_bo_madvise (intel_device_t *device, intel_bo_t *bo, int madv); + + +static always_inline void +intel_bo_destroy (intel_device_t *device, intel_bo_t *bo) +{ + cairo_drm_bo_destroy 
(&device->base.base, &bo->base); +} + cairo_private cairo_bool_t intel_info (int fd, uint64_t *gtt_size); @@ -119,23 +257,27 @@ intel_device_init (intel_device_t *device, int fd); cairo_private void intel_device_fini (intel_device_t *dev); -cairo_private cairo_drm_bo_t * +cairo_private intel_bo_t * intel_bo_create (intel_device_t *dev, uint32_t size, cairo_bool_t gpu_target); -cairo_private void -intel_bo_release (void *_dev, void *_bo); - -cairo_private cairo_drm_bo_t * +cairo_private intel_bo_t * intel_bo_create_for_name (intel_device_t *dev, uint32_t name); cairo_private void -intel_bo_set_tiling (intel_device_t *dev, +intel_bo_set_tiling (const intel_device_t *dev, intel_bo_t *bo, uint32_t tiling, uint32_t stride); +cairo_private cairo_bool_t +intel_bo_is_inactive (const intel_device_t *device, + const intel_bo_t *bo); + +cairo_private void +intel_bo_wait (const intel_device_t *device, const intel_bo_t *bo); + cairo_private void intel_bo_write (const intel_device_t *dev, intel_bo_t *bo, @@ -150,9 +292,6 @@ intel_bo_read (const intel_device_t *dev, unsigned long size, void *data); -cairo_private void -intel_bo_wait (const intel_device_t *dev, intel_bo_t *bo); - cairo_private void * intel_bo_map (const intel_device_t *dev, intel_bo_t *bo); @@ -176,7 +315,203 @@ intel_bo_get_image (const intel_device_t *device, intel_bo_t *bo, const cairo_drm_surface_t *surface); +cairo_private cairo_status_t +intel_bo_put_image (intel_device_t *dev, + intel_bo_t *bo, int stride, + cairo_image_surface_t *src, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y); + +cairo_private void +intel_surface_init (intel_surface_t *surface, + const cairo_surface_backend_t *backend, + cairo_drm_device_t *device, + cairo_content_t content); + +cairo_private cairo_status_t +intel_buffer_cache_init (intel_buffer_cache_t *cache, + intel_device_t *device, + cairo_format_t format, + int width, int height); + +cairo_private cairo_status_t +intel_gradient_render (intel_device_t *device, + const cairo_gradient_pattern_t *pattern, + intel_buffer_t *buffer); + +cairo_private cairo_int_status_t +intel_get_glyph (intel_device_t *device, + cairo_scaled_font_t *scaled_font, + cairo_scaled_glyph_t *scaled_glyph); + +cairo_private void +intel_scaled_glyph_fini (cairo_scaled_glyph_t *scaled_glyph, + cairo_scaled_font_t *scaled_font); + +cairo_private void +intel_scaled_font_fini (cairo_scaled_font_t *scaled_font); + +cairo_private void +intel_glyph_cache_unmap (intel_device_t *device); + +cairo_private void +intel_glyph_cache_unpin (intel_device_t *device); + +static inline intel_glyph_t * +intel_glyph_pin (intel_glyph_t *glyph) +{ + cairo_rtree_node_t *node = &glyph->node; + if (unlikely (node->pinned == 0)) + return _cairo_rtree_pin (&glyph->cache->rtree, node); + return glyph; +} + +cairo_private cairo_status_t +intel_snapshot_cache_insert (intel_device_t *device, + intel_surface_t *surface); + +cairo_private void +intel_surface_detach_snapshot (cairo_surface_t *abstract_surface); + +cairo_private void +intel_snapshot_cache_thaw (intel_device_t *device); + cairo_private void intel_throttle (intel_device_t *device); +cairo_private cairo_status_t +intel_surface_acquire_source_image (void *abstract_surface, + cairo_image_surface_t **image_out, + void **image_extra); + +cairo_private void +intel_surface_release_source_image (void *abstract_surface, + cairo_image_surface_t *image, + void *image_extra); +cairo_private cairo_surface_t * +intel_surface_map_to_image (void *abstract_surface); + +cairo_private 
cairo_status_t +intel_surface_flush (void *abstract_surface); + +cairo_private cairo_status_t +intel_surface_finish (void *abstract_surface); + +cairo_private void +intel_dump_batchbuffer (const void *batch, + uint32_t length, + int devid); + +static inline float cairo_const +texcoord_2d_16 (double x, double y) +{ + union { + uint32_t ui; + float f; + } u; + u.ui = (_cairo_half_from_float (y) << 16) | _cairo_half_from_float (x); + return u.f; +} + +static inline uint32_t cairo_const +MS3_tiling (uint32_t tiling) +{ + switch (tiling) { + default: + case I915_TILING_NONE: return 0; + case I915_TILING_X: return MS3_TILED_SURFACE; + case I915_TILING_Y: return MS3_TILED_SURFACE | MS3_TILE_WALK; + } +} + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM) +#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ + devid == PCI_CHIP_I915_GM || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_GM45_GM || IS_IGD(devid)) + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ + devid == PCI_CHIP_G41_G) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define IS_IRONLAKE(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_915(devid) (devid == PCI_CHIP_I915_G || \ + devid == PCI_CHIP_E7221_G || \ + devid == PCI_CHIP_I915_GM) + +#define IS_945(devid) (devid == PCI_CHIP_I945_G || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_G33_G || \ + devid == PCI_CHIP_Q33_G || \ + devid == PCI_CHIP_Q35_G || IS_IGD(devid)) + +#define IS_965(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid) || \ + IS_IRONLAKE(devid)) + +#define IS_9XX(devid) (IS_915(devid) || \ + IS_945(devid) || \ + IS_965(devid)) + + #endif /* CAIRO_DRM_INTEL_PRIVATE_H */ diff --git a/src/drm/cairo-drm-intel-surface.c b/src/drm/cairo-drm-intel-surface.c index e9e1cacc..5b7c60be 100644 --- a/src/drm/cairo-drm-intel-surface.c +++ b/src/drm/cairo-drm-intel-surface.c @@ -31,84 +31,66 
@@ #include "cairo-drm-private.h" #include "cairo-drm-intel-private.h" + #include "cairo-error-private.h" /* Basic generic/stub surface for intel chipsets */ #define MAX_SIZE 2048 -typedef struct _intel_surface intel_surface_t; - -struct _intel_surface { - cairo_drm_surface_t base; -}; - -static inline intel_device_t * -to_intel_device (cairo_drm_device_t *device) -{ - return (intel_device_t *) device; -} - -static inline intel_bo_t * -to_intel_bo (cairo_drm_bo_t *bo) -{ - return (intel_bo_t *) bo; -} - -static cairo_status_t -intel_batch_flush (intel_device_t *device) -{ - return CAIRO_STATUS_SUCCESS; -} - -static cairo_status_t -intel_surface_batch_flush (intel_surface_t *surface) +static cairo_surface_t * +intel_surface_create_similar (void *abstract_surface, + cairo_content_t content, + int width, + int height) { - if (to_intel_bo (surface->base.bo)->write_domain) - return intel_batch_flush (to_intel_device (surface->base.device)); - - return CAIRO_STATUS_SUCCESS; + return cairo_image_surface_create (_cairo_format_from_content (content), + width, height); } -static cairo_status_t +cairo_status_t intel_surface_finish (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - return _cairo_drm_surface_finish (&surface->base); + return _cairo_drm_surface_finish (&surface->drm); } -static cairo_status_t +cairo_status_t intel_surface_acquire_source_image (void *abstract_surface, - cairo_image_surface_t **image_out, - void **image_extra) + cairo_image_surface_t **image_out, + void **image_extra) { intel_surface_t *surface = abstract_surface; cairo_surface_t *image; cairo_status_t status; - if (surface->base.fallback != NULL) { - image = surface->base.fallback; + /* XXX batch flush */ + + if (surface->drm.fallback != NULL) { + image = surface->drm.fallback; goto DONE; } - image = _cairo_surface_has_snapshot (&surface->base.base, + image = _cairo_surface_has_snapshot (&surface->drm.base, &_cairo_image_surface_backend); if (image != NULL) goto DONE; - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return status; + if (surface->drm.base.backend->flush != NULL) { + status = surface->drm.base.backend->flush (surface); + if (unlikely (status)) + return status; + } - image = intel_bo_get_image (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo), - &surface->base); + image = intel_bo_get_image (to_intel_device (surface->drm.base.device), + to_intel_bo (surface->drm.bo), + &surface->drm); status = image->status; if (unlikely (status)) return status; - status = _cairo_surface_attach_snapshot (&surface->base.base, + status = _cairo_surface_attach_snapshot (&surface->drm.base, image, cairo_surface_destroy); if (unlikely (status)) { @@ -122,7 +104,7 @@ DONE: return CAIRO_STATUS_SUCCESS; } -static void +void intel_surface_release_source_image (void *abstract_surface, cairo_image_surface_t *image, void *image_extra) @@ -130,180 +112,200 @@ intel_surface_release_source_image (void *abstract_surface, cairo_surface_destroy (&image->base); } -static cairo_surface_t * -intel_surface_snapshot (void *abstract_surface) +cairo_surface_t * +intel_surface_map_to_image (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - cairo_status_t status; - if (surface->base.fallback != NULL) - return NULL; + if (surface->drm.fallback == NULL) { + cairo_surface_t *image; + cairo_status_t status; + void *ptr; - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return _cairo_surface_create_in_error (status); + if 
(surface->drm.base.backend->flush != NULL) { + status = surface->drm.base.backend->flush (surface); + if (unlikely (status)) + return _cairo_surface_create_in_error (status); + } + + ptr = intel_bo_map (to_intel_device (surface->drm.base.device), + to_intel_bo (surface->drm.bo)); + if (unlikely (ptr == NULL)) + return _cairo_surface_create_in_error (CAIRO_STATUS_NO_MEMORY); + + image = cairo_image_surface_create_for_data (ptr, + surface->drm.format, + surface->drm.width, + surface->drm.height, + surface->drm.stride); + if (unlikely (image->status)) { + intel_bo_unmap (to_intel_bo (surface->drm.bo)); + return image; + } + + surface->drm.fallback = image; + } - return intel_bo_get_image (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo), - &surface->base); + return surface->drm.fallback; } -static cairo_status_t -intel_surface_acquire_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t **image_out, - cairo_rectangle_int_t *image_rect_out, - void **image_extra) +cairo_status_t +intel_surface_flush (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - cairo_surface_t *image; cairo_status_t status; - void *ptr; - assert (surface->base.fallback == NULL); + if (surface->drm.fallback == NULL) + return CAIRO_STATUS_SUCCESS; - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return status; - - /* Force a read barrier, as well as flushing writes above */ - if (to_intel_bo (surface->base.bo)->in_batch) { - status = intel_batch_flush (to_intel_device (surface->base.device)); - if (unlikely (status)) - return status; - } - - ptr = intel_bo_map (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo)); - if (unlikely (ptr == NULL)) - return _cairo_error (CAIRO_STATUS_NO_MEMORY); - - image = cairo_image_surface_create_for_data (ptr, - surface->base.format, - surface->base.width, - surface->base.height, - surface->base.stride); - status = image->status; - if (unlikely (status)) { - intel_bo_unmap (to_intel_bo (surface->base.bo)); - return status; - } - - surface->base.fallback = cairo_surface_reference (image); + /* kill any outstanding maps */ + cairo_surface_finish (surface->drm.fallback); - *image_out = (cairo_image_surface_t *) image; - *image_extra = NULL; + status = cairo_surface_status (surface->drm.fallback); + cairo_surface_destroy (surface->drm.fallback); + surface->drm.fallback = NULL; - image_rect_out->x = 0; - image_rect_out->y = 0; - image_rect_out->width = surface->base.width; - image_rect_out->height = surface->base.height; + intel_bo_unmap (to_intel_bo (surface->drm.bo)); - return CAIRO_STATUS_SUCCESS; + return status; } -static void -intel_surface_release_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t *image, - cairo_rectangle_int_t *image_rect, - void *image_extra) +static cairo_int_status_t +intel_surface_paint (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_clip_t *clip) { - /* Keep the fallback until we flush, either explicitly or at the - * end of this context. The idea is to avoid excess migration of - * the buffer between GPU and CPU domains. 
- */ - cairo_surface_destroy (&image->base); + return _cairo_surface_paint (intel_surface_map_to_image (abstract_surface), + op, source, clip); } -static cairo_status_t -intel_surface_flush (void *abstract_surface) +static cairo_int_status_t +intel_surface_mask (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + const cairo_pattern_t *mask, + cairo_clip_t *clip) { - intel_surface_t *surface = abstract_surface; - cairo_status_t status; - - if (surface->base.fallback == NULL) - return intel_surface_batch_flush (surface); - - /* kill any outstanding maps */ - cairo_surface_finish (surface->base.fallback); + return _cairo_surface_mask (intel_surface_map_to_image (abstract_surface), + op, source, mask, clip); +} - status = cairo_surface_status (surface->base.fallback); - cairo_surface_destroy (surface->base.fallback); - surface->base.fallback = NULL; +static cairo_int_status_t +intel_surface_stroke (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_stroke (intel_surface_map_to_image (abstract_surface), + op, source, path, stroke_style, ctm, ctm_inverse, + tolerance, antialias, clip); +} - intel_bo_unmap (to_intel_bo (surface->base.bo)); +static cairo_int_status_t +intel_surface_fill (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_fill (intel_surface_map_to_image (abstract_surface), + op, source, path, fill_rule, + tolerance, antialias, clip); +} - return status; +static cairo_int_status_t +intel_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + *num_remaining = 0; + return _cairo_surface_show_text_glyphs (intel_surface_map_to_image (abstract_surface), + op, source, + NULL, 0, + glyphs, num_glyphs, + NULL, 0, 0, + scaled_font, clip); } static const cairo_surface_backend_t intel_surface_backend = { CAIRO_SURFACE_TYPE_DRM, - _cairo_drm_surface_create_similar, - intel_surface_finish, + intel_surface_create_similar, + intel_surface_finish, intel_surface_acquire_source_image, intel_surface_release_source_image, - intel_surface_acquire_dest_image, - intel_surface_release_dest_image, - - NULL, //intel_surface_clone_similar, - NULL, //intel_surface_composite, - NULL, //intel_surface_fill_rectangles, - NULL, //intel_surface_composite_trapezoids, - NULL, //intel_surface_create_span_renderer, - NULL, //intel_surface_check_span_renderer, + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + NULL, /* copy_page */ NULL, /* show_page */ _cairo_drm_surface_get_extents, - NULL, /* old_show_glyphs */ + NULL, /* old-glyphs */ _cairo_drm_surface_get_font_options, - intel_surface_flush, - NULL, /* mark_dirty_rectangle */ - NULL, //intel_surface_scaled_font_fini, - NULL, //intel_surface_scaled_glyph_fini, - - _cairo_drm_surface_paint, - _cairo_drm_surface_mask, - _cairo_drm_surface_stroke, - _cairo_drm_surface_fill, - _cairo_drm_surface_show_glyphs, - intel_surface_snapshot, - - NULL, 
/* is_similar */ + intel_surface_flush, + NULL, /* mark dirty */ + NULL, NULL, /* font/glyph fini */ + + intel_surface_paint, + intel_surface_mask, + intel_surface_stroke, + intel_surface_fill, + intel_surface_glyphs, }; -static void +void intel_surface_init (intel_surface_t *surface, - cairo_content_t content, - cairo_drm_device_t *device) + const cairo_surface_backend_t *backend, + cairo_drm_device_t *device, + cairo_content_t content) { - _cairo_surface_init (&surface->base.base, - &intel_surface_backend, - NULL, /* device */ + _cairo_surface_init (&surface->drm.base, + backend, + &device->base, content); - _cairo_drm_surface_init (&surface->base, device); + _cairo_drm_surface_init (&surface->drm, device); switch (content) { case CAIRO_CONTENT_ALPHA: - surface->base.format = CAIRO_FORMAT_A8; + surface->drm.format = CAIRO_FORMAT_A8; break; case CAIRO_CONTENT_COLOR: - surface->base.format = CAIRO_FORMAT_RGB24; + surface->drm.format = CAIRO_FORMAT_RGB24; break; default: ASSERT_NOT_REACHED; case CAIRO_CONTENT_COLOR_ALPHA: - surface->base.format = CAIRO_FORMAT_ARGB32; + surface->drm.format = CAIRO_FORMAT_ARGB32; break; } + + surface->snapshot_cache_entry.hash = 0; } static cairo_surface_t * -intel_surface_create_internal (cairo_drm_device_t *device, - cairo_content_t content, - int width, int height) +intel_surface_create (cairo_drm_device_t *device, + cairo_content_t content, + int width, int height) { intel_surface_t *surface; cairo_status_t status; @@ -312,36 +314,28 @@ intel_surface_create_internal (cairo_drm_device_t *device, if (unlikely (surface == NULL)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); - intel_surface_init (surface, content, device); + intel_surface_init (surface, &intel_surface_backend, device, content); if (width && height) { - surface->base.width = width; - surface->base.height = height; + surface->drm.width = width; + surface->drm.height = height; /* Vol I, p134: size restrictions for textures */ width = (width + 3) & -4; height = (height + 1) & -2; - surface->base.stride = - cairo_format_stride_for_width (surface->base.format, width); - surface->base.bo = intel_bo_create (to_intel_device (device), - surface->base.stride * height, - TRUE); - if (surface->base.bo == NULL) { - status = _cairo_drm_surface_finish (&surface->base); + surface->drm.stride = + cairo_format_stride_for_width (surface->drm.format, width); + surface->drm.bo = &intel_bo_create (to_intel_device (&device->base), + surface->drm.stride * height, + TRUE)->base; + if (surface->drm.bo == NULL) { + status = _cairo_drm_surface_finish (&surface->drm); free (surface); return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); } } - return &surface->base.base; -} - -static cairo_surface_t * -intel_surface_create (cairo_drm_device_t *device, - cairo_content_t content, - int width, int height) -{ - return intel_surface_create_internal (device, content, width, height); + return &surface->drm.base; } static cairo_surface_t * @@ -376,46 +370,41 @@ intel_surface_create_for_name (cairo_drm_device_t *device, if (unlikely (surface == NULL)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); - intel_surface_init (surface, content, device); + intel_surface_init (surface, &intel_surface_backend, device, content); if (width && height) { - surface->base.width = width; - surface->base.height = height; - surface->base.stride = stride; - - surface->base.bo = intel_bo_create_for_name (to_intel_device (device), - name); - if (unlikely 
(surface->base.bo == NULL)) { - status = _cairo_drm_surface_finish (&surface->base); + surface->drm.width = width; + surface->drm.height = height; + surface->drm.stride = stride; + + surface->drm.bo = &intel_bo_create_for_name (to_intel_device (&device->base), + name)->base; + if (unlikely (surface->drm.bo == NULL)) { + status = _cairo_drm_surface_finish (&surface->drm); free (surface); return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY)); } } - return &surface->base.base; + return &surface->drm.base; } static cairo_status_t intel_surface_enable_scan_out (void *abstract_surface) { intel_surface_t *surface = abstract_surface; - cairo_status_t status; - if (unlikely (surface->base.bo == NULL)) + if (unlikely (surface->drm.bo == NULL)) return _cairo_error (CAIRO_STATUS_INVALID_SIZE); - status = intel_surface_batch_flush (surface); - if (unlikely (status)) - return status; - - if (to_intel_bo (surface->base.bo)->tiling == I915_TILING_Y) { - intel_bo_set_tiling (to_intel_device (surface->base.device), - to_intel_bo (surface->base.bo), - I915_TILING_X, surface->base.stride); + if (to_intel_bo (surface->drm.bo)->tiling == I915_TILING_Y) { + intel_bo_set_tiling (to_intel_device (surface->drm.base.device), + to_intel_bo (surface->drm.bo), + I915_TILING_X, surface->drm.stride); } - if (unlikely (to_intel_bo (surface->base.bo)->tiling == I915_TILING_Y)) + if (unlikely (to_intel_bo (surface->drm.bo)->tiling == I915_TILING_Y)) return _cairo_error (CAIRO_STATUS_INVALID_FORMAT); /* XXX */ return CAIRO_STATUS_SUCCESS; @@ -424,13 +413,7 @@ intel_surface_enable_scan_out (void *abstract_surface) static cairo_int_status_t intel_device_throttle (cairo_drm_device_t *device) { - cairo_status_t status; - - status = intel_batch_flush (to_intel_device (device)); - if (unlikely (status)) - return status; - - intel_throttle (to_intel_device (device)); + intel_throttle (to_intel_device (&device->base)); return CAIRO_STATUS_SUCCESS; } @@ -455,24 +438,28 @@ _cairo_drm_intel_device_create (int fd, dev_t dev, int vendor_id, int chip_id) device = malloc (sizeof (intel_device_t)); if (unlikely (device == NULL)) - return _cairo_drm_device_create_in_error (CAIRO_STATUS_NO_MEMORY); + return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); status = intel_device_init (device, fd); if (unlikely (status)) { free (device); - return _cairo_drm_device_create_in_error (status); + return (cairo_drm_device_t *) _cairo_device_create_in_error (status); } - device->base.bo.release = intel_bo_release; - device->base.surface.create = intel_surface_create; device->base.surface.create_for_name = intel_surface_create_for_name; device->base.surface.create_from_cacheable_image = NULL; device->base.surface.flink = _cairo_drm_surface_flink; device->base.surface.enable_scan_out = intel_surface_enable_scan_out; + device->base.surface.map_to_image = intel_surface_map_to_image; + + device->base.device.flush = NULL; device->base.device.throttle = intel_device_throttle; device->base.device.destroy = intel_device_destroy; - return _cairo_drm_device_init (&device->base, fd, dev, MAX_SIZE); + return _cairo_drm_device_init (&device->base, + fd, dev, + vendor_id, chip_id, + MAX_SIZE); } diff --git a/src/drm/cairo-drm-intel.c b/src/drm/cairo-drm-intel.c index 6c8a8fd2..7cbbb16c 100644 --- a/src/drm/cairo-drm-intel.c +++ b/src/drm/cairo-drm-intel.c @@ -32,6 +32,8 @@ #include "cairo-drm-private.h" #include "cairo-drm-ioctl-private.h" #include "cairo-drm-intel-private.h" +#include 
"cairo-drm-intel-ioctl-private.h" + #include "cairo-error-private.h" #include "cairo-freelist-private.h" @@ -39,300 +41,13 @@ #include <sys/mman.h> #include <errno.h> -#define DRM_I915_GEM_EXECBUFFER 0x14 -#define DRM_I915_GEM_BUSY 0x17 -#define DRM_I915_GEM_THROTTLE 0x18 -#define DRM_I915_GEM_CREATE 0x1b -#define DRM_I915_GEM_PREAD 0x1c -#define DRM_I915_GEM_PWRITE 0x1d -#define DRM_I915_GEM_MMAP 0x1e -#define DRM_I915_GEM_SET_DOMAIN 0x1f -#define DRM_I915_GEM_SET_TILING 0x21 -#define DRM_I915_GEM_GET_TILING 0x22 -#define DRM_I915_GEM_GET_APERTURE 0x23 -#define DRM_I915_GEM_MMAP_GTT 0x24 - -struct drm_i915_gem_create { - /** - * Requested size for the object. - * - * The (page-aligned) allocated size for the object will be returned. - */ - uint64_t size; - /** - * Returned handle for the object. - * - * Object handles are nonzero. - */ - uint32_t handle; - uint32_t pad; -}; - -struct drm_i915_gem_pread { - /** Handle for the object being read. */ - uint32_t handle; - uint32_t pad; - /** Offset into the object to read from */ - uint64_t offset; - /** Length of data to read */ - uint64_t size; - /** - * Pointer to write the data into. - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t data_ptr; -}; - -struct drm_i915_gem_pwrite { - /** Handle for the object being written to. */ - uint32_t handle; - uint32_t pad; - /** Offset into the object to write to */ - uint64_t offset; - /** Length of data to write */ - uint64_t size; - /** - * Pointer to read the data from. - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t data_ptr; -}; - -struct drm_i915_gem_mmap { - /** Handle for the object being mapped. */ - uint32_t handle; - uint32_t pad; - /** Offset in the object to map. */ - uint64_t offset; - /** - * Length of data to map. - * - * The value will be page-aligned. - */ - uint64_t size; - /** - * Returned pointer the data was mapped at. - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t addr_ptr; -}; - -struct drm_i915_gem_mmap_gtt { - /** Handle for the object being mapped. */ - uint32_t handle; - uint32_t pad; - /** - * Fake offset to use for subsequent mmap call - * - * This is a fixed-size type for 32/64 compatibility. - */ - uint64_t offset; -}; - -struct drm_i915_gem_set_domain { - /** Handle for the object */ - uint32_t handle; - - /** New read domains */ - uint32_t read_domains; - - /** New write domain */ - uint32_t write_domain; -}; - -struct drm_i915_gem_relocation_entry { - /** - * Handle of the buffer being pointed to by this relocation entry. - * - * It's appealing to make this be an index into the mm_validate_entry - * list to refer to the buffer, but this allows the driver to create - * a relocation list for state buffers and not re-write it per - * exec using the buffer. - */ - uint32_t target_handle; - - /** - * Value to be added to the offset of the target buffer to make up - * the relocation entry. - */ - uint32_t delta; - - /** Offset in the buffer the relocation entry will be written into */ - uint64_t offset; - - /** - * Offset value of the target buffer that the relocation entry was last - * written as. - * - * If the buffer has the same offset as last time, we can skip syncing - * and writing the relocation. This value is written back out by - * the execbuffer ioctl when the relocation is written. - */ - uint64_t presumed_offset; - - /** - * Target memory domains read by this operation. - */ - uint32_t read_domains; - - /** - * Target memory domains written by this operation. 
- * - * Note that only one domain may be written by the whole - * execbuffer operation, so that where there are conflicts, - * the application will get -EINVAL back. - */ - uint32_t write_domain; -}; - -struct drm_i915_gem_exec_object { - /** - * User's handle for a buffer to be bound into the GTT for this - * operation. - */ - uint32_t handle; - - /** Number of relocations to be performed on this buffer */ - uint32_t relocation_count; - /** - * Pointer to array of struct drm_i915_gem_relocation_entry containing - * the relocations to be performed in this buffer. - */ - uint64_t relocs_ptr; - - /** Required alignment in graphics aperture */ - uint64_t alignment; - - /** - * Returned value of the updated offset of the object, for future - * presumed_offset writes. - */ - uint64_t offset; -}; - -struct drm_i915_gem_execbuffer { - /** - * List of buffers to be validated with their relocations to be - * performend on them. - * - * This is a pointer to an array of struct drm_i915_gem_validate_entry. - * - * These buffers must be listed in an order such that all relocations - * a buffer is performing refer to buffers that have already appeared - * in the validate list. - */ - uint64_t buffers_ptr; - uint32_t buffer_count; - - /** Offset in the batchbuffer to start execution from. */ - uint32_t batch_start_offset; - /** Bytes used in batchbuffer from batch_start_offset */ - uint32_t batch_len; - uint32_t DR1; - uint32_t DR4; - uint32_t num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ - uint64_t cliprects_ptr; -}; - -struct drm_i915_gem_busy { - /** Handle of the buffer to check for busy */ - uint32_t handle; - - /** Return busy status (1 if busy, 0 if idle) */ - uint32_t busy; -}; - -struct drm_i915_gem_set_tiling { - /** Handle of the buffer to have its tiling state updated */ - uint32_t handle; - - /** - * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, - * I915_TILING_Y). - * - * This value is to be set on request, and will be updated by the - * kernel on successful return with the actual chosen tiling layout. - * - * The tiling mode may be demoted to I915_TILING_NONE when the system - * has bit 6 swizzling that can't be managed correctly by GEM. - * - * Buffer contents become undefined when changing tiling_mode. - */ - uint32_t tiling_mode; - - /** - * Stride in bytes for the object when in I915_TILING_X or - * I915_TILING_Y. - */ - uint32_t stride; - - /** - * Returned address bit 6 swizzling required for CPU access through - * mmap mapping. - */ - uint32_t swizzle_mode; -}; - -struct drm_i915_gem_get_tiling { - /** Handle of the buffer to get tiling state for. */ - uint32_t handle; - - /** - * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, - * I915_TILING_Y). - */ - uint32_t tiling_mode; - - /** - * Returned address bit 6 swizzling required for CPU access through - * mmap mapping. 
- */ - uint32_t swizzle_mode; -}; - -struct drm_i915_gem_get_aperture { - /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ - uint64_t aper_size; - - /** - * Available space in the aperture used by i915_gem_execbuffer, in - * bytes - */ - uint64_t aper_available_size; -}; - - -#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) -#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) -#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) -#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) -#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) -#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) -#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) -#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) -#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) -#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) -#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) -#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) - -/* XXX madvise */ -#ifndef DRM_I915_GEM_MADVISE -#define I915_MADV_WILLNEED 0 -#define I915_MADV_DONTNEED 1 - -struct drm_i915_gem_madvise { - uint32_t handle; - uint32_t madv; - uint32_t retained; -}; -#define DRM_I915_GEM_MADVISE 0x26 -#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) -#endif +#define GLYPH_CACHE_WIDTH 1024 +#define GLYPH_CACHE_HEIGHT 1024 +#define GLYPH_CACHE_MIN_SIZE 1 +#define GLYPH_CACHE_MAX_SIZE 128 +#define IMAGE_CACHE_WIDTH 1024 +#define IMAGE_CACHE_HEIGHT 1024 cairo_bool_t intel_info (int fd, uint64_t *gtt_size) @@ -453,12 +168,14 @@ intel_bo_map (const intel_device_t *device, intel_bo_t *bo) do { ret = ioctl (device->base.fd, - DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); } while (ret == -1 && errno == EINTR); if (ret != 0) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return NULL; + intel_bo_unmap (bo); + _cairo_error_throw (CAIRO_STATUS_DEVICE_ERROR); + return NULL; } return bo->virtual; @@ -471,7 +188,7 @@ intel_bo_unmap (intel_bo_t *bo) bo->virtual = NULL; } -static cairo_bool_t +cairo_bool_t intel_bo_is_inactive (const intel_device_t *device, const intel_bo_t *bo) { struct drm_i915_gem_busy busy; @@ -484,6 +201,21 @@ intel_bo_is_inactive (const intel_device_t *device, const intel_bo_t *bo) return ! 
busy.busy; } +void +intel_bo_wait (const intel_device_t *device, const intel_bo_t *bo) +{ + struct drm_i915_gem_set_domain set_domain; + int ret; + + set_domain.handle = bo->base.handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = 0; + + do { + ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + } while (ret == -1 && errno == EINTR); +} + static inline int pot (int v) { @@ -515,7 +247,7 @@ intel_bo_cache_remove (intel_device_t *device, _cairo_freepool_free (&device->bo_pool, bo); } -static cairo_bool_t +cairo_bool_t intel_bo_madvise (intel_device_t *device, intel_bo_t *bo, int advice) @@ -548,7 +280,7 @@ intel_bo_cache_purge (intel_device_t *device) } } -cairo_drm_bo_t * +intel_bo_t * intel_bo_create (intel_device_t *device, uint32_t size, cairo_bool_t gpu_target) @@ -584,18 +316,19 @@ intel_bo_create (intel_device_t *device, * cause serialisation... */ - if (! intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { - intel_bo_cache_remove (device, bo, bucket); - goto retry; - } - if (device->bo_cache[bucket].num_entries-- > - device->bo_cache[bucket].min_entries) + device->bo_cache[bucket].min_entries) { device->bo_cache_size -= bo->base.size; } cairo_list_del (&bo->cache_list); - CAIRO_MUTEX_UNLOCK (device->bo_mutex); + + if (! intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { + _cairo_drm_bo_close (&device->base, &bo->base); + _cairo_freepool_free (&device->bo_pool, bo); + goto retry; + } + goto DONE; } @@ -608,18 +341,19 @@ intel_bo_create (intel_device_t *device, bo = cairo_list_first_entry (&device->bo_cache[bucket].list, intel_bo_t, cache_list); if (intel_bo_is_inactive (device, bo)) { - if (! intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { - intel_bo_cache_remove (device, bo, bucket); - goto retry; - } - if (device->bo_cache[bucket].num_entries-- > device->bo_cache[bucket].min_entries) { device->bo_cache_size -= bo->base.size; } cairo_list_del (&bo->cache_list); - CAIRO_MUTEX_UNLOCK (device->bo_mutex); + + if (! 
intel_bo_madvise (device, bo, I915_MADV_WILLNEED)) { + _cairo_drm_bo_close (&device->base, &bo->base); + _cairo_freepool_free (&device->bo_pool, bo); + goto retry; + } + goto DONE; } } @@ -646,10 +380,9 @@ intel_bo_create (intel_device_t *device, /* no cached buffer available, allocate fresh */ bo = _cairo_freepool_alloc (&device->bo_pool); - CAIRO_MUTEX_UNLOCK (device->bo_mutex); if (unlikely (bo == NULL)) { _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return NULL; + goto UNLOCK; } cairo_list_init (&bo->cache_list); @@ -663,29 +396,37 @@ intel_bo_create (intel_device_t *device, bo->tiling = I915_TILING_NONE; bo->stride = 0; bo->swizzle = I915_BIT_6_SWIZZLE_NONE; + bo->purgeable = 0; + + bo->opaque0 = 0; + bo->opaque1 = 0; - bo->in_batch = FALSE; - bo->read_domains = 0; - bo->write_domain = 0; + bo->exec = NULL; + bo->batch_read_domains = 0; + bo->batch_write_domain = 0; + cairo_list_init (&bo->link); create.size = size; create.handle = 0; ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_CREATE, &create); if (unlikely (ret != 0)) { _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - free (bo); - return NULL; + _cairo_freepool_free (&device->bo_pool, bo); + bo = NULL; + goto UNLOCK; } bo->base.handle = create.handle; DONE: CAIRO_REFERENCE_COUNT_INIT (&bo->base.ref_count, 1); +UNLOCK: + CAIRO_MUTEX_UNLOCK (device->bo_mutex); - return &bo->base; + return bo; } -cairo_drm_bo_t * +intel_bo_t * intel_bo_create_for_name (intel_device_t *device, uint32_t name) { struct drm_i915_gem_get_tiling get_tiling; @@ -702,40 +443,48 @@ intel_bo_create_for_name (intel_device_t *device, uint32_t name) } status = _cairo_drm_bo_open_for_name (&device->base, &bo->base, name); - if (unlikely (status)) { - _cairo_freepool_free (&device->bo_pool, bo); - return NULL; - } + if (unlikely (status)) + goto FAIL; CAIRO_REFERENCE_COUNT_INIT (&bo->base.ref_count, 1); cairo_list_init (&bo->cache_list); bo->offset = 0; bo->virtual = NULL; + bo->purgeable = 0; + + bo->opaque0 = 0; + bo->opaque1 = 0; - bo->in_batch = FALSE; - bo->read_domains = 0; - bo->write_domain = 0; + bo->exec = NULL; + bo->batch_read_domains = 0; + bo->batch_write_domain = 0; + cairo_list_init (&bo->link); memset (&get_tiling, 0, sizeof (get_tiling)); get_tiling.handle = bo->base.handle; ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); if (unlikely (ret != 0)) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); + _cairo_error_throw (CAIRO_STATUS_DEVICE_ERROR); _cairo_drm_bo_close (&device->base, &bo->base); - _cairo_freepool_free (&device->bo_pool, bo); - return NULL; + goto FAIL; } bo->tiling = get_tiling.tiling_mode; bo->swizzle = get_tiling.swizzle_mode; // bo->stride = get_tiling.stride; /* XXX not available from get_tiling */ - return &bo->base; + return bo; + +FAIL: + CAIRO_MUTEX_LOCK (device->bo_mutex); + _cairo_freepool_free (&device->bo_pool, bo); + CAIRO_MUTEX_UNLOCK (device->bo_mutex); + return NULL; } -void +static void intel_bo_release (void *_dev, void *_bo) { intel_device_t *device = _dev; @@ -747,7 +496,10 @@ intel_bo_release (void *_dev, void *_bo) bucket = ffs (bo->base.size / 4096) - 1; CAIRO_MUTEX_LOCK (device->bo_mutex); - if (bo->base.name == 0 && bucket < INTEL_BO_CACHE_BUCKETS) { + if (bo->base.name == 0 && + bucket < INTEL_BO_CACHE_BUCKETS && + intel_bo_madvise (device, bo, I915_MADV_DONTNEED)) + { if (++device->bo_cache[bucket].num_entries > device->bo_cache[bucket].min_entries) { @@ -755,8 +507,6 @@ intel_bo_release (void *_dev, void *_bo) } cairo_list_add_tail (&bo->cache_list, 
&device->bo_cache[bucket].list); - - intel_bo_madvise (device, bo, I915_MADV_DONTNEED); } else { @@ -767,7 +517,7 @@ intel_bo_release (void *_dev, void *_bo) } void -intel_bo_set_tiling (intel_device_t *device, +intel_bo_set_tiling (const intel_device_t *device, intel_bo_t *bo, uint32_t tiling, uint32_t stride) @@ -781,7 +531,7 @@ intel_bo_set_tiling (intel_device_t *device, return; } - assert (! bo->in_batch); + assert (bo->exec == NULL); if (bo->virtual) intel_bo_unmap (bo); @@ -790,7 +540,9 @@ intel_bo_set_tiling (intel_device_t *device, set_tiling.tiling_mode = tiling; set_tiling.stride = stride; - ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + do { + ret = ioctl (device->base.fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && errno == EINTR); if (ret == 0) { bo->tiling = set_tiling.tiling_mode; bo->swizzle = set_tiling.swizzle_mode; @@ -859,6 +611,148 @@ intel_bo_get_image (const intel_device_t *device, return &image->base; } +static cairo_status_t +_intel_bo_put_a1_image (intel_device_t *dev, + intel_bo_t *bo, int stride, + cairo_image_surface_t *src, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y) +{ + uint8_t buf[CAIRO_STACK_BUFFER_SIZE]; + uint8_t *a8 = buf; + uint8_t *data; + int x; + + data = src->data + src_y * src->stride; + + if (bo->tiling == I915_TILING_NONE && width == stride) { + uint8_t *p; + int size; + + size = stride * height; + if (size > (int) sizeof (buf)) { + a8 = _cairo_malloc_ab (stride, height); + if (a8 == NULL) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + p = a8; + while (height--) { + for (x = 0; x < width; x++) { + int i = src_x + x; + int byte = i / 8; + int bit = i % 8; + p[x] = data[byte] & (1 << bit) ? 0xff : 0x00; + } + + data += src->stride; + p += stride; + } + + intel_bo_write (dev, bo, + dst_y * stride + dst_x, /* XXX bo_offset */ + size, a8); + } else { + uint8_t *dst; + + if (width > (int) sizeof (buf)) { + a8 = malloc (width); + if (a8 == NULL) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + dst = intel_bo_map (dev, bo); + if (dst == NULL) { + if (a8 != buf) + free (a8); + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + } + + dst += dst_y * stride + dst_x; /* XXX bo_offset */ + while (height--) { + for (x = 0; x < width; x++) { + int i = src_x + x; + int byte = i / 8; + int bit = i % 8; + a8[x] = data[byte] & (1 << bit) ? 
0xff : 0x00; + } + + memcpy (dst, a8, width); + dst += stride; + data += src->stride; + } + intel_bo_unmap (bo); + } + + if (a8 != buf) + free (a8); + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +intel_bo_put_image (intel_device_t *dev, + intel_bo_t *bo, int stride, + cairo_image_surface_t *src, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y) +{ + uint8_t *data; + int size; + int offset; + + offset = dst_y * stride; + data = src->data + src_y * src->stride; + switch (src->format) { + default: + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + offset += 4 * dst_x; + data += 4 * src_x; + size = 4 * width; + break; + case CAIRO_FORMAT_A8: + offset += dst_x; + data += src_x; + size = width; + break; + case CAIRO_FORMAT_A1: + return _intel_bo_put_a1_image (dev, + bo, stride, src, + src_x, src_y, + width, height, + dst_x, dst_y); + } + + if (bo->tiling == I915_TILING_NONE) { + if (src->stride == stride) { + intel_bo_write (dev, bo, offset, stride * height, data); + } else while (height--) { + intel_bo_write (dev, bo, offset, size, data); + offset += stride; + data += src->stride; + } + } else { + uint8_t *dst; + + dst = intel_bo_map (dev, bo); + if (unlikely (dst == NULL)) + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + + dst += offset; + while (height--) { + memcpy (dst, data, size); + dst += stride; + data += src->stride; + } + intel_bo_unmap (bo); + } + + return CAIRO_STATUS_SUCCESS; +} + static void _intel_device_init_bo_cache (intel_device_t *device) { @@ -883,23 +777,69 @@ _intel_device_init_bo_cache (intel_device_t *device) } _cairo_freepool_init (&device->bo_pool, sizeof (intel_bo_t)); + + device->base.surface.flink = _cairo_drm_surface_flink; + device->base.surface.map_to_image = intel_surface_map_to_image; +} + +static cairo_bool_t +_intel_snapshot_cache_entry_can_remove (const void *closure) +{ + return TRUE; +} + +static void +_intel_snapshot_cache_entry_destroy (void *closure) +{ + intel_surface_t *surface = cairo_container_of (closure, + intel_surface_t, + snapshot_cache_entry); + + surface->snapshot_cache_entry.hash = 0; + cairo_surface_destroy (&surface->drm.base); } cairo_status_t intel_device_init (intel_device_t *device, int fd) { struct drm_i915_gem_get_aperture aperture; + cairo_status_t status; + size_t size; int ret; + int n; ret = ioctl (fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); if (ret != 0) - return _cairo_error (CAIRO_STATUS_NO_MEMORY); + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + + CAIRO_MUTEX_INIT (device->mutex); device->gtt_max_size = aperture.aper_size; device->gtt_avail_size = aperture.aper_available_size; + device->gtt_avail_size -= device->gtt_avail_size >> 5; _intel_device_init_bo_cache (device); + size = aperture.aper_size / 8; + device->snapshot_cache_max_size = size / 4; + status = _cairo_cache_init (&device->snapshot_cache, + NULL, + _intel_snapshot_cache_entry_can_remove, + _intel_snapshot_cache_entry_destroy, + size); + if (unlikely (status)) + return status; + + device->glyph_cache_mapped = FALSE; + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) { + device->glyph_cache[n].buffer.bo = NULL; + cairo_list_init (&device->glyph_cache[n].rtree.pinned); + } + + device->gradient_cache.size = 0; + + device->base.bo.release = intel_bo_release; + return CAIRO_STATUS_SUCCESS; } @@ -920,10 +860,42 @@ _intel_bo_cache_fini (intel_device_t *device) CAIRO_MUTEX_FINI (device->bo_mutex); } +static void +_intel_gradient_cache_fini (intel_device_t *device) +{ + unsigned int n; + + for (n = 0; n < 
device->gradient_cache.size; n++) { + _cairo_pattern_fini (&device->gradient_cache.cache[n].pattern.base); + if (device->gradient_cache.cache[n].buffer.bo != NULL) + cairo_drm_bo_destroy (&device->base.base, + &device->gradient_cache.cache[n].buffer.bo->base); + } +} + +static void +_intel_glyph_cache_fini (intel_device_t *device, intel_buffer_cache_t *cache) +{ + if (cache->buffer.bo == NULL) + return; + + intel_bo_destroy (device, cache->buffer.bo); + _cairo_rtree_fini (&cache->rtree); +} + void intel_device_fini (intel_device_t *device) { + int n; + + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) + _intel_glyph_cache_fini (device, &device->glyph_cache[n]); + + _cairo_cache_fini (&device->snapshot_cache); + + _intel_gradient_cache_fini (device); _intel_bo_cache_fini (device); + _cairo_drm_device_fini (&device->base); } @@ -932,3 +904,591 @@ intel_throttle (intel_device_t *device) { ioctl (device->base.fd, DRM_IOCTL_I915_GEM_THROTTLE); } + +void +intel_glyph_cache_unmap (intel_device_t *device) +{ + int n; + + if (likely (! device->glyph_cache_mapped)) + return; + + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) { + if (device->glyph_cache[n].buffer.bo != NULL && + device->glyph_cache[n].buffer.bo->virtual != NULL) + { + intel_bo_unmap (device->glyph_cache[n].buffer.bo); + } + } + + device->glyph_cache_mapped = FALSE; +} + +void +intel_glyph_cache_unpin (intel_device_t *device) +{ + int n; + + for (n = 0; n < ARRAY_LENGTH (device->glyph_cache); n++) + _cairo_rtree_unpin (&device->glyph_cache[n].rtree); +} + +static cairo_status_t +intel_glyph_cache_add_glyph (intel_device_t *device, + intel_buffer_cache_t *cache, + cairo_scaled_glyph_t *scaled_glyph) +{ + cairo_image_surface_t *glyph_surface = scaled_glyph->surface; + intel_glyph_t *glyph; + cairo_rtree_node_t *node = NULL; + double sf_x, sf_y; + cairo_status_t status; + uint8_t *dst, *src; + int width, height; + + width = glyph_surface->width; + if (width < GLYPH_CACHE_MIN_SIZE) + width = GLYPH_CACHE_MIN_SIZE; + height = glyph_surface->height; + if (height < GLYPH_CACHE_MIN_SIZE) + height = GLYPH_CACHE_MIN_SIZE; + + /* search for an available slot */ + status = _cairo_rtree_insert (&cache->rtree, width, height, &node); + /* search for an unpinned slot */ + if (status == CAIRO_INT_STATUS_UNSUPPORTED) { + status = _cairo_rtree_evict_random (&cache->rtree, width, height, &node); + if (status == CAIRO_STATUS_SUCCESS) + status = _cairo_rtree_node_insert (&cache->rtree, node, width, height, &node); + } + if (unlikely (status)) + return status; + + height = glyph_surface->height; + src = glyph_surface->data; + dst = cache->buffer.bo->virtual; + if (dst == NULL) { + dst = intel_bo_map (device, cache->buffer.bo); + if (unlikely (dst == NULL)) + return _cairo_error (CAIRO_STATUS_DEVICE_ERROR); + } + + dst += node->y * cache->buffer.stride; + switch (glyph_surface->format) { + case CAIRO_FORMAT_A1: { + uint8_t buf[CAIRO_STACK_BUFFER_SIZE]; + uint8_t *a8 = buf; + int x; + + if (width > (int) sizeof (buf)) { + a8 = malloc (width); + if (unlikely (a8 == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + dst += node->x; + width = glyph_surface->width; + while (height--) { + for (x = 0; x < width; x++) + a8[x] = src[x>>3] & (1 << (x&7)) ? 
0xff : 0x00; + + memcpy (dst, a8, width); + dst += cache->buffer.stride; + src += glyph_surface->stride; + } + + if (a8 != buf) + free (a8); + break; + } + + case CAIRO_FORMAT_A8: + dst += node->x; + width = glyph_surface->width; + while (height--) { + memcpy (dst, src, width); + dst += cache->buffer.stride; + src += glyph_surface->stride; + } + break; + + default: + ASSERT_NOT_REACHED; + case CAIRO_FORMAT_RGB24: + case CAIRO_FORMAT_ARGB32: + dst += 4*node->x; + width = 4*glyph_surface->width; + while (height--) { + memcpy (dst, src, width); + dst += cache->buffer.stride; + src += glyph_surface->stride; + } + break; + } + + /* leave mapped! */ + device->glyph_cache_mapped = TRUE; + + scaled_glyph->surface_private = node; + + glyph= (intel_glyph_t *) node; + glyph->node.owner = &scaled_glyph->surface_private; + glyph->cache = cache; + + /* compute tex coords: bottom-right, bottom-left, top-left */ + sf_x = 1. / cache->buffer.width; + sf_y = 1. / cache->buffer.height; + glyph->texcoord[0] = + texcoord_2d_16 (sf_x * (node->x + glyph_surface->width + NEAREST_BIAS), + sf_y * (node->y + glyph_surface->height + NEAREST_BIAS)); + glyph->texcoord[1] = + texcoord_2d_16 (sf_x * (node->x + NEAREST_BIAS), + sf_y * (node->y + glyph_surface->height + NEAREST_BIAS)); + glyph->texcoord[2] = + texcoord_2d_16 (sf_x * (node->x + NEAREST_BIAS), + sf_y * (node->y + NEAREST_BIAS)); + + return CAIRO_STATUS_SUCCESS; +} + +void +intel_scaled_glyph_fini (cairo_scaled_glyph_t *scaled_glyph, + cairo_scaled_font_t *scaled_font) +{ + intel_glyph_t *glyph; + + glyph = scaled_glyph->surface_private; + if (glyph != NULL) { + glyph->node.owner = NULL; + if (! glyph->node.pinned) { + intel_buffer_cache_t *cache; + + /* XXX thread-safety? Probably ok due to the frozen scaled-font. */ + cache = glyph->cache; + assert (cache != NULL); + + glyph->node.state = CAIRO_RTREE_NODE_AVAILABLE; + cairo_list_move (&glyph->node.link, + &cache->rtree.available); + + if (! glyph->node.parent->pinned) + _cairo_rtree_node_collapse (&cache->rtree, glyph->node.parent); + } + } +} + +void +intel_scaled_font_fini (cairo_scaled_font_t *scaled_font) +{ + intel_device_t *device; + + device = scaled_font->surface_private; + if (device != NULL) { + /* XXX decouple? 
*/ + } +} + +static cairo_status_t +intel_get_glyph_cache (intel_device_t *device, + cairo_format_t format, + intel_buffer_cache_t **out) +{ + intel_buffer_cache_t *cache; + cairo_status_t status; + + switch (format) { + case CAIRO_FORMAT_ARGB32: + case CAIRO_FORMAT_RGB24: + cache = &device->glyph_cache[0]; + format = CAIRO_FORMAT_ARGB32; + break; + case CAIRO_FORMAT_A8: + case CAIRO_FORMAT_A1: + cache = &device->glyph_cache[1]; + format = CAIRO_FORMAT_A8; + break; + default: + ASSERT_NOT_REACHED; + } + + if (unlikely (cache->buffer.bo == NULL)) { + status = intel_buffer_cache_init (cache, device, format, + INTEL_GLYPH_CACHE_WIDTH, + INTEL_GLYPH_CACHE_HEIGHT); + if (unlikely (status)) + return status; + + _cairo_rtree_init (&cache->rtree, + INTEL_GLYPH_CACHE_WIDTH, + INTEL_GLYPH_CACHE_HEIGHT, + 0, sizeof (intel_glyph_t), NULL); + } + + *out = cache; + return CAIRO_STATUS_SUCCESS; +} + +cairo_int_status_t +intel_get_glyph (intel_device_t *device, + cairo_scaled_font_t *scaled_font, + cairo_scaled_glyph_t *scaled_glyph) +{ + cairo_bool_t own_surface = FALSE; + intel_buffer_cache_t *cache; + cairo_status_t status; + + if (scaled_glyph->surface == NULL) { + status = + scaled_font->backend->scaled_glyph_init (scaled_font, + scaled_glyph, + CAIRO_SCALED_GLYPH_INFO_SURFACE); + if (unlikely (status)) + return status; + + if (unlikely (scaled_glyph->surface == NULL)) + return CAIRO_INT_STATUS_UNSUPPORTED; + + own_surface = TRUE; + } + + if (unlikely (scaled_glyph->surface->width == 0 || + scaled_glyph->surface->height == 0)) + { + return CAIRO_INT_STATUS_NOTHING_TO_DO; + } + + if (unlikely (scaled_glyph->surface->width > GLYPH_CACHE_MAX_SIZE || + scaled_glyph->surface->height > GLYPH_CACHE_MAX_SIZE)) + { + return CAIRO_INT_STATUS_UNSUPPORTED; + } + + status = intel_get_glyph_cache (device, + scaled_glyph->surface->format, + &cache); + if (unlikely (status)) + return status; + + status = intel_glyph_cache_add_glyph (device, cache, scaled_glyph); + if (unlikely (_cairo_status_is_error (status))) + return status; + + if (unlikely (status == CAIRO_INT_STATUS_UNSUPPORTED)) { + /* no room, replace entire cache */ + + assert (cache->buffer.bo->exec != NULL); + + _cairo_rtree_reset (&cache->rtree); + intel_bo_destroy (device, cache->buffer.bo); + cache->buffer.bo = NULL; + + status = intel_buffer_cache_init (cache, device, + scaled_glyph->surface->format, + GLYPH_CACHE_WIDTH, + GLYPH_CACHE_HEIGHT); + if (unlikely (status)) + return status; + + status = intel_glyph_cache_add_glyph (device, cache, scaled_glyph); + if (unlikely (status)) + return status; + } + + if (own_surface) { + /* and release the copy of the image from system memory */ + cairo_surface_destroy (&scaled_glyph->surface->base); + scaled_glyph->surface = NULL; + } + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +intel_buffer_cache_init (intel_buffer_cache_t *cache, + intel_device_t *device, + cairo_format_t format, + int width, int height) +{ + const uint32_t tiling = I915_TILING_Y; + + assert ((width & 3) == 0); + assert ((height & 1) == 0); + cache->buffer.format = format; + cache->buffer.width = width; + cache->buffer.height = height; + + switch (format) { + case CAIRO_FORMAT_A1: + case CAIRO_FORMAT_RGB24: + ASSERT_NOT_REACHED; + case CAIRO_FORMAT_ARGB32: + cache->buffer.map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + cache->buffer.stride = width * 4; + break; + case CAIRO_FORMAT_A8: + cache->buffer.map0 = MAPSURF_8BIT | MT_8BIT_I8; + cache->buffer.stride = width; + break; + } + cache->buffer.map0 |= ((height - 1) << MS3_HEIGHT_SHIFT) | 
+ ((width - 1) << MS3_WIDTH_SHIFT); + cache->buffer.map1 = ((cache->buffer.stride / 4) - 1) << MS4_PITCH_SHIFT; + + cache->buffer.bo = intel_bo_create (device, + height * cache->buffer.stride, FALSE); + if (unlikely (cache->buffer.bo == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + intel_bo_set_tiling (device, cache->buffer.bo, tiling, cache->buffer.stride); + + cache->buffer.map0 |= MS3_tiling (cache->buffer.bo->tiling); + + cache->ref_count = 0; + cairo_list_init (&cache->link); + + return CAIRO_STATUS_SUCCESS; +} + +cairo_status_t +intel_snapshot_cache_insert (intel_device_t *device, + intel_surface_t *surface) +{ + cairo_status_t status; + int bpp; + + bpp = 1; + if (surface->drm.format != CAIRO_FORMAT_A8) + bpp = 4; + + surface->snapshot_cache_entry.hash = (unsigned long) surface; + surface->snapshot_cache_entry.size = + surface->drm.width * surface->drm.height * bpp; + + if (surface->snapshot_cache_entry.size > + device->snapshot_cache_max_size) + { + return CAIRO_STATUS_SUCCESS; + } + + if (device->snapshot_cache.freeze_count == 0) + _cairo_cache_freeze (&device->snapshot_cache); + + status = _cairo_cache_insert (&device->snapshot_cache, + &surface->snapshot_cache_entry); + if (unlikely (status)) { + surface->snapshot_cache_entry.hash = 0; + return status; + } + + cairo_surface_reference (&surface->drm.base); + + return CAIRO_STATUS_SUCCESS; +} + +void +intel_surface_detach_snapshot (cairo_surface_t *abstract_surface) +{ + intel_surface_t *surface = (intel_surface_t *) abstract_surface; + + if (surface->snapshot_cache_entry.hash) { + intel_device_t *device; + + device = (intel_device_t *) surface->drm.base.device; + _cairo_cache_remove (&device->snapshot_cache, + &surface->snapshot_cache_entry); + surface->snapshot_cache_entry.hash = 0; + } +} + +void +intel_snapshot_cache_thaw (intel_device_t *device) +{ + if (device->snapshot_cache.freeze_count) + _cairo_cache_thaw (&device->snapshot_cache); +} + +static cairo_bool_t +_gradient_color_stops_equal (const cairo_gradient_pattern_t *a, + const cairo_gradient_pattern_t *b) +{ + unsigned int n; + + if (a->n_stops != b->n_stops) + return FALSE; + + for (n = 0; n < a->n_stops; n++) { + if (_cairo_fixed_from_double (a->stops[n].offset) != + _cairo_fixed_from_double (b->stops[n].offset)) + { + return FALSE; + } + + if (! 
_cairo_color_equal (&a->stops[n].color, &b->stops[n].color)) + return FALSE; + } + + return TRUE; +} + +static uint32_t +hars_petruska_f54_1_random (void) +{ +#define rol(x,k) ((x << k) | (x >> (32-k))) + static uint32_t x; + return x = (x ^ rol (x, 5) ^ rol (x, 24)) + 0x37798849; +#undef rol +} + +static int +intel_gradient_sample_width (const cairo_gradient_pattern_t *gradient) +{ + unsigned int n; + int width; + + width = 8; + for (n = 1; n < gradient->n_stops; n++) { + double dx = gradient->stops[n].offset - gradient->stops[n-1].offset; + double delta, max; + int ramp; + + if (dx == 0) + continue; + + max = gradient->stops[n].color.red - + gradient->stops[n-1].color.red; + + delta = gradient->stops[n].color.green - + gradient->stops[n-1].color.green; + if (delta > max) + max = delta; + + delta = gradient->stops[n].color.blue - + gradient->stops[n-1].color.blue; + if (delta > max) + max = delta; + + delta = gradient->stops[n].color.alpha - + gradient->stops[n-1].color.alpha; + if (delta > max) + max = delta; + + ramp = 128 * max / dx; + if (ramp > width) + width = ramp; + } + + width = (width + 7) & -8; + return MIN (width, 1024); +} + +cairo_status_t +intel_gradient_render (intel_device_t *device, + const cairo_gradient_pattern_t *pattern, + intel_buffer_t *buffer) +{ + pixman_image_t *gradient, *image; + pixman_gradient_stop_t pixman_stops_stack[32]; + pixman_gradient_stop_t *pixman_stops; + pixman_point_fixed_t p1, p2; + int width; + unsigned int i; + cairo_status_t status; + + for (i = 0; i < device->gradient_cache.size; i++) { + if (_gradient_color_stops_equal (pattern, + &device->gradient_cache.cache[i].pattern.gradient.base)) { + *buffer = device->gradient_cache.cache[i].buffer; + return CAIRO_STATUS_SUCCESS; + } + } + + pixman_stops = pixman_stops_stack; + if (unlikely (pattern->n_stops > ARRAY_LENGTH (pixman_stops_stack))) { + pixman_stops = _cairo_malloc_ab (pattern->n_stops, + sizeof (pixman_gradient_stop_t)); + if (unlikely (pixman_stops == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + for (i = 0; i < pattern->n_stops; i++) { + pixman_stops[i].x = _cairo_fixed_16_16_from_double (pattern->stops[i].offset); + pixman_stops[i].color.red = pattern->stops[i].color.red_short; + pixman_stops[i].color.green = pattern->stops[i].color.green_short; + pixman_stops[i].color.blue = pattern->stops[i].color.blue_short; + pixman_stops[i].color.alpha = pattern->stops[i].color.alpha_short; + } + + width = intel_gradient_sample_width (pattern); + + p1.x = 0; + p1.y = 0; + p2.x = width << 16; + p2.y = 0; + + gradient = pixman_image_create_linear_gradient (&p1, &p2, + pixman_stops, + pattern->n_stops); + if (pixman_stops != pixman_stops_stack) + free (pixman_stops); + + if (unlikely (gradient == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + pixman_image_set_filter (gradient, PIXMAN_FILTER_BILINEAR, NULL, 0); + + image = pixman_image_create_bits (PIXMAN_a8r8g8b8, width, 1, NULL, 0); + if (unlikely (image == NULL)) { + pixman_image_unref (gradient); + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + pixman_image_composite (PIXMAN_OP_SRC, + gradient, NULL, image, + 0, 0, + 0, 0, + 0, 0, + width, 1); + + pixman_image_unref (gradient); + + buffer->bo = intel_bo_create (device, 4*width, FALSE); + if (unlikely (buffer->bo == NULL)) { + pixman_image_unref (image); + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + intel_bo_write (device, buffer->bo, 0, 4*width, pixman_image_get_data (image)); + pixman_image_unref (image); + + buffer->offset = 0; + buffer->width = 
width; + buffer->height = 1; + buffer->stride = 4*width; + buffer->format = CAIRO_FORMAT_ARGB32; + buffer->map0 = MAPSURF_32BIT | MT_32BIT_ARGB8888; + buffer->map0 |= MS3_tiling (buffer->bo->tiling); + buffer->map0 |= ((width - 1) << MS3_WIDTH_SHIFT); + buffer->map1 = (width - 1) << MS4_PITCH_SHIFT; + + if (device->gradient_cache.size < GRADIENT_CACHE_SIZE) { + i = device->gradient_cache.size++; + } else { + i = hars_petruska_f54_1_random () % GRADIENT_CACHE_SIZE; + _cairo_pattern_fini (&device->gradient_cache.cache[i].pattern.base); + intel_bo_destroy (device, device->gradient_cache.cache[i].buffer.bo); + } + + status = _cairo_pattern_init_copy (&device->gradient_cache.cache[i].pattern.base, + &pattern->base); + if (unlikely (status)) { + intel_bo_destroy (device, buffer->bo); + /* Ensure the cache is correctly initialised for i965_device_destroy */ + _cairo_pattern_init_solid (&device->gradient_cache.cache[i].pattern.solid, + CAIRO_COLOR_TRANSPARENT, + CAIRO_CONTENT_ALPHA); + return status; + } + + device->gradient_cache.cache[i].buffer = *buffer; + return CAIRO_STATUS_SUCCESS; +} diff --git a/src/drm/cairo-drm-private.h b/src/drm/cairo-drm-private.h index 0f1b735f..549832d2 100644 --- a/src/drm/cairo-drm-private.h +++ b/src/drm/cairo-drm-private.h @@ -38,11 +38,14 @@ #include "cairo-drm.h" -#include "cairo-surface-private.h" +#include "cairo-device-private.h" #include "cairo-reference-count-private.h" +#include "cairo-surface-private.h" #include <sys/types.h> /* dev_t */ +typedef struct _cairo_drm_device cairo_drm_device_t; + typedef cairo_drm_device_t * (*cairo_drm_device_create_func_t) (int fd, dev_t dev, @@ -50,6 +53,9 @@ typedef cairo_drm_device_t * int chip_id); typedef cairo_int_status_t +(*cairo_drm_device_flush_func_t) (cairo_drm_device_t *device); + +typedef cairo_int_status_t (*cairo_drm_device_throttle_func_t) (cairo_drm_device_t *device); typedef void @@ -76,11 +82,15 @@ typedef cairo_int_status_t typedef cairo_status_t (*cairo_drm_surface_enable_scan_out_func_t) (void *surface); +typedef cairo_surface_t * +(*cairo_drm_surface_map_to_image_func_t) (void *surface); + typedef struct _cairo_drm_bo_backend { void (*release) (void *device, void *bo); } cairo_drm_bo_backend_t; typedef struct _cairo_drm_device_backend { + cairo_drm_device_flush_func_t flush; cairo_drm_device_throttle_func_t throttle; cairo_drm_device_destroy_func_t destroy; } cairo_drm_device_backend_t; @@ -91,6 +101,7 @@ typedef struct _cairo_drm_surface_backend { cairo_drm_surface_create_from_cacheable_image_func_t create_from_cacheable_image; cairo_drm_surface_flink_func_t flink; cairo_drm_surface_enable_scan_out_func_t enable_scan_out; + cairo_drm_surface_map_to_image_func_t map_to_image; } cairo_drm_surface_backend_t; typedef struct _cairo_drm_bo { @@ -101,9 +112,10 @@ typedef struct _cairo_drm_bo { } cairo_drm_bo_t; struct _cairo_drm_device { - cairo_reference_count_t ref_count; - cairo_status_t status; + cairo_device_t base; + int vendor_id; + int chip_id; dev_t id; int fd; @@ -119,7 +131,6 @@ struct _cairo_drm_device { typedef struct _cairo_drm_surface { cairo_surface_t base; - cairo_drm_device_t *device; cairo_drm_bo_t *bo; cairo_format_t format; @@ -136,17 +147,16 @@ cairo_drm_bo_reference (cairo_drm_bo_t *bo) return bo; } -static inline void -cairo_drm_bo_destroy (cairo_drm_device_t *device, +static always_inline void +cairo_drm_bo_destroy (cairo_device_t *abstract_device, cairo_drm_bo_t *bo) { - if (_cairo_reference_count_dec_and_test (&bo->ref_count)) + if (_cairo_reference_count_dec_and_test 
(&bo->ref_count)) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; device->bo.release (device, bo); + } } -cairo_private cairo_drm_device_t * -_cairo_drm_device_create_in_error (cairo_status_t status); - cairo_private cairo_status_t _cairo_drm_bo_open_for_name (const cairo_drm_device_t *dev, cairo_drm_bo_t *bo, @@ -181,56 +191,20 @@ _cairo_drm_surface_get_extents (void *abstract_surface, cairo_rectangle_int_t *rectangle); cairo_private cairo_int_status_t -_cairo_drm_surface_paint (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_mask (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - const cairo_pattern_t *mask, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_stroke (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - const cairo_stroke_style_t *style, - const cairo_matrix_t *ctm, - const cairo_matrix_t *ctm_inverse, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_fill (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - cairo_fill_rule_t fill_rule, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip); - -cairo_private cairo_int_status_t -_cairo_drm_surface_show_glyphs (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_glyph_t *glyphs, - int num_glyphs, - cairo_scaled_font_t *scaled_font, - cairo_clip_t *clip, - int *remaining_glyphs); - -cairo_private cairo_int_status_t _cairo_drm_surface_flink (void *abstract_surface); +static inline cairo_drm_device_t * +_cairo_drm_device_create_in_error (cairo_status_t status) +{ + return (cairo_drm_device_t *) _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); +} + cairo_private cairo_drm_device_t * _cairo_drm_device_init (cairo_drm_device_t *device, - int fd, dev_t id, + int fd, + dev_t devid, + int vendor_id, + int chip_id, int max_surface_size); cairo_private void @@ -242,6 +216,12 @@ cairo_private cairo_drm_device_t * _cairo_drm_intel_device_create (int fd, dev_t dev, int vendor_id, int chip_id); cairo_private cairo_drm_device_t * +_cairo_drm_i915_device_create (int fd, dev_t dev, int vendor_id, int chip_id); + +cairo_private cairo_drm_device_t * +_cairo_drm_i965_device_create (int fd, dev_t dev, int vendor_id, int chip_id); + +cairo_private cairo_drm_device_t * _cairo_drm_radeon_device_create (int fd, dev_t dev, int vendor_id, int chip_id); #if CAIRO_HAS_GALLIUM_SURFACE @@ -250,8 +230,13 @@ _cairo_drm_gallium_device_create (int fd, dev_t dev, int vendor_id, int chip_id) #endif slim_hidden_proto (cairo_drm_device_default); -slim_hidden_proto (cairo_drm_device_destroy); slim_hidden_proto (cairo_drm_device_get); -slim_hidden_proto_no_warn (cairo_drm_device_reference); +slim_hidden_proto (cairo_drm_device_get_for_fd); + +slim_hidden_proto (cairo_drm_surface_create_for_name); + +cairo_private cairo_bool_t +_cairo_drm_size_is_valid (cairo_device_t *abstract_device, + int width, int height); #endif /* CAIRO_DRM_PRIVATE_H */ diff --git a/src/drm/cairo-drm-radeon-private.h b/src/drm/cairo-drm-radeon-private.h index 9c0c3b46..e63bb04d 100644 --- a/src/drm/cairo-drm-radeon-private.h +++ b/src/drm/cairo-drm-radeon-private.h @@ -99,9 +99,6 @@ radeon_bo_create (radeon_device_t *dev, cairo_private cairo_drm_bo_t 
* radeon_bo_create_for_name (radeon_device_t *dev, uint32_t name); -cairo_private void -radeon_bo_release (void *_dev, void *_bo); - cairo_private cairo_surface_t * radeon_bo_get_image (const radeon_device_t *device, radeon_bo_t *bo, diff --git a/src/drm/cairo-drm-radeon-surface.c b/src/drm/cairo-drm-radeon-surface.c index 858f353f..7521199a 100644 --- a/src/drm/cairo-drm-radeon-surface.c +++ b/src/drm/cairo-drm-radeon-surface.c @@ -31,6 +31,7 @@ #include "cairo-drm-private.h" #include "cairo-drm-radeon-private.h" + #include "cairo-error-private.h" /* Basic stub surface for radeon chipsets */ @@ -42,7 +43,7 @@ typedef struct _radeon_surface { } radeon_surface_t; static inline radeon_device_t * -to_radeon_device (cairo_drm_device_t *device) +to_radeon_device (cairo_device_t *device) { return (radeon_device_t *) device; } @@ -53,19 +54,14 @@ to_radeon_bo (cairo_drm_bo_t *bo) return (radeon_bo_t *) bo; } -static cairo_status_t -radeon_batch_flush (radeon_device_t *device) -{ - return CAIRO_STATUS_SUCCESS; -} - -static cairo_status_t -radeon_surface_batch_flush (radeon_surface_t *surface) +static cairo_surface_t * +radeon_surface_create_similar (void *abstract_surface, + cairo_content_t content, + int width, + int height) { - if (to_radeon_bo (surface->base.bo)->write_domain) - return radeon_batch_flush (to_radeon_device (surface->base.device)); - - return CAIRO_STATUS_SUCCESS; + return cairo_image_surface_create (_cairo_format_from_content (content), + width, height); } static cairo_status_t @@ -85,6 +81,8 @@ radeon_surface_acquire_source_image (void *abstract_surface, cairo_surface_t *image; cairo_status_t status; + /* XXX batch flush */ + if (surface->base.fallback != NULL) { image = surface->base.fallback; goto DONE; @@ -95,13 +93,15 @@ radeon_surface_acquire_source_image (void *abstract_surface, if (image != NULL) goto DONE; - status = radeon_surface_batch_flush (surface); - if (unlikely (status)) - return status; + if (surface->base.base.backend->flush != NULL) { + status = surface->base.base.backend->flush (surface); + if (unlikely (status)) + return status; + } - image = radeon_bo_get_image (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo), - &surface->base); + image = radeon_bo_get_image (to_radeon_device (surface->base.base.device), + to_radeon_bo (surface->base.bo), + &surface->base); status = image->status; if (unlikely (status)) return status; @@ -121,94 +121,46 @@ DONE: } static void -radeon_surface_release_source_image (void *abstract_surface, - cairo_image_surface_t *image, - void *image_extra) +radeon_surface_release_source_image (void *abstract_surface, + cairo_image_surface_t *image, + void *image_extra) { cairo_surface_destroy (&image->base); } static cairo_surface_t * -radeon_surface_snapshot (void *abstract_surface) -{ - radeon_surface_t *surface = abstract_surface; - cairo_status_t status; - - if (surface->base.fallback != NULL) - return NULL; - - status = radeon_surface_batch_flush (surface); - if (unlikely (status)) - return _cairo_surface_create_in_error (status); - - return radeon_bo_get_image (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo), - &surface->base); -} - -static cairo_status_t -radeon_surface_acquire_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t **image_out, - cairo_rectangle_int_t *image_rect_out, - void **image_extra) +radeon_surface_map_to_image (radeon_surface_t *surface) { - radeon_surface_t *surface = abstract_surface; - cairo_surface_t *image; 
- cairo_status_t status; - void *ptr; - - assert (surface->base.fallback == NULL); - - status = radeon_surface_batch_flush (surface); - if (unlikely (status)) - return status; - - /* Force a read barrier, as well as flushing writes above */ - radeon_bo_wait (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo)); + if (surface->base.fallback == NULL) { + cairo_surface_t *image; + cairo_status_t status; + void *ptr; + + if (surface->base.base.backend->flush != NULL) { + status = surface->base.base.backend->flush (surface); + if (unlikely (status)) + return _cairo_surface_create_in_error (status); + } - ptr = radeon_bo_map (to_radeon_device (surface->base.device), - to_radeon_bo (surface->base.bo)); - if (unlikely (ptr == NULL)) - return _cairo_error (CAIRO_STATUS_NO_MEMORY); + ptr = radeon_bo_map (to_radeon_device (surface->base.base.device), + to_radeon_bo (surface->base.bo)); + if (unlikely (ptr == NULL)) + return _cairo_surface_create_in_error (CAIRO_STATUS_NO_MEMORY); + + image = cairo_image_surface_create_for_data (ptr, + surface->base.format, + surface->base.width, + surface->base.height, + surface->base.stride); + if (unlikely (image->status)) { + radeon_bo_unmap (to_radeon_bo (surface->base.bo)); + return image; + } - image = cairo_image_surface_create_for_data (ptr, - surface->base.format, - surface->base.width, - surface->base.height, - surface->base.stride); - status = image->status; - if (unlikely (status)) { - radeon_bo_unmap (to_radeon_bo (surface->base.bo)); - return status; + surface->base.fallback = image; } - surface->base.fallback = cairo_surface_reference (image); - - *image_out = (cairo_image_surface_t *) image; - *image_extra = NULL; - - image_rect_out->x = 0; - image_rect_out->y = 0; - image_rect_out->width = surface->base.width; - image_rect_out->height = surface->base.height; - - return CAIRO_STATUS_SUCCESS; -} - -static void -radeon_surface_release_dest_image (void *abstract_surface, - cairo_rectangle_int_t *interest_rect, - cairo_image_surface_t *image, - cairo_rectangle_int_t *image_rect, - void *image_extra) -{ - /* Keep the fallback until we flush, either explicitly or at the - * end of this context. The idea is to avoid excess migration of - * the buffer between GPU and CPU domains. 
- */ - cairo_surface_destroy (&image->base); + return surface->base.fallback; } static cairo_status_t @@ -218,7 +170,7 @@ radeon_surface_flush (void *abstract_surface) cairo_status_t status; if (surface->base.fallback == NULL) - return radeon_surface_batch_flush (surface); + return CAIRO_STATUS_SUCCESS; /* kill any outstanding maps */ cairo_surface_finish (surface->base.fallback); @@ -232,53 +184,117 @@ radeon_surface_flush (void *abstract_surface) return status; } +static cairo_int_status_t +radeon_surface_paint (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_clip_t *clip) +{ + return _cairo_surface_paint (radeon_surface_map_to_image (abstract_surface), + op, source, clip); +} + +static cairo_int_status_t +radeon_surface_mask (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + const cairo_pattern_t *mask, + cairo_clip_t *clip) +{ + return _cairo_surface_mask (radeon_surface_map_to_image (abstract_surface), + op, source, mask, clip); +} + +static cairo_int_status_t +radeon_surface_stroke (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + const cairo_stroke_style_t *stroke_style, + const cairo_matrix_t *ctm, + const cairo_matrix_t *ctm_inverse, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_stroke (radeon_surface_map_to_image (abstract_surface), + op, source, path, stroke_style, ctm, ctm_inverse, + tolerance, antialias, clip); +} + +static cairo_int_status_t +radeon_surface_fill (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_path_fixed_t *path, + cairo_fill_rule_t fill_rule, + double tolerance, + cairo_antialias_t antialias, + cairo_clip_t *clip) +{ + return _cairo_surface_fill (radeon_surface_map_to_image (abstract_surface), + op, source, path, fill_rule, + tolerance, antialias, clip); +} + +static cairo_int_status_t +radeon_surface_glyphs (void *abstract_surface, + cairo_operator_t op, + const cairo_pattern_t *source, + cairo_glyph_t *glyphs, + int num_glyphs, + cairo_scaled_font_t *scaled_font, + cairo_clip_t *clip, + int *num_remaining) +{ + *num_remaining = 0; + return _cairo_surface_show_text_glyphs (radeon_surface_map_to_image (abstract_surface), + op, source, + NULL, 0, + glyphs, num_glyphs, + NULL, 0, 0, + scaled_font, clip); +} + static const cairo_surface_backend_t radeon_surface_backend = { CAIRO_SURFACE_TYPE_DRM, - _cairo_drm_surface_create_similar, - radeon_surface_finish, + radeon_surface_create_similar, + radeon_surface_finish, radeon_surface_acquire_source_image, radeon_surface_release_source_image, - radeon_surface_acquire_dest_image, - radeon_surface_release_dest_image, - - NULL, //radeon_surface_clone_similar, - NULL, //radeon_surface_composite, - NULL, //radeon_surface_fill_rectangles, - NULL, //radeon_surface_composite_trapezoids, - NULL, //radeon_surface_create_span_renderer, - NULL, //radeon_surface_check_span_renderer, + NULL, NULL, NULL, + NULL, /* composite */ + NULL, /* fill */ + NULL, /* trapezoids */ + NULL, /* span */ + NULL, /* check-span */ + NULL, /* copy_page */ NULL, /* show_page */ _cairo_drm_surface_get_extents, - NULL, /* old_show_glyphs */ + NULL, /* old-glyphs */ _cairo_drm_surface_get_font_options, - radeon_surface_flush, - NULL, /* mark_dirty_rectangle */ - NULL, //radeon_surface_scaled_font_fini, - NULL, //radeon_surface_scaled_glyph_fini, - - _cairo_drm_surface_paint, - _cairo_drm_surface_mask, - _cairo_drm_surface_stroke, 
- _cairo_drm_surface_fill, - _cairo_drm_surface_show_glyphs, - - radeon_surface_snapshot, - NULL, /* is_similar */ - - NULL, /* reset */ + radeon_surface_flush, + NULL, /* mark dirty */ + NULL, NULL, /* font/glyph fini */ + + radeon_surface_paint, + radeon_surface_mask, + radeon_surface_stroke, + radeon_surface_fill, + radeon_surface_glyphs, }; static void radeon_surface_init (radeon_surface_t *surface, - cairo_content_t content, - cairo_drm_device_t *device) + cairo_content_t content, + cairo_drm_device_t *device) { _cairo_surface_init (&surface->base.base, &radeon_surface_backend, - NULL, /* device */ + &device->base, content); _cairo_drm_surface_init (&surface->base, device); @@ -318,7 +334,7 @@ radeon_surface_create_internal (cairo_drm_device_t *device, surface->base.stride = cairo_format_stride_for_width (surface->base.format, width); - surface->base.bo = radeon_bo_create (to_radeon_device (device), + surface->base.bo = radeon_bo_create (to_radeon_device (&device->base), surface->base.stride * height, RADEON_GEM_DOMAIN_GTT); @@ -379,7 +395,7 @@ radeon_surface_create_for_name (cairo_drm_device_t *device, surface->base.height = height; surface->base.stride = stride; - surface->base.bo = radeon_bo_create_for_name (to_radeon_device (device), + surface->base.bo = radeon_bo_create_for_name (to_radeon_device (&device->base), name); if (unlikely (surface->base.bo == NULL)) { @@ -428,13 +444,12 @@ _cairo_drm_radeon_device_create (int fd, dev_t dev, int vendor_id, int chip_id) device->base.surface.flink = _cairo_drm_surface_flink; device->base.surface.enable_scan_out = NULL; + device->base.device.flush = NULL; device->base.device.throttle = NULL; device->base.device.destroy = radeon_device_destroy; - device->base.bo.release = radeon_bo_release; - device->vram_limit = vram_size; device->gart_limit = gart_size; - return _cairo_drm_device_init (&device->base, dev, fd, MAX_SIZE); + return _cairo_drm_device_init (&device->base, fd, dev, vendor_id, chip_id, MAX_SIZE); } diff --git a/src/drm/cairo-drm-radeon.c b/src/drm/cairo-drm-radeon.c index e435d705..a9683083 100644 --- a/src/drm/cairo-drm-radeon.c +++ b/src/drm/cairo-drm-radeon.c @@ -32,6 +32,7 @@ #include "cairo-drm-private.h" #include "cairo-drm-radeon-private.h" #include "cairo-drm-ioctl-private.h" + #include "cairo-error-private.h" #include <sys/ioctl.h> @@ -371,7 +372,7 @@ radeon_bo_create_for_name (radeon_device_t *device, return &bo->base; } -void +static void radeon_bo_release (void *_dev, void *_bo) { radeon_device_t *device = _dev; @@ -431,6 +432,8 @@ radeon_device_init (radeon_device_t *device, int fd) { _radeon_device_init_bo_cache (device); + device->base.bo.release = radeon_bo_release; + return CAIRO_STATUS_SUCCESS; } diff --git a/src/drm/cairo-drm-surface.c b/src/drm/cairo-drm-surface.c index 429b528d..e37d3e2f 100644 --- a/src/drm/cairo-drm-surface.c +++ b/src/drm/cairo-drm-surface.c @@ -33,8 +33,8 @@ #include "cairoint.h" #include "cairo-drm-private.h" + #include "cairo-error-private.h" -#include "cairo-surface-fallback-private.h" cairo_surface_t * _cairo_drm_surface_create_similar (void *abstract_surface, @@ -43,13 +43,8 @@ _cairo_drm_surface_create_similar (void *abstract_surface, int height) { cairo_drm_surface_t *surface = abstract_surface; - cairo_drm_device_t *device; - - if (surface->fallback != NULL) - return _cairo_image_surface_create_with_content (content, - width, height); + cairo_drm_device_t *device = (cairo_drm_device_t *) surface->base.device; - device = surface->device; if (width > device->max_surface_size || 
height > device->max_surface_size) return NULL; @@ -60,8 +55,6 @@ void _cairo_drm_surface_init (cairo_drm_surface_t *surface, cairo_drm_device_t *device) { - surface->device = cairo_drm_device_reference (device); - surface->bo = NULL; surface->width = 0; surface->height = 0; @@ -75,9 +68,7 @@ cairo_status_t _cairo_drm_surface_finish (cairo_drm_surface_t *surface) { if (surface->bo != NULL) - cairo_drm_bo_destroy (surface->device, surface->bo); - - cairo_drm_device_destroy (surface->device); + cairo_drm_bo_destroy (surface->base.device, surface->bo); return CAIRO_STATUS_SUCCESS; } @@ -105,147 +96,32 @@ _cairo_drm_surface_get_extents (void *abstract_surface, return TRUE; } -cairo_int_status_t -_cairo_drm_surface_paint (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) - return _cairo_surface_paint (surface->fallback, op, source, clip); - - return _cairo_surface_fallback_paint (&surface->base, op, source, clip); -} - -cairo_int_status_t -_cairo_drm_surface_mask (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - const cairo_pattern_t *mask, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - return _cairo_surface_mask (surface->fallback, - op, source, mask, - clip); - } - - return _cairo_surface_fallback_mask (&surface->base, - op, source, mask, clip); -} - -cairo_int_status_t -_cairo_drm_surface_stroke (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - const cairo_stroke_style_t *style, - const cairo_matrix_t *ctm, - const cairo_matrix_t *ctm_inverse, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - return _cairo_surface_stroke (surface->fallback, - op, source, - path, style, - ctm, ctm_inverse, - tolerance, antialias, - clip); - } - - return _cairo_surface_fallback_stroke (&surface->base, op, source, - path, style, - ctm, ctm_inverse, - tolerance, antialias, - clip); -} - -cairo_int_status_t -_cairo_drm_surface_fill (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_path_fixed_t *path, - cairo_fill_rule_t fill_rule, - double tolerance, - cairo_antialias_t antialias, - cairo_clip_t *clip) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - return _cairo_surface_fill (surface->fallback, - op, source, - path, fill_rule, - tolerance, antialias, - clip); - } - - return _cairo_surface_fallback_fill (&surface->base, op, source, - path, fill_rule, - tolerance, antialias, - clip); -} - -cairo_int_status_t -_cairo_drm_surface_show_glyphs (void *abstract_surface, - cairo_operator_t op, - const cairo_pattern_t *source, - cairo_glyph_t *glyphs, - int num_glyphs, - cairo_scaled_font_t *scaled_font, - cairo_clip_t *clip, - int *remaining_glyphs) -{ - cairo_drm_surface_t *surface = abstract_surface; - - if (surface->fallback != NULL) { - *remaining_glyphs = 0; - return _cairo_surface_show_text_glyphs (surface->fallback, - op, source, - NULL, 0, - glyphs, num_glyphs, - NULL, 0, 0, - scaled_font, - clip); - } - - return _cairo_surface_fallback_show_glyphs (&surface->base, - op, source, - glyphs, num_glyphs, - scaled_font, - clip); -} - - cairo_surface_t * -cairo_drm_surface_create (cairo_drm_device_t *device, 
+cairo_drm_surface_create (cairo_device_t *abstract_device, cairo_content_t content, int width, int height) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; cairo_surface_t *surface; if (! CAIRO_CONTENT_VALID (content)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_CONTENT)); - if (device != NULL && device->status) + if (device != NULL && device->base.status) { - surface = _cairo_surface_create_in_error (device->status); + surface = _cairo_surface_create_in_error (device->base.status); } else if (device == NULL || device->surface.create == NULL || width == 0 || width > device->max_surface_size || height == 0 || height > device->max_surface_size) { - surface = _cairo_image_surface_create_with_content (content, - width, height); + surface = cairo_image_surface_create (_cairo_format_from_content (content), + width, height); + } + else if (device->base.finished) + { + surface = _cairo_surface_create_in_error (CAIRO_STATUS_SURFACE_FINISHED); } else { @@ -256,19 +132,20 @@ cairo_drm_surface_create (cairo_drm_device_t *device, } cairo_surface_t * -cairo_drm_surface_create_for_name (cairo_drm_device_t *device, +cairo_drm_surface_create_for_name (cairo_device_t *abstract_device, unsigned int name, cairo_format_t format, int width, int height, int stride) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; cairo_surface_t *surface; if (! CAIRO_FORMAT_VALID (format)) return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); - if (device != NULL && device->status) + if (device != NULL && device->base.status) { - surface = _cairo_surface_create_in_error (device->status); + surface = _cairo_surface_create_in_error (device->base.status); } else if (device == NULL || device->surface.create_for_name == NULL) { @@ -280,6 +157,10 @@ cairo_drm_surface_create_for_name (cairo_drm_device_t *device, { surface = _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_SIZE)); } + else if (device->base.finished) + { + surface = _cairo_surface_create_in_error (CAIRO_STATUS_SURFACE_FINISHED); + } else { surface = device->surface.create_for_name (device, @@ -289,20 +170,25 @@ cairo_drm_surface_create_for_name (cairo_drm_device_t *device, return surface; } +slim_hidden_def (cairo_drm_surface_create_for_name); cairo_surface_t * -cairo_drm_surface_create_from_cacheable_image (cairo_drm_device_t *dev, +cairo_drm_surface_create_from_cacheable_image (cairo_device_t *abstract_device, cairo_surface_t *surface) { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; + if (surface->status) { surface = _cairo_surface_create_in_error (surface->status); - } else if (dev != NULL && dev->status) { - surface = _cairo_surface_create_in_error (dev->status); - } else if (dev == NULL || dev->surface.create_from_cacheable_image == NULL) { + } else if (device != NULL && device->base.status) { + surface = _cairo_surface_create_in_error (device->base.status); + } else if (device == NULL || device->surface.create_from_cacheable_image == NULL) { /* XXX invalid device! 
*/ surface = _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_FORMAT)); + } else if (device->base.finished) { + surface = _cairo_surface_create_in_error (CAIRO_STATUS_SURFACE_FINISHED); } else { - surface = dev->surface.create_from_cacheable_image (dev, surface); + surface = device->surface.create_from_cacheable_image (device, surface); } return surface; @@ -324,32 +210,22 @@ cairo_status_t cairo_drm_surface_enable_scan_out (cairo_surface_t *abstract_surface) { cairo_drm_surface_t *surface; + cairo_drm_device_t *device; surface = _cairo_surface_as_drm (abstract_surface); - if (surface == NULL) + if (unlikely (surface == NULL)) return _cairo_error (CAIRO_STATUS_SURFACE_TYPE_MISMATCH); + if (unlikely (surface->base.finished)) + return _cairo_error (CAIRO_STATUS_SURFACE_FINISHED); - if (surface->device->surface.enable_scan_out == NULL) + device = (cairo_drm_device_t *) surface->base.device; + if (device->surface.enable_scan_out == NULL) return CAIRO_STATUS_SUCCESS; - return surface->device->surface.enable_scan_out (abstract_surface); -} + if (unlikely (device->base.finished)) + return _cairo_error (CAIRO_STATUS_SURFACE_FINISHED); -cairo_drm_device_t * -cairo_drm_surface_get_device (cairo_surface_t *abstract_surface) -{ - cairo_drm_surface_t *surface; - - if (unlikely (abstract_surface->status)) - return _cairo_drm_device_create_in_error (abstract_surface->status); - - surface = _cairo_surface_as_drm (abstract_surface); - if (surface == NULL) { - _cairo_error_throw (CAIRO_STATUS_SURFACE_TYPE_MISMATCH); - return NULL; - } - - return surface->device; + return device->surface.enable_scan_out (abstract_surface); } unsigned int @@ -371,13 +247,15 @@ _cairo_drm_surface_flink (void *abstract_surface) { cairo_drm_surface_t *surface = abstract_surface; - return _cairo_drm_bo_flink (surface->device, surface->bo); + return _cairo_drm_bo_flink ((cairo_drm_device_t *) surface->base.device, + surface->bo); } unsigned int cairo_drm_surface_get_name (cairo_surface_t *abstract_surface) { cairo_drm_surface_t *surface; + cairo_drm_device_t *device; cairo_status_t status; surface = _cairo_surface_as_drm (abstract_surface); @@ -389,10 +267,11 @@ cairo_drm_surface_get_name (cairo_surface_t *abstract_surface) if (surface->bo->name) return surface->bo->name; - if (surface->device->surface.flink == NULL) + device = (cairo_drm_device_t *) surface->base.device; + if (device->surface.flink == NULL) return 0; - status = surface->device->surface.flink (abstract_surface); + status = device->surface.flink (abstract_surface); if (status) { if (_cairo_status_is_error (status)) status = _cairo_surface_set_error (abstract_surface, status); @@ -456,10 +335,8 @@ cairo_surface_t * cairo_drm_surface_map (cairo_surface_t *abstract_surface) { cairo_drm_surface_t *surface; - cairo_rectangle_int_t roi; - cairo_image_surface_t *image; + cairo_drm_device_t *device; cairo_status_t status; - void *image_extra; if (unlikely (abstract_surface->status)) return _cairo_surface_create_in_error (abstract_surface->status); @@ -474,23 +351,9 @@ cairo_drm_surface_map (cairo_surface_t *abstract_surface) return _cairo_surface_create_in_error (status); } - roi.x = roi.y = 0; - roi.width = surface->width; - roi.height = surface->height; - - status = _cairo_surface_acquire_dest_image (abstract_surface, - &roi, - &image, - &roi, - &image_extra); - if (unlikely (status)) - return _cairo_surface_create_in_error (status); - - assert (image_extra == NULL); - surface->map_count++; - - return &image->base; + device = (cairo_drm_device_t *) 
surface->base.device; + return cairo_surface_reference (device->surface.map_to_image (surface)); } void diff --git a/src/drm/cairo-drm.c b/src/drm/cairo-drm.c index a218fa52..9ccb10d5 100644 --- a/src/drm/cairo-drm.c +++ b/src/drm/cairo-drm.c @@ -33,6 +33,8 @@ #include "cairoint.h" #include "cairo-drm-private.h" + +#include "cairo-device-private.h" #include "cairo-error-private.h" #define LIBUDEV_I_KNOW_THE_API_IS_SUBJECT_TO_CHANGE @@ -43,29 +45,6 @@ static cairo_drm_device_t *_cairo_drm_known_devices; static cairo_drm_device_t *_cairo_drm_default_device; -static const cairo_drm_device_t _nil_device = { - CAIRO_REFERENCE_COUNT_INVALID, - CAIRO_STATUS_NO_MEMORY -}; - -static const cairo_drm_device_t _invalid_device = { - CAIRO_REFERENCE_COUNT_INVALID, - CAIRO_STATUS_INVALID_CONTENT -}; - -cairo_drm_device_t * -_cairo_drm_device_create_in_error (cairo_status_t status) -{ - switch ((int) status) { - default: - ASSERT_NOT_REACHED; - case CAIRO_STATUS_NO_MEMORY: - return (cairo_drm_device_t *) &_nil_device; - case CAIRO_STATUS_INVALID_CONTENT: - return (cairo_drm_device_t *) &_invalid_device; - } -} - static const char * get_udev_property(struct udev_device *device, const char *name) { @@ -81,16 +60,69 @@ get_udev_property(struct udev_device *device, const char *name) return NULL; } +static void +_device_flush (void *abstract_device) +{ + cairo_drm_device_t *device = abstract_device; + + device->device.flush (device); +} + +static void +_device_finish (void *abstract_device) +{ + cairo_drm_device_t *device = abstract_device; + + CAIRO_MUTEX_LOCK (_cairo_drm_device_mutex); + if (device->prev != NULL) + device->prev->next = device->next; + else + _cairo_drm_known_devices = device->next; + if (device->next != NULL) + device->next->prev = device->prev; + + CAIRO_MUTEX_UNLOCK (_cairo_drm_device_mutex); + + if (_cairo_atomic_ptr_cmpxchg (&_cairo_drm_default_device, + device, NULL) == device) + { + cairo_device_destroy (&device->base); + } +} + +static void +_device_destroy (void *abstract_device) +{ + cairo_drm_device_t *device = abstract_device; + + device->device.destroy (device); +} + +static const cairo_device_backend_t _cairo_drm_device_backend = { + CAIRO_DEVICE_TYPE_DRM, + + NULL, NULL, /* lock, unlock */ + + _device_flush, + _device_finish, + _device_destroy, +}; + cairo_drm_device_t * _cairo_drm_device_init (cairo_drm_device_t *dev, int fd, dev_t devid, + int vendor_id, + int chip_id, int max_surface_size) { - CAIRO_REFERENCE_COUNT_INIT (&dev->ref_count, 1); - dev->status = CAIRO_STATUS_SUCCESS; + assert (CAIRO_MUTEX_IS_LOCKED (_cairo_drm_device_mutex)); + + _cairo_device_init (&dev->base, &_cairo_drm_device_backend); dev->id = devid; + dev->vendor_id = vendor_id; + dev->chip_id = chip_id; dev->fd = fd; dev->max_surface_size = max_surface_size; @@ -102,12 +134,12 @@ _cairo_drm_device_init (cairo_drm_device_t *dev, _cairo_drm_known_devices = dev; if (_cairo_drm_default_device == NULL) - _cairo_drm_default_device = cairo_drm_device_reference (dev); + _cairo_drm_default_device = (cairo_drm_device_t *) cairo_device_reference (&dev->base); return dev; } -cairo_drm_device_t * +cairo_device_t * cairo_drm_device_get (struct udev_device *device) { static const struct dri_driver_entry { @@ -115,7 +147,34 @@ cairo_drm_device_get (struct udev_device *device) uint32_t chip_id; cairo_drm_device_create_func_t create_func; } driver_map[] = { + { 0x8086, 0x29a2, _cairo_drm_i965_device_create }, /* I965_G */ + { 0x8086, 0x2982, _cairo_drm_i965_device_create }, /* G35_G */ + { 0x8086, 0x2992, 
_cairo_drm_i965_device_create }, /* I965_Q */ + { 0x8086, 0x2972, _cairo_drm_i965_device_create }, /* I946_GZ */ + { 0x8086, 0x2a02, _cairo_drm_i965_device_create }, /* I965_GM */ + { 0x8086, 0x2a12, _cairo_drm_i965_device_create }, /* I965_GME */ + { 0x8086, 0x2e02, _cairo_drm_i965_device_create }, /* IGD_E_G */ + { 0x8086, 0x2e22, _cairo_drm_i965_device_create }, /* G45_G */ + { 0x8086, 0x2e12, _cairo_drm_i965_device_create }, /* Q45_G */ + { 0x8086, 0x2e32, _cairo_drm_i965_device_create }, /* G41_G */ + { 0x8086, 0x2a42, _cairo_drm_i965_device_create }, /* GM45_GM */ + + { 0x8086, 0x2582, _cairo_drm_i915_device_create }, /* I915_G */ + { 0x8086, 0x2592, _cairo_drm_i915_device_create }, /* I915_GM */ + { 0x8086, 0x258a, _cairo_drm_i915_device_create }, /* E7221_G */ + { 0x8086, 0x2772, _cairo_drm_i915_device_create }, /* I945_G */ + { 0x8086, 0x27a2, _cairo_drm_i915_device_create }, /* I945_GM */ + { 0x8086, 0x27ae, _cairo_drm_i915_device_create }, /* I945_GME */ + { 0x8086, 0x29c2, _cairo_drm_i915_device_create }, /* G33_G */ + { 0x8086, 0x29b2, _cairo_drm_i915_device_create }, /* Q35_G */ + { 0x8086, 0x29d2, _cairo_drm_i915_device_create }, /* Q33_G */ + { 0x8086, 0xa011, _cairo_drm_i915_device_create }, /* IGD_GM */ + { 0x8086, 0xa001, _cairo_drm_i915_device_create }, /* IGD_G */ + + /* XXX i830 */ + { 0x8086, ~0, _cairo_drm_intel_device_create }, + { 0x1002, ~0, _cairo_drm_radeon_device_create }, #if CAIRO_HAS_GALLIUM_SURFACE { ~0, ~0, _cairo_drm_gallium_device_create }, @@ -135,16 +194,16 @@ cairo_drm_device_get (struct udev_device *device) CAIRO_MUTEX_LOCK (_cairo_drm_device_mutex); for (dev = _cairo_drm_known_devices; dev != NULL; dev = dev->next) { if (dev->id == devid) { - dev = cairo_drm_device_reference (dev); + dev = (cairo_drm_device_t *) cairo_device_reference (&dev->base); goto DONE; } } - dev = (cairo_drm_device_t *) &_nil_device; parent = udev_device_get_parent (device); pci_id = get_udev_property (parent, "PCI_ID"); if (sscanf (pci_id, "%x:%x", &vendor_id, &chip_id) != 2) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); + dev = (cairo_drm_device_t *) + _cairo_device_create_in_error (CAIRO_STATUS_DEVICE_ERROR); goto DONE; } @@ -166,8 +225,8 @@ cairo_drm_device_get (struct udev_device *device) } if (i == ARRAY_LENGTH (driver_map)) { - /* XXX should be no driver or something*/ - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); + dev = (cairo_drm_device_t *) + _cairo_device_create_in_error (CAIRO_STATUS_DEVICE_ERROR); goto DONE; } } @@ -190,22 +249,21 @@ cairo_drm_device_get (struct udev_device *device) DONE: CAIRO_MUTEX_UNLOCK (_cairo_drm_device_mutex); - return dev; + return &dev->base; } slim_hidden_def (cairo_drm_device_get); -cairo_drm_device_t * +cairo_device_t * cairo_drm_device_get_for_fd (int fd) { struct stat st; struct udev *udev; struct udev_device *device; - cairo_drm_device_t *dev = NULL; + cairo_device_t *dev = NULL; if (fstat (fd, &st) < 0 || ! 
S_ISCHR (st.st_mode)) { //_cairo_error_throw (CAIRO_STATUS_INVALID_DEVICE); - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return (cairo_drm_device_t *) &_nil_device; + return _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); } udev = udev_new (); @@ -220,25 +278,24 @@ cairo_drm_device_get_for_fd (int fd) return dev; } +slim_hidden_def (cairo_drm_device_get_for_fd); -cairo_drm_device_t * +cairo_device_t * cairo_drm_device_default (void) { struct udev *udev; struct udev_enumerate *e; struct udev_list_entry *entry; - cairo_drm_device_t *dev; + cairo_device_t *dev; /* optimistic atomic pointer read */ - dev = _cairo_drm_default_device; + dev = &_cairo_drm_default_device->base; if (dev != NULL) return dev; udev = udev_new(); - if (udev == NULL) { - _cairo_error_throw (CAIRO_STATUS_NO_MEMORY); - return (cairo_drm_device_t *) &_nil_device; - } + if (udev == NULL) + return _cairo_device_create_in_error (CAIRO_STATUS_NO_MEMORY); e = udev_enumerate_new (udev); udev_enumerate_add_match_subsystem (e, "drm"); @@ -255,8 +312,9 @@ cairo_drm_device_default (void) udev_device_unref (device); if (dev != NULL) { - if (dev->fd == -1) { /* try again, we may find a usable card */ - cairo_drm_device_destroy (dev); + if (((cairo_drm_device_t *) dev)->fd == -1) { + /* try again, we may find a usable card */ + cairo_device_destroy (dev); dev = NULL; } else break; @@ -265,7 +323,7 @@ cairo_drm_device_default (void) udev_enumerate_unref (e); udev_unref (udev); - cairo_drm_device_destroy (dev); /* owned by _cairo_drm_default_device */ + cairo_device_destroy (dev); /* owned by _cairo_drm_default_device */ return dev; } slim_hidden_def (cairo_drm_device_default); @@ -274,90 +332,56 @@ void _cairo_drm_device_reset_static_data (void) { if (_cairo_drm_default_device != NULL) { - cairo_drm_device_destroy (_cairo_drm_default_device); + cairo_device_t *device = &_cairo_drm_default_device->base; _cairo_drm_default_device = NULL; + cairo_device_destroy (device); } } -cairo_drm_device_t * -cairo_drm_device_reference (cairo_drm_device_t *device) -{ - if (device == NULL || - CAIRO_REFERENCE_COUNT_IS_INVALID (&device->ref_count)) - { - return device; - } - - assert (CAIRO_REFERENCE_COUNT_HAS_REFERENCE (&device->ref_count)); - _cairo_reference_count_inc (&device->ref_count); - - return device; -} -slim_hidden_def (cairo_drm_device_reference); - int -cairo_drm_device_get_fd (cairo_drm_device_t *device) +cairo_drm_device_get_fd (cairo_device_t *abstract_device) { - if (device->status) + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; + + if (device->base.status) return -1; return device->fd; } -cairo_status_t -cairo_drm_device_status (cairo_drm_device_t *device) -{ - if (device == NULL) - return CAIRO_STATUS_NULL_POINTER; - - return device->status; -} - void _cairo_drm_device_fini (cairo_drm_device_t *device) { - CAIRO_MUTEX_LOCK (_cairo_drm_device_mutex); - if (device->prev != NULL) - device->prev->next = device->next; - else - _cairo_drm_known_devices = device->next; - if (device->next != NULL) - device->next->prev = device->prev; - CAIRO_MUTEX_UNLOCK (_cairo_drm_device_mutex); - if (device->fd != -1) close (device->fd); } void -cairo_drm_device_destroy (cairo_drm_device_t *device) +cairo_drm_device_throttle (cairo_device_t *abstract_device) { - if (device == NULL || - CAIRO_REFERENCE_COUNT_IS_INVALID (&device->ref_count)) - { + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; + cairo_status_t status; + + if (unlikely (device->base.status)) return; - } - assert 
(CAIRO_REFERENCE_COUNT_HAS_REFERENCE (&device->ref_count)); - if (! _cairo_reference_count_dec_and_test (&device->ref_count)) + if (device->device.throttle == NULL) return; - device->device.destroy (device); + status = device->device.throttle (device); + if (unlikely (status)) + _cairo_status_set_error (&device->base.status, status); } -slim_hidden_def (cairo_drm_device_destroy); -void -cairo_drm_device_throttle (cairo_drm_device_t *dev) +cairo_bool_t +_cairo_drm_size_is_valid (cairo_device_t *abstract_device, + int width, int height) { - cairo_status_t status; + cairo_drm_device_t *device = (cairo_drm_device_t *) abstract_device; - if (unlikely (dev->status)) - return; + if (unlikely (device->base.status)) + return FALSE; - if (dev->device.throttle == NULL) - return; - - status = dev->device.throttle (dev); - if (unlikely (status)) - _cairo_status_set_error (&dev->status, status); + return width <= device->max_surface_size && + height <= device->max_surface_size; }
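A few implementation notes on the patch above. First, the A1 upload paths — _intel_bo_put_a1_image() and the CAIRO_FORMAT_A1 case in intel_glyph_cache_add_glyph() — both expand 1-bit masks to 8-bit alpha on the CPU before writing into the bo, since the sampler has no 1bpp map format. A minimal standalone sketch of that per-scanline expansion (the helper name is made up; the bit test is copied from the diff):

#include <stdint.h>

/* Expand one row of a 1bpp mask (byte-wise, LSB-first, as the diff's
 * src[x>>3] & (1 << (x&7)) test implies) to 8bpp alpha: set bits
 * become 0xff, clear bits 0x00. */
static void
a1_row_to_a8 (uint8_t *a8, const uint8_t *a1, int width)
{
    int x;

    for (x = 0; x < width; x++)
	a8[x] = a1[x >> 3] & (1 << (x & 7)) ? 0xff : 0x00;
}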
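Second, intel_bo_put_image() takes two upload routes for untiled bos: a single write when the image stride already equals the bo stride, otherwise one write per row. The same structure, reduced to plain memcpy (a hypothetical helper with no GEM calls):

#include <stdint.h>
#include <string.h>

/* Copy height rows of size bytes each, collapsing to one memcpy when
 * the strides allow it -- the shortcut the diff takes with
 * intel_bo_write (dev, bo, offset, stride * height, data). */
static void
copy_rows (uint8_t *dst, int dst_stride,
	   const uint8_t *src, int src_stride,
	   int size, int height)
{
    if (dst_stride == src_stride && src_stride == size) {
	memcpy (dst, src, (size_t) size * height);
    } else while (height--) {
	memcpy (dst, src, size);
	dst += dst_stride;
	src += src_stride;
    }
}

This sketch is slightly stricter than the diff, which only compares the two strides and so copies the row padding along with the pixels; that is harmless there because the destination offset already accounts for the row start.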
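Third, intel_device_init() sizes its caches from the GEM aperture. Worked through for a hypothetical 256 MiB aperture: gtt_avail_size is first reduced by 1/32 (the device->gtt_avail_size >> 5 subtraction), keeping roughly 3% headroom; the snapshot cache gets aperture/8 = 32 MiB in total; and any single snapshot larger than a quarter of that (snapshot_cache_max_size = 8 MiB) is simply never cached, as the early return in intel_snapshot_cache_insert() shows.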
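Two related C idioms carry the device refactor. Publicly, cairo_drm_device_t now embeds cairo_device_t as its first member, so the casts in cairo_drm_device_get_fd() and friends are valid in both directions; internally, cairo_container_of() recovers an intel_surface_t from a pointer to its embedded snapshot_cache_entry. The usual definition of the latter — an assumption, since it is not part of this diff — is:

#include <stddef.h>

/* Recover the containing struct from a pointer to one of its members;
 * this is how _intel_snapshot_cache_entry_destroy() gets back from the
 * cache entry to the surface that owns it. */
#define container_of(ptr, type, member) \
    ((type *) ((char *) (ptr) - offsetof (type, member)))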
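On the gradient side, intel_gradient_sample_width() picks the 1D ramp resolution from the steepest colour segment: for each pair of stops it takes the largest per-channel delta divided by the offset delta, scales by 128, keeps the running maximum, then rounds up to a multiple of 8 and clamps to [8, 1024]. A per-segment sketch with a worked case:

#include <stdio.h>

/* Texels needed for one gradient segment, following the diff's
 * 128 * max / dx, (width + 7) & -8 and MIN (width, 1024) steps. */
static int
segment_ramp_width (double max_channel_delta, double dx)
{
    int width = (int) (128 * max_channel_delta / dx);

    if (width < 8)
	width = 8;
    width = (width + 7) & -8;	/* round up to a multiple of 8 */
    return width < 1024 ? width : 1024;
}

int
main (void)
{
    /* alpha ramping 0 -> 1 over the first quarter of the pattern:
     * 128 * 1.0 / 0.25 = 512 texels */
    printf ("%d\n", segment_ramp_width (1.0, 0.25));
    return 0;
}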
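The gradient cache never frees entries individually: once its slots are filled, intel_gradient_render() overwrites a victim chosen by the xor/rotate PRNG from the diff. Random replacement is a reasonable fit here because lookups are linear scans anyway and no recency data is kept. A reduced sketch of the policy (GRADIENT_CACHE_SIZE's value is not in this hunk, so 16 below is a stand-in):

#include <stdint.h>

static uint32_t
hars_petruska_f54_1_random (void)
{
#define rol(x,k) ((x << k) | (x >> (32-k)))
    static uint32_t x;
    return x = (x ^ rol (x, 5) ^ rol (x, 24)) + 0x37798849;
#undef rol
}

#define CACHE_SIZE 16

/* Fill slots sequentially, then evict at random -- the same policy as
 * intel_gradient_render(). */
static unsigned int
next_slot (unsigned int *used)
{
    if (*used < CACHE_SIZE)
	return (*used)++;
    return hars_petruska_f54_1_random () % CACHE_SIZE;
}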
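Finally, cairo_drm_device_get() routes on the udev PCI_ID by scanning driver_map linearly; the ~0 chip ids closing each vendor group appear to act as wildcards, so the specific i915/i965 ids must come first (this matching rule is inferred from the table layout — the comparison loop itself is not shown in this hunk). A reduced sketch:

#include <stdint.h>
#include <stddef.h>

struct driver_entry {
    uint32_t vendor_id;
    uint32_t chip_id;	/* ~0 matches any chip of this vendor */
    const char *name;
};

static const struct driver_entry driver_map[] = {
    { 0x8086, 0x2582, "i915" },
    { 0x8086, ~0u,    "intel (generic)" },
    { 0x1002, ~0u,    "radeon" },
};

static const char *
lookup_driver (uint32_t vendor, uint32_t chip)
{
    size_t i;

    for (i = 0; i < sizeof (driver_map) / sizeof (driver_map[0]); i++) {
	if (driver_map[i].vendor_id == vendor &&
	    (driver_map[i].chip_id == chip || driver_map[i].chip_id == ~0u))
	    return driver_map[i].name;
    }
    return NULL;
}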