diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2011-03-31 23:29:25 +0200 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2011-04-01 13:07:43 +0200 |
commit | a6751bac8dd05e2f04960d40b670d35d3b63704e (patch) | |
tree | 684873d2ed89a531f6b068462ef6d3df5d2948a6 /tests | |
parent | 1baab2e43a40fe33606668c0be19ab772e25cc7a (diff) |
gem_stress: split out gen3 render copy
Header conflicts with other headers for different generations.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/Makefile.am | 7 | ||||
-rw-r--r-- | tests/gem_stress.c | 276 | ||||
-rw-r--r-- | tests/gem_stress.h | 81 | ||||
-rw-r--r-- | tests/gem_stress_i915.c | 197 |
4 files changed, 292 insertions, 269 deletions
diff --git a/tests/Makefile.am b/tests/Makefile.am index ce2f8915..b09264e8 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,7 +1,12 @@ -bin_PROGRAMS = \ +noinst_PROGRAMS = \ gem_stress \ $(NULL) +gem_stress_SOURCES = \ + gem_stress.c \ + gem_stress.h \ + gem_stress_i915.c + TESTS = getversion \ getclient \ getstats \ diff --git a/tests/gem_stress.c b/tests/gem_stress.c index 61e4b3c2..e359d8dd 100644 --- a/tests/gem_stress.c +++ b/tests/gem_stress.c @@ -49,25 +49,7 @@ * In short: designed for maximum evilness. */ -#include <stdlib.h> -#include <sys/ioctl.h> -#include <stdio.h> -#include <string.h> -#include <assert.h> -#include <fcntl.h> -#include <inttypes.h> -#include <errno.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <getopt.h> -#include "drm.h" -#include "i915_drm.h" -#include "drmtest.h" -#include "intel_bufmgr.h" -#include "intel_batchbuffer.h" -#include "intel_gpu_tools.h" -#include "i915_reg.h" -#include "i915_3d.h" +#include "gem_stress.h" #define CMD_POLY_STIPPLE_OFFSET 0x7906 @@ -92,36 +74,15 @@ static uint64_t gem_aperture_size(int fd) return aperture.aper_size; } -struct scratch_buf { - drm_intel_bo *bo; - uint32_t stride; - uint32_t tiling; - uint32_t *data; - unsigned num_tiles; -}; - -static drm_intel_bufmgr *bufmgr; +drm_intel_bufmgr *bufmgr; struct intel_batchbuffer *batch; -static int drm_fd; -static int devid; -static int num_fences; +int drm_fd; +int devid; +int num_fences; drm_intel_bo *busy_bo; -static struct { - unsigned scratch_buf_size; - unsigned num_buffers; - int trace_tile; - int no_hw; - int gpu_busy_load; - int use_render; -} options; - -#define MAX_BUFS 4096 -#define SCRATCH_BUF_SIZE 1024*1024 -#define BUSY_BUF_SIZE (256*4096) -#define TILE_SIZE 16 -#define TILE_BYTES (TILE_SIZE*TILE_SIZE*sizeof(uint32_t)) +struct option_struct options; static struct scratch_buf buffers[2][MAX_BUFS]; /* tile i is at logical position tile_permutation[i] */ @@ -133,7 +94,7 @@ static unsigned num_total_tiles = 0; #define TILES_PER_BUF (num_total_tiles / num_buffers) -static int fence_storm = 0; +int fence_storm = 0; static int gpu_busy_load = 10; static void tile2xy(struct scratch_buf *buf, unsigned tile, unsigned *x, unsigned *y) @@ -145,7 +106,7 @@ static void tile2xy(struct scratch_buf *buf, unsigned tile, unsigned *x, unsigne /* All this gem trashing wastes too much cpu time, so give the gpu something to * do to increase changes for races. */ -static void keep_gpu_busy(void) +void keep_gpu_busy(void) { uint32_t src_pitch, dst_pitch, cmd_bits; int tmp; @@ -318,227 +279,6 @@ static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned s } } -static unsigned buf_width(struct scratch_buf *buf) -{ - return buf->stride/sizeof(uint32_t); -} - -static unsigned buf_height(struct scratch_buf *buf) -{ - return options.scratch_buf_size/buf->stride; -} - -static void emit_vertex(float f) -{ - union { float f; uint32_t ui; } u; - u.f = f; - OUT_BATCH(u.ui); -} - -static void emit_vertex_normalized(float f, float total) -{ - union { float f; uint32_t ui; } u; - u.f = f / total; - OUT_BATCH(u.ui); -} - -static void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, - struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, - unsigned logical_tile_no) -{ - uint32_t src_pitch, dst_pitch, cmd_bits; - src_pitch = src->stride; - dst_pitch = dst->stride; - cmd_bits = 0; - static unsigned keep_gpu_busy_counter = 0; - - /* check both edges of the fence usage */ - if (keep_gpu_busy_counter & 1 && !fence_storm) - keep_gpu_busy(); - - /* invariant state */ - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); - OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | - IAB_MODIFY_ENABLE | - IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | - IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << - IAB_SRC_FACTOR_SHIFT) | - IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << - IAB_DST_FACTOR_SHIFT)); - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); - OUT_BATCH(_3DSTATE_MODES_4_CMD | - ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | - ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) | - ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff)); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); - OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */ - OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | - S4_LINE_WIDTH_ONE | - S4_CULLMODE_NONE | - S4_VFMT_XY); - OUT_BATCH(0x00000000); /* Stencil. */ - OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); - OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ - OUT_BATCH(0); - OUT_BATCH(_3DSTATE_STIPPLE); - OUT_BATCH(0x00000000); - OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); - } - - /* samler state */ - { -#define TEX_COUNT 1 - uint32_t tiling_bits = 0; - if (src->tiling != I915_TILING_NONE) - tiling_bits = MS3_TILED_SURFACE; - if (src->tiling == I915_TILING_Y) - tiling_bits |= MS3_TILE_WALK; - - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * TEX_COUNT)); - OUT_BATCH((1 << TEX_COUNT) - 1); - OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); - OUT_BATCH(MAPSURF_32BIT | MT_32BIT_ARGB8888 | - tiling_bits | - (buf_height(src) - 1) << MS3_HEIGHT_SHIFT | - (buf_width(src) - 1) << MS3_WIDTH_SHIFT); - OUT_BATCH((src->stride/4-1) << MS4_PITCH_SHIFT); - - OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT)); - OUT_BATCH((1 << TEX_COUNT) - 1); - OUT_BATCH(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT | - FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | - FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); - OUT_BATCH(SS3_NORMALIZED_COORDS | - TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT | - TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT | - 0 << SS3_TEXTUREMAP_INDEX_SHIFT); - OUT_BATCH(0x00000000); - } - - /* render target state */ - { - uint32_t tiling_bits = 0; - if (dst->tiling != I915_TILING_NONE) - tiling_bits = BUF_3D_TILED_SURFACE; - if (dst->tiling == I915_TILING_Y) - tiling_bits |= BUF_3D_TILE_WALK_Y; - - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | - BUF_3D_PITCH(dst->stride)); - OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(COLR_BUF_ARGB8888 | - DSTORG_HORT_BIAS(0x8) | - DSTORG_VERT_BIAS(0x8)); - - /* draw rect is unconditional */ - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(0x00000000); - OUT_BATCH(0x00000000); /* ymin, xmin */ - OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) | - DRAW_XMAX(buf_width(dst) - 1)); - /* yorig, xorig (relate to color buffer?) */ - OUT_BATCH(0x00000000); - } - - /* texfmt */ - { - uint32_t ss2 = ~0; - ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT); - ss2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); - OUT_BATCH(ss2); - OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | - BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | - BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | - BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | I1_LOAD_S(1) | 1); - OUT_BATCH(0); /* no vbo */ - OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | - (4 << S1_VERTEX_PITCH_SHIFT)); - } - - /* frage shader */ - { - OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2)); - /* decl FS_T0 */ - OUT_BATCH(D0_DCL | - REG_TYPE(FS_T0) << D0_TYPE_SHIFT | - REG_NR(FS_T0) << D0_NR_SHIFT | - ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); - OUT_BATCH(0); - OUT_BATCH(0); - /* decl FS_S0 */ - OUT_BATCH(D0_DCL | - (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) | - (REG_NR(FS_S0) << D0_NR_SHIFT) | - ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); - OUT_BATCH(0); - OUT_BATCH(0); - /* texld(FS_OC, FS_S0, FS_T0 */ - OUT_BATCH(T0_TEXLD | - (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) | - (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) | - (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT)); - OUT_BATCH((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) | - (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT)); - OUT_BATCH(0); - } - - OUT_BATCH(PRIM3D_RECTLIST | (3*4 - 1)); - emit_vertex(dst_x + TILE_SIZE); - emit_vertex(dst_y + TILE_SIZE); - emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src)); - emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); - - emit_vertex(dst_x); - emit_vertex(dst_y + TILE_SIZE); - emit_vertex_normalized(src_x, buf_width(src)); - emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); - - emit_vertex(dst_x); - emit_vertex(dst_y); - emit_vertex_normalized(src_x, buf_width(src)); - emit_vertex_normalized(src_y, buf_height(src)); - - if (!(keep_gpu_busy_counter & 1) && !fence_storm) - keep_gpu_busy(); - - keep_gpu_busy_counter++; - - intel_batchbuffer_flush(batch); -} - static void render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, unsigned logical_tile_no) diff --git a/tests/gem_stress.h b/tests/gem_stress.h new file mode 100644 index 00000000..53ef8ebc --- /dev/null +++ b/tests/gem_stress.h @@ -0,0 +1,81 @@ +#include <stdlib.h> +#include <sys/ioctl.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <getopt.h> +#include "drm.h" +#include "i915_drm.h" +#include "drmtest.h" +#include "intel_bufmgr.h" +#include "intel_batchbuffer.h" +#include "intel_gpu_tools.h" + +struct scratch_buf { + drm_intel_bo *bo; + uint32_t stride; + uint32_t tiling; + uint32_t *data; + unsigned num_tiles; +}; + +struct option_struct { + unsigned scratch_buf_size; + unsigned num_buffers; + int trace_tile; + int no_hw; + int gpu_busy_load; + int use_render; +}; + +extern struct option_struct options; +extern drm_intel_bufmgr *bufmgr; +extern struct intel_batchbuffer *batch; +extern int drm_fd; +extern int devid; + +extern int fence_storm; + +#define MAX_BUFS 4096 +#define SCRATCH_BUF_SIZE 1024*1024 +#define BUSY_BUF_SIZE (256*4096) +#define TILE_SIZE 16 +#define TILE_BYTES (TILE_SIZE*TILE_SIZE*sizeof(uint32_t)) + +void keep_gpu_busy(void); + +static inline void emit_vertex(float f) +{ + union { float f; uint32_t ui; } u; + u.f = f; + OUT_BATCH(u.ui); +} + +static inline void emit_vertex_normalized(float f, float total) +{ + union { float f; uint32_t ui; } u; + u.f = f / total; + OUT_BATCH(u.ui); +} + +static inline unsigned buf_width(struct scratch_buf *buf) +{ + return buf->stride/sizeof(uint32_t); +} + +static inline unsigned buf_height(struct scratch_buf *buf) +{ + return options.scratch_buf_size/buf->stride; +} + +void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no); +void gen2_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no); diff --git a/tests/gem_stress_i915.c b/tests/gem_stress_i915.c new file mode 100644 index 00000000..66787c7f --- /dev/null +++ b/tests/gem_stress_i915.c @@ -0,0 +1,197 @@ +#include "i915_reg.h" +#include "i915_3d.h" +#include "gem_stress.h" + +void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no) +{ + static unsigned keep_gpu_busy_counter = 0; + + /* check both edges of the fence usage */ + if (keep_gpu_busy_counter & 1 && !fence_storm) + keep_gpu_busy(); + + /* invariant state */ + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | + IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << + IAB_SRC_FACTOR_SHIFT) | + IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << + IAB_DST_FACTOR_SHIFT)); + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) | + ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff)); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); + OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */ + OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | + S4_LINE_WIDTH_ONE | + S4_CULLMODE_NONE | + S4_VFMT_XY); + OUT_BATCH(0x00000000); /* Stencil. */ + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_STIPPLE); + OUT_BATCH(0x00000000); + OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); + } + + /* samler state */ + { +#define TEX_COUNT 1 + uint32_t tiling_bits = 0; + if (src->tiling != I915_TILING_NONE) + tiling_bits = MS3_TILED_SURFACE; + if (src->tiling == I915_TILING_Y) + tiling_bits |= MS3_TILE_WALK; + + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * TEX_COUNT)); + OUT_BATCH((1 << TEX_COUNT) - 1); + OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH(MAPSURF_32BIT | MT_32BIT_ARGB8888 | + tiling_bits | + (buf_height(src) - 1) << MS3_HEIGHT_SHIFT | + (buf_width(src) - 1) << MS3_WIDTH_SHIFT); + OUT_BATCH((src->stride/4-1) << MS4_PITCH_SHIFT); + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT)); + OUT_BATCH((1 << TEX_COUNT) - 1); + OUT_BATCH(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT | + FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | + FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); + OUT_BATCH(SS3_NORMALIZED_COORDS | + TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT | + TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT | + 0 << SS3_TEXTUREMAP_INDEX_SHIFT); + OUT_BATCH(0x00000000); + } + + /* render target state */ + { + uint32_t tiling_bits = 0; + if (dst->tiling != I915_TILING_NONE) + tiling_bits = BUF_3D_TILED_SURFACE; + if (dst->tiling == I915_TILING_Y) + tiling_bits |= BUF_3D_TILE_WALK_Y; + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | + BUF_3D_PITCH(dst->stride)); + OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(COLR_BUF_ARGB8888 | + DSTORG_HORT_BIAS(0x8) | + DSTORG_VERT_BIAS(0x8)); + + /* draw rect is unconditional */ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0x00000000); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) | + DRAW_XMAX(buf_width(dst) - 1)); + /* yorig, xorig (relate to color buffer?) */ + OUT_BATCH(0x00000000); + } + + /* texfmt */ + { + uint32_t ss2 = ~0; + ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT); + ss2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); + OUT_BATCH(ss2); + OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | + BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | + BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | I1_LOAD_S(1) | 1); + OUT_BATCH(0); /* no vbo */ + OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | + (4 << S1_VERTEX_PITCH_SHIFT)); + } + + /* frage shader */ + { + OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2)); + /* decl FS_T0 */ + OUT_BATCH(D0_DCL | + REG_TYPE(FS_T0) << D0_TYPE_SHIFT | + REG_NR(FS_T0) << D0_NR_SHIFT | + ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); + OUT_BATCH(0); + OUT_BATCH(0); + /* decl FS_S0 */ + OUT_BATCH(D0_DCL | + (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) | + (REG_NR(FS_S0) << D0_NR_SHIFT) | + ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); + OUT_BATCH(0); + OUT_BATCH(0); + /* texld(FS_OC, FS_S0, FS_T0 */ + OUT_BATCH(T0_TEXLD | + (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) | + (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) | + (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT)); + OUT_BATCH((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) | + (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT)); + OUT_BATCH(0); + } + + OUT_BATCH(PRIM3D_RECTLIST | (3*4 - 1)); + emit_vertex(dst_x + TILE_SIZE); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y, buf_height(src)); + + if (!(keep_gpu_busy_counter & 1) && !fence_storm) + keep_gpu_busy(); + + keep_gpu_busy_counter++; + + intel_batchbuffer_flush(batch); +} + |