diff options
author | Damien Lespiau <damien.lespiau@intel.com> | 2013-02-14 18:50:06 +0000 |
---|---|---|
committer | Damien Lespiau <damien.lespiau@intel.com> | 2013-02-14 19:00:37 +0000 |
commit | 89cabd785121aa92514c70d479b0b5453ef88e04 (patch) | |
tree | 2fb240b9e9ad79c73afb226d3dfb44ec1407570f | |
parent | 50c45f9586843bb3b83d9bed5d9738145ba05866 (diff) | |
parent | ba2885b09e7c3f4870e4423abbbde6f432ee2378 (diff) |
assembler: Merge the assembler branch [assembler-merged]
Conflicts:
configure.ac: minor conflict with Ben's dumper work
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
78 files changed, 15697 insertions, 2 deletions
@@ -79,3 +79,10 @@ core *.swo *.swp cscope.* +TAGS + +/assembler/gram.c +/assembler/gram.h +/assembler/intel-gen4asm +/assembler/intel-gen4disasm +/assembler/lex.c diff --git a/Makefile.am b/Makefile.am index 20bca79..67b6563 100644 --- a/Makefile.am +++ b/Makefile.am @@ -23,6 +23,10 @@ ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4 SUBDIRS = lib man tools scripts benchmarks demos +if BUILD_ASSEMBLER +SUBDIRS += assembler +endif + if BUILD_SHADER_DEBUGGER SUBDIRS += debugger endif diff --git a/assembler/.gitignore b/assembler/.gitignore new file mode 100644 index 0000000..ed1de4e --- /dev/null +++ b/assembler/.gitignore @@ -0,0 +1,18 @@ +.deps +Makefile +Makefile.in +aclocal.m4 +autom4te.cache +configure +configure.lineno +config.log +config.status +depcomp +install-sh +missing +*.o + +src/intel-gen4asm +src/gram.c +src/gram.h +src/lex.c diff --git a/assembler/Makefile.am b/assembler/Makefile.am new file mode 100644 index 0000000..95ba08d --- /dev/null +++ b/assembler/Makefile.am @@ -0,0 +1,51 @@ +SUBDIRS = doc test + +noinst_LTLIBRARIES = libbrw.la + +bin_PROGRAMS = intel-gen4asm intel-gen4disasm + +libbrw_la_SOURCES = \ + brw_compat.h \ + brw_context.c \ + brw_context.h \ + brw_disasm.c \ + brw_defines.h \ + brw_eu.h \ + brw_eu.c \ + brw_eu_compact.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_reg.h \ + brw_structs.h \ + ralloc.c \ + ralloc.h \ + $(NULL) + +AM_YFLAGS = -d --warnings=all +AM_CFLAGS= $(ASSEMBLER_WARN_CFLAGS) + +LEX = flex -i +BUILT_SOURCES = gram.h gram.c lex.c +gram.h: gram.c + +intel_gen4asm_SOURCES = \ + gen4asm.h \ + gram.y \ + lex.l \ + main.c \ + $(NULL) + +intel_gen4asm_LDADD = libbrw.la + +intel_gen4disasm_SOURCES = disasm-main.c +intel_gen4disasm_LDADD = libbrw.la + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = intel-gen4asm.pc + +MAINTAINERCLEANFILES = $(BUILT_SOURCES) +EXTRA_DIST = \ + README \ + TODO \ + intel-gen4asm.pc.in diff --git a/assembler/README b/assembler/README new file mode 100644 index 0000000..bfc9586 
--- /dev/null +++ b/assembler/README @@ -0,0 +1,9 @@ +intel-gen4asm is a program to compile an assembly language for the Intel 965 +Express Chipset. It has been used to construct programs for textured video in +the 2d driver. + +Some examples of gen4 assembly programs are in the doc/examples directory. + +Note that the language parsed by this assembler is not exactly what the final +language is going to look like. In particular, the send instructions need to +be cleaned up and made more reasonable to program with. diff --git a/assembler/TODO b/assembler/TODO new file mode 100644 index 0000000..59e4abf --- /dev/null +++ b/assembler/TODO @@ -0,0 +1,14 @@ +- Add support for push, pop, msave, and mrest instructions +- Fix up send argument formatting for some send instructions +- Add send arguments for more send instructions +- Fix up the sets of registers allowed for send arguments +- manpage +- binary output? +- check for more error cases. +- boolean types in parser internal structs where appropriate +- replace GL* with non-GL? +- support labels for branch/jump instruction destinations +- support math on immediate operand values +- break/cont syntax should be better +- valgrind it +- do something to allow use as a library? 
diff --git a/assembler/brw_compat.h b/assembler/brw_compat.h new file mode 100644 index 0000000..4bf7f31 --- /dev/null +++ b/assembler/brw_compat.h @@ -0,0 +1,67 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * To share code with mesa without having to do big modifications and still be + * able to sync files together at a later point, this file holds macros and + * types defined in mesa's core headers. 
+ */ + +#ifndef __BRW_COMPAT_H__ +#define __BRW_COMPAT_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * * __builtin_expect macros + * */ +#if !defined(__GNUC__) +# define __builtin_expect(x, y) (x) +#endif + +#ifndef likely +# ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +# else +# define likely(x) (x) +# define unlikely(x) (x) +# endif +#endif + +#if (__GNUC__ >= 3) +#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) +#else +#define PRINTFLIKE(f, a) +#endif + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define Elements(x) ARRAY_SIZE(x) + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* __BRW_COMPAT_H__ */ diff --git a/assembler/brw_context.c b/assembler/brw_context.c new file mode 100644 index 0000000..6f2a964 --- /dev/null +++ b/assembler/brw_context.c @@ -0,0 +1,44 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <string.h> + +#include "brw_context.h" + +static bool +intel_init_context(struct intel_context *intel, int gen) +{ + memset(intel, 0, sizeof(struct intel_context)); + intel->gen = gen / 10; + intel->is_haswell = gen == 75; + if (intel->gen >= 5) + intel->needs_ff_sync = true; + + return true; +} + +bool +brw_init_context(struct brw_context *brw, int gen) +{ + return intel_init_context(&brw->intel, gen); +} diff --git a/assembler/brw_context.h b/assembler/brw_context.h new file mode 100644 index 0000000..90e66f7 --- /dev/null +++ b/assembler/brw_context.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * To share code with mesa without having to do big modifications and still be + * able to sync files together at a later point, this file stubs the fields + * of struct brw_context used by the code we import. + */ + +#ifndef __BRW_CONTEXT_H__ +#define __BRW_CONTEXT_H__ + +#include <stdbool.h> +#include <stdio.h> + +#include "brw_structs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef INTEL_DEBUG +#define INTEL_DEBUG (0) +#endif + +struct intel_context +{ + int gen; + int gt; + bool is_haswell; + bool is_g4x; + bool needs_ff_sync; +}; + +struct brw_context +{ + struct intel_context intel; +}; + +bool +brw_init_context(struct brw_context *brw, int gen); + +/* brw_disasm.c */ +struct opcode_desc { + char *name; + int nsrc; + int ndst; +}; + +extern const struct opcode_desc opcode_descs[128]; + +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* __BRW_CONTEXT_H__ */ diff --git a/assembler/brw_defines.h b/assembler/brw_defines.h new file mode 100644 index 0000000..98757da --- /dev/null +++ b/assembler/brw_defines.h @@ -0,0 +1,1642 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) +#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK) +#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* 3D state: + */ +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */ +/* DW0 */ +# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10 +# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) +# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) +/* DW1 */ +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 +
+#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 +#define BRW_CLIPMODE_KERNEL_CLIP 5 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define 
BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */ +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_CUT_INDEX_ENABLE (1 << 10) + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 +#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 +#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 +#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 +#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 +#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 
+#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). 
+ */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +/* Surface state DW0 */ +#define BRW_SURFACE_RC_READ_WRITE (1 << 8) +#define BRW_SURFACE_MIPLAYOUT_SHIFT 10 +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 +#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f +#define BRW_SURFACE_BLEND_ENABLED (1 << 13) +#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14 +#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15 +#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16 +#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32A32_SFIXED 0x020 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R32G32B32_SFIXED 0x050 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_R32G32_SFIXED 0x0A0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define 
BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E 
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C +#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180 +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F +#define BRW_SURFACEFORMAT_R32_SFIXED 0x1B2 +#define BRW_SURFACEFORMAT_R10G10B10A2_SNORM 0x1B3 +#define BRW_SURFACEFORMAT_R10G10B10A2_USCALED 0x1B4 +#define BRW_SURFACEFORMAT_R10G10B10A2_SSCALED 0x1B5 +#define BRW_SURFACEFORMAT_R10G10B10A2_SINT 0x1B6 +#define BRW_SURFACEFORMAT_B10G10R10A2_SNORM 0x1B7 +#define BRW_SURFACEFORMAT_B10G10R10A2_USCALED 0x1B8 +#define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED 0x1B9 +#define BRW_SURFACEFORMAT_B10G10R10A2_UINT 0x1BA +#define BRW_SURFACEFORMAT_B10G10R10A2_SINT 0x1BB +#define BRW_SURFACE_FORMAT_SHIFT 18 +#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_TYPE_SHIFT 29 +#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29) +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define GEN7_SURFACE_IS_ARRAY (1 << 28) +#define GEN7_SURFACE_VALIGN_2 (0 << 16) +#define GEN7_SURFACE_VALIGN_4 (1 << 16) +#define GEN7_SURFACE_HALIGN_4 (0 << 15) +#define GEN7_SURFACE_HALIGN_8 (1 << 15) +#define GEN7_SURFACE_TILING_NONE (0 << 13) +#define GEN7_SURFACE_TILING_X (2 << 13) +#define GEN7_SURFACE_TILING_Y (3 << 13) +#define GEN7_SURFACE_ARYSPC_FULL (0 << 10) +#define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) + +/* Surface state DW2 */ +#define BRW_SURFACE_HEIGHT_SHIFT 19 +#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) 
+#define BRW_SURFACE_WIDTH_SHIFT 6 +#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6) +#define BRW_SURFACE_LOD_SHIFT 2 +#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2) +#define GEN7_SURFACE_HEIGHT_SHIFT 16 +#define GEN7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16) +#define GEN7_SURFACE_WIDTH_SHIFT 0 +#define GEN7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0) + +/* Surface state DW3 */ +#define BRW_SURFACE_DEPTH_SHIFT 21 +#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21) +#define BRW_SURFACE_PITCH_SHIFT 3 +#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) +#define BRW_SURFACE_TILED (1 << 1) +#define BRW_SURFACE_TILED_Y (1 << 0) + +/* Surface state DW4 */ +#define BRW_SURFACE_MIN_LOD_SHIFT 28 +#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) +#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4) +#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4) +#define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3) +#define GEN7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3) +#define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3) +#define GEN7_SURFACE_MSFMT_MSS (0 << 6) +#define GEN7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6) + +/* Surface state DW5 */ +#define BRW_SURFACE_X_OFFSET_SHIFT 25 +#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) +#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24) +#define BRW_SURFACE_Y_OFFSET_SHIFT 20 +#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) +#define GEN7_SURFACE_MIN_LOD_SHIFT 4 +#define GEN7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4) + +/* Surface state DW6 */ +#define GEN7_SURFACE_MCS_ENABLE (1 << 0) +#define GEN7_SURFACE_MCS_PITCH_SHIFT 3 +#define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) + +/* Surface state DW7 */ +#define GEN7_SURFACE_SCS_R_SHIFT 25 +#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) +#define GEN7_SURFACE_SCS_G_SHIFT 22 +#define GEN7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22) +#define GEN7_SURFACE_SCS_B_SHIFT 19 +#define GEN7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19) +#define GEN7_SURFACE_SCS_A_SHIFT 16 +#define GEN7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16) + +/* The actual 
swizzle values/what channel to use */ +#define HSW_SCS_ZERO 0 +#define HSW_SCS_ONE 1 +#define HSW_SCS_RED 4 +#define HSW_SCS_GREEN 5 +#define HSW_SCS_BLUE 6 +#define HSW_SCS_ALPHA 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +enum brw_compression { + BRW_COMPRESSION_NONE = 0, + BRW_COMPRESSION_2NDHALF = 1, + BRW_COMPRESSION_COMPRESSED = 2, +}; + +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_R 7 +#define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define 
BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_ACCUMULATOR_WRITE_DISABLE 0 +#define BRW_ACCUMULATOR_WRITE_ENABLE 1 + +/** @{ + * + * Gen6 has replaced "mask enable/disable" with WECtrl, which is + * effectively the same but much simpler to think about. Now, there + * are two contributors ANDed together to whether channels are + * executed: The predication on the instruction, and the channel write + * enable. + */ +/** + * This is the default value. It means that a channel's write enable is set + * if the per-channel IP is pointing at this instruction. + */ +#define BRW_WE_NORMAL 0 +/** + * This is used like BRW_MASK_DISABLE, and causes all channels to have + * their write enable set. Note that predication still contributes to + * whether the channel actually gets written. + */ +#define BRW_WE_ALL 1 +/** @} */ + +enum opcode { + /* These are the actual hardware opcodes. 
*/ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_F32TO16 = 19, + BRW_OPCODE_F16TO32 = 20, + BRW_OPCODE_BFREV = 23, + BRW_OPCODE_BFE = 24, + BRW_OPCODE_BFI1 = 25, + BRW_OPCODE_BFI2 = 26, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_BRD = 33, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_BRC = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_CALL = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_RET = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_FBH = 75, + BRW_OPCODE_FBL = 76, + BRW_OPCODE_CBIT = 77, + BRW_OPCODE_ADDC = 78, + BRW_OPCODE_SUBB = 79, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_MAD = 91, + BRW_OPCODE_LRP = 92, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. 
+ */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_INT_QUOTIENT, + SHADER_OPCODE_INT_REMAINDER, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + + SHADER_OPCODE_TEX, + SHADER_OPCODE_TXD, + SHADER_OPCODE_TXF, + SHADER_OPCODE_TXL, + SHADER_OPCODE_TXS, + FS_OPCODE_TXB, + + SHADER_OPCODE_SHADER_TIME_ADD, + + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, + FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, + FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, + FS_OPCODE_MOV_DISPATCH_TO_FLAGS, + FS_OPCODE_DISCARD_JUMP, + FS_OPCODE_SET_GLOBAL_OFFSET, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, + VS_OPCODE_PULL_CONSTANT_LOAD, +}; + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define 
BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_REGISTER_3SRC_TYPE_F 0 +#define BRW_REGISTER_3SRC_TYPE_D 1 +#define BRW_REGISTER_3SRC_TYPE_UD 2 +#define BRW_REGISTER_3SRC_TYPE_DF 3 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 +#define BRW_ARF_TDR 0xB0 +#define BRW_ARF_TIMESTAMP 0xC0 + +#define BRW_MRF_COMPR4 (1 << 7) + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define 
BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +/** + * Message target: Shared Function ID for where to SEND a message. + * + * These are enumerated in the ISA reference under "send - Send Message". + * In particular, see the following tables: + * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" + * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" + * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) / + * Overview / GPE Function IDs + */ +enum brw_message_target { + BRW_SFID_NULL = 0, + BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */ + BRW_SFID_SAMPLER = 2, + BRW_SFID_MESSAGE_GATEWAY = 3, + BRW_SFID_DATAPORT_READ = 4, + BRW_SFID_DATAPORT_WRITE = 5, + BRW_SFID_URB = 6, + BRW_SFID_THREAD_SPAWNER = 7, + + GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, + GEN6_SFID_DATAPORT_RENDER_CACHE = 5, + GEN6_SFID_VME = 8, + GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, + + GEN7_SFID_DATAPORT_DATA_CACHE = 10, + + HSW_SFID_CRE = 0x0d, +}; + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN5_SAMPLER_MESSAGE_SAMPLE 0 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 +#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20 +#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29 +#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30 +#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31 + +/* for GEN5 only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +/* This one stays the same across generations. 
*/ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +/* GEN4 */ +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 +/* G45, GEN5 */ +#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 +#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 +/* GEN6 */ +#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +/* GEN6 */ +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 +#define 
GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 +#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 + +/* GEN7 */ +#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10 +#define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ 3 + +/* dataport atomic operations. */ +#define BRW_AOP_AND 1 +#define BRW_AOP_OR 2 +#define BRW_AOP_XOR 3 +#define BRW_AOP_MOV 4 +#define BRW_AOP_INC 5 +#define BRW_AOP_DEC 6 +#define BRW_AOP_ADD 7 +#define BRW_AOP_SUB 8 +#define BRW_AOP_REVSUB 9 +#define BRW_AOP_IMAX 10 +#define BRW_AOP_IMIN 11 +#define BRW_AOP_UMAX 12 +#define BRW_AOP_UMIN 13 +#define BRW_AOP_CMPWR 14 +#define BRW_AOP_PREDEC 15 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define 
BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CS_URB_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_SIP 0x6102 +#define CMD_PIPELINE_SELECT_965 0x6104 +#define CMD_PIPELINE_SELECT_GM45 0x6904 + +#define _3DSTATE_PIPELINED_POINTERS 0x7800 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) + +#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */ + +#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ + +#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */ +#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */ +#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */ + +#define _3DSTATE_VERTEX_BUFFERS 0x7808 +# define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 +# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) +# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) +# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) +# define BRW_VB0_PITCH_SHIFT 0 + +#define _3DSTATE_VERTEX_ELEMENTS 0x7809 +# define BRW_VE0_INDEX_SHIFT 27 +# 
define GEN6_VE0_INDEX_SHIFT 26 +# define BRW_VE0_FORMAT_SHIFT 16 +# define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) +# define GEN6_VE0_EDGE_FLAG_ENABLE (1 << 15) +# define BRW_VE0_SRC_OFFSET_SHIFT 0 +# define BRW_VE1_COMPONENT_NOSTORE 0 +# define BRW_VE1_COMPONENT_STORE_SRC 1 +# define BRW_VE1_COMPONENT_STORE_0 2 +# define BRW_VE1_COMPONENT_STORE_1_FLT 3 +# define BRW_VE1_COMPONENT_STORE_1_INT 4 +# define BRW_VE1_COMPONENT_STORE_VID 5 +# define BRW_VE1_COMPONENT_STORE_IID 6 +# define BRW_VE1_COMPONENT_STORE_PID 7 +# define BRW_VE1_COMPONENT_0_SHIFT 28 +# define BRW_VE1_COMPONENT_1_SHIFT 24 +# define BRW_VE1_COMPONENT_2_SHIFT 20 +# define BRW_VE1_COMPONENT_3_SHIFT 16 +# define BRW_VE1_DST_OFFSET_SHIFT 0 + +#define CMD_INDEX_BUFFER 0x780a +#define GEN4_3DSTATE_VF_STATISTICS 0x780b +#define GM45_3DSTATE_VF_STATISTICS 0x680b +#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ +#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */ +#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */ + +#define _3DSTATE_URB 0x7805 /* GEN6 */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_URB_GS_SIZE_SHIFT 0 + +#define _3DSTATE_VF 0x780c /* GEN7.5+ */ +#define HSW_CUT_INDEX_ENABLE (1 << 8) + +#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */ +#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */ +#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */ +#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */ +# define GEN7_URB_ENTRY_SIZE_SHIFT 16 +# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 + +#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ +#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */ +# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 + +#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ 
*/ +#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */ + +#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define _3DSTATE_VS 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define HSW_VS_MAX_THREADS_SHIFT 23 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define _3DSTATE_GS 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10) +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +# define GEN7_GS_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_GS_REORDER (1 << 30) +# define GEN6_GS_DISCARD_ADJACENCY (1 << 29) +# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28) +# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27) +# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16 +# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) +# define GEN6_GS_ENABLE (1 << 15) + +# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) +# define 
BRW_GS_EDGE_INDICATOR_1 (1 << 9) + +#define _3DSTATE_HS 0x781B /* GEN7+ */ +#define _3DSTATE_TE 0x781C /* GEN7+ */ +#define _3DSTATE_DS 0x781D /* GEN7+ */ + +#define _3DSTATE_CLIP 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN7_CLIP_WINDING_CW (0 << 20) +# define GEN7_CLIP_WINDING_CCW (1 << 20) +# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19) +# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19) +# define GEN7_CLIP_EARLY_CULL (1 << 18) +# define GEN7_CLIP_CULLMODE_BOTH (0 << 16) +# define GEN7_CLIP_CULLMODE_NONE (1 << 16) +# define GEN7_CLIP_CULLMODE_FRONT (2 << 16) +# define GEN7_CLIP_CULLMODE_BACK (3 << 16) +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip aganist. 
*/ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 +# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) + +#define _3DSTATE_SF 0x7813 /* GEN6+ */ +/* DW1 (for gen6) */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define 
GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +/* On GEN7, many fields of 3DSTATE_SF were split out into a new command: + * 3DSTATE_SBE. The remaining fields live in different DWords, but retain + * the same bit-offset. 
The only new field: + */ +/* GEN7/DW1: */ +# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 +/* GEN7/DW2: */ +# define HSW_SF_LINE_STIPPLE_ENABLE 14 + +#define _3DSTATE_SBE 0x781F /* GEN7+ */ +/* DW1 */ +# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) +# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22 +# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21) +# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */ +/* DW10: Point sprite texture coordinate enables */ +/* DW11: Constant interpolation enables */ +/* DW12: attr 0-7 wrap shortest enables */ +/* DW13: attr 8-16 wrap shortest enables */ + +enum brw_wm_barycentric_interp_mode { + BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0, + BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1, + BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2, + BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3, + BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4, + BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5, + BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6 +}; +#define BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS \ + ((1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC) | \ + (1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC) | \ + (1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC)) + +#define _3DSTATE_WM 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define 
GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10 +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define 
GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */ +#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */ +#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */ +#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */ + +#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */ +/* DW1 */ +# define SO_FUNCTION_ENABLE (1 << 31) +# define SO_RENDERING_DISABLE (1 << 30) +/* This selects which incoming rendering stream goes down the pipeline. The + * rendering stream is 0 if not defined by special cases in the GS state. + */ +# define SO_RENDER_STREAM_SELECT_SHIFT 27 +# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27) +/* Controls reordering of TRISTRIP_* elements in stream output (not rendering). 
+ */ +# define SO_REORDER_TRAILING (1 << 26) +/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */ +# define SO_STATISTICS_ENABLE (1 << 25) +# define SO_BUFFER_ENABLE(n) (1 << (8 + (n))) +/* DW2 */ +# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29 +# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29) +# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24 +# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24) +# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21 +# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21) +# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16 +# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16) +# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13 +# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13) +# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8 +# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8) +# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5 +# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5) +# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0 +# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0) + +/* 3DSTATE_WM for Gen7 */ +/* DW1 */ +# define GEN7_WM_STATISTICS_ENABLE (1 << 31) +# define GEN7_WM_DEPTH_CLEAR (1 << 30) +# define GEN7_WM_DISPATCH_ENABLE (1 << 29) +# define GEN7_WM_DEPTH_RESOLVE (1 << 28) +# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN7_WM_KILL_ENABLE (1 << 25) +# define GEN7_WM_PSCDEPTH_OFF (0 << 23) +# define GEN7_WM_PSCDEPTH_ON (1 << 23) +# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23) +# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23) +# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN7_WM_USES_SOURCE_W (1 << 19) +# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) +# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17) +# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17) +# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11 +# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) +# define 
GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) +# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6) +# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6) +# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6) +# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6) +# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4) +# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3) +# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) +# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0) +# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0) +# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0) +# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0) +/* DW2 */ +# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31) +# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) + +#define _3DSTATE_PS 0x7820 /* GEN7+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN7_PS_SPF_MODE (1 << 31) +# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN7_PS_SAMPLER_COUNT_SHIFT 27 +# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define IVB_PS_MAX_THREADS_SHIFT 24 +# define HSW_PS_MAX_THREADS_SHIFT 23 +# define HSW_PS_SAMPLE_MASK_SHIFT 12 +# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12) +# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) +# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) +# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN7_PS_POSOFFSET_NONE (0 << 3) +# define GEN7_PS_POSOFFSET_CENTROID (2 << 3) +# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) +# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2) +# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1) +# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0) +/* DW5 */ +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0 +/* DW6: kernel 1 pointer */ +/* DW7: kernel 
2 pointer */ + +#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */ + +#define _3DSTATE_DRAWING_RECTANGLE 0x7900 +#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901 +#define _3DSTATE_CHROMA_KEY 0x7904 +#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */ +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907 +#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 +#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */ + +#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ +#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ + +#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804 +#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805 +#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806 +# define HSW_STENCIL_ENABLED (1 << 31) +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807 + +#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */ +# define GEN5_DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ +/* DW2 */ +# define GEN7_DEPTH_CLEAR_VALID (1 << 0) + +#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */ +/* DW1 */ +# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12 +# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12) +# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8 +# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8) +# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4 +# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4) +# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0 +# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0) +/* DW2 */ +# define 
SO_NUM_ENTRIES_3_SHIFT 24 +# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24) +# define SO_NUM_ENTRIES_2_SHIFT 16 +# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16) +# define SO_NUM_ENTRIES_1_SHIFT 8 +# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8) +# define SO_NUM_ENTRIES_0_SHIFT 0 +# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0) + +/* SO_DECL DW0 */ +# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12 +# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12) +# define SO_DECL_HOLE_FLAG (1 << 11) +# define SO_DECL_REGISTER_INDEX_SHIFT 4 +# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4) +# define SO_DECL_COMPONENT_MASK_SHIFT 0 +# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0) + +#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */ +/* DW1 */ +# define SO_BUFFER_INDEX_SHIFT 29 +# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29) +# define SO_BUFFER_PITCH_SHIFT 0 +# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0) +/* DW2: start address */ +/* DW3: end address. */ + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Bitfields for the URB_WRITE message, DW2 of message header: */ +#define URB_WRITE_PRIM_END 0x1 +#define URB_WRITE_PRIM_START 0x2 +#define URB_WRITE_PRIM_TYPE_SHIFT 2 + + +/* Maximum number of entries that can be addressed using a binding table + * pointer of type SURFTYPE_BUFFER + */ +#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27) + +#define EX_DESC_SFID_MASK 0xF +#define EX_DESC_EOT_MASK 0x20 + +#endif diff --git a/assembler/brw_disasm.c b/assembler/brw_disasm.c new file mode 100644 index 0000000..4dec829 --- /dev/null +++ b/assembler/brw_disasm.c @@ -0,0 +1,1348 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of 
/*
 * Opcode description table, indexed by BRW_OPCODE_* value.
 *
 * Each populated entry carries the mnemonic the disassembler prints (.name)
 * and the number of source (.nsrc) and destination (.ndst) operands.
 * Indices that are not listed are zero-initialised, so their .name is NULL;
 * print_opcode() uses that to report an invalid opcode.
 */
const struct opcode_desc opcode_descs[128] = {
    /* Single-source operations. */
    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },

    /* Multiply / dot-product family (mad is the only three-source entry). */
    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },

    /* Two-source arithmetic, logic, shift and compare operations. */
    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },

    /* Message send, flow control and mask-stack operations. */
    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
};
/*
 * Element size of each register type, indexed by the same 3-bit type value
 * that indexes reg_encoding[].  The disassembler divides sub-register numbers
 * by these sizes to print an element index.
 *
 * Slot 6 has no name in reg_encoding[] and its size is 0, so callers that
 * divide by reg_type_size[type] must not be handed type 6.
 */
const int reg_type_size[8] = {
    4,	/* 0: UD */
    4,	/* 1: D  */
    2,	/* 2: UW */
    2,	/* 3: W  */
    1,	/* 4: UB */
    1,	/* 5: B  */
    0,	/* 6: no type assigned */
    4,	/* 7: F  */
};
[BRW_SFID_MESSAGE_GATEWAY] = "gateway", + [BRW_SFID_URB] = "urb", + [BRW_SFID_THREAD_SPAWNER] = "thread_spawner", + [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", + [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", + [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", + [GEN7_SFID_DATAPORT_DATA_CACHE] = "data" +}; + +static const char * const dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + +static const char * const math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = 
/*
 * Number of characters written on the current output line.  string() adds to
 * it, newline() resets it to 0 and pad() reads it to align columns.
 */
static int column;

/*
 * Write a NUL-terminated string to `file` and advance the column counter by
 * the string's length.  Always returns 0; the result of fputs() is not
 * inspected.
 */
static int string (FILE *file, const char *string)
{
    const size_t length = strlen (string);

    fputs (string, file);
    column += length;
    return 0;
}
+{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + va_end (args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, const char *name, const char * const ctrl[], + unsigned id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, unsigned _reg_file, unsigned _reg_nr) +{ + int err = 0; + + /* Clear the Compr4 instruction compression bit. 
*/ + if (_reg_file == BRW_MESSAGE_REGISTER_FILE) + _reg_nr &= ~(1 << 7); + + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_FLAG: + format (file, "f%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr / + reg_type_size[inst->bits1.da1.dest_reg_type]); + format (file, "<%s>", horiz_stride[inst->bits1.da1.dest_horiz_stride]); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr / + reg_type_size[inst->bits1.ia1.dest_reg_type]); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string 
(file, "]"); + format (file, "<%s>", horiz_stride[inst->bits1.ia1.dest_horiz_stride]); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr / + reg_type_size[inst->bits1.da16.dest_reg_type]); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + return 0; +} + +static int dest_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + uint32_t reg_file; + + if (inst->bits1.da3src.dest_reg_file) + reg_file = BRW_MESSAGE_REGISTER_FILE; + else + reg_file = BRW_GENERAL_REGISTER_FILE; + + err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da3src.dest_subreg_nr) + format (file, ".%d", inst->bits1.da3src.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL); + + return 0; +} + +static int src_align1_region (FILE *file, + unsigned _vert_stride, unsigned _width, unsigned _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, unsigned type, unsigned 
_reg_file, + unsigned _vert_stride, unsigned _width, unsigned _horiz_stride, + unsigned reg_num, unsigned sub_reg_num, unsigned __abs, unsigned _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */ + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + unsigned type, + unsigned _reg_file, + int _addr_imm, + unsigned _addr_subreg_nr, + unsigned _negate, + unsigned __abs, + unsigned _addr_mode, + unsigned _horiz_stride, + unsigned _width, + unsigned _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + unsigned _reg_type, + unsigned _reg_file, + unsigned _vert_stride, + unsigned _reg_nr, + unsigned _subreg_nr, + unsigned __abs, + unsigned _negate, + unsigned swz_x, + unsigned swz_y, + unsigned swz_z, + unsigned swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + /* bit4 for subreg number byte addressing. Make this same meaning as + in da1 case, so output looks consistent. 
*/ + format (file, ".%d", 16 / reg_type_size[_reg_type]); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",4,1>"); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + return err; +} + +static int src0_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + unsigned swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3; + unsigned swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3; + unsigned swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3; + unsigned swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3; + + err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr); + if (err == -1) + return 0; + if (inst->bits2.da3src.src0_subreg_nr) + format (file, ".%d", inst->bits2.da3src.src0_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == 
BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + +static int src1_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + unsigned swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3; + unsigned swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3; + unsigned swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3; + unsigned swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3; + unsigned src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low | + (inst->bits3.da3src.src1_subreg_nr_high << 2)); + + err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate, + NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, + inst->bits3.da3src.src1_reg_nr); + if (err == -1) + return 0; + if (src1_subreg_nr) + format (file, ".%d", src1_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel 
select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +static int src2_3src (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + unsigned swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3; + unsigned swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3; + unsigned swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3; + unsigned swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3; + + err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate, + NULL); + err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL); + + err |= reg (file, BRW_GENERAL_REGISTER_FILE, + inst->bits3.da3src.src2_reg_nr); + if (err == -1) + return 0; + if (inst->bits3.da3src.src2_subreg_nr) + format (file, ".%d", inst->bits3.da3src.src2_subreg_nr); + string (file, "<4,1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, + BRW_REGISTER_TYPE_F, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + +static int imm (FILE *file, unsigned type, struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", 
inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format (file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + 
inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int esize[6] = { + [0] = 1, + [1] = 2, + [2] = 4, + [3] = 8, + [4] = 16, + [5] = 32, +}; + +static int qtr_ctrl(FILE *file, struct brw_instruction *inst) +{ + int qtr_ctl = inst->header.compression_control; + int exec_size = 
esize[inst->header.execution_size]; + + if (exec_size == 8) { + switch (qtr_ctl) { + case 0: + string (file, " 1Q"); + break; + case 1: + string (file, " 2Q"); + break; + case 2: + string (file, " 3Q"); + break; + case 3: + string (file, " 4Q"); + break; + } + } else if (exec_size == 16){ + if (qtr_ctl < 2) + string (file, " 1H"); + else + string (file, " 2H"); + } + return 0; +} + +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0); + if (inst->bits2.da1.flag_subreg_nr) + format (file, ".%d", inst->bits2.da1.flag_subreg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode == BRW_OPCODE_MATH) { + string (file, " "); + err |= control (file, "function", math_function, + inst->header.destreg__conditionalmod, NULL); + } else if (inst->header.opcode != BRW_OPCODE_SEND && + inst->header.opcode != BRW_OPCODE_SENDC) { + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + /* If we're using the conditional modifier, print which flags reg is + * used for it. Note that on gen6+, the embedded-condition SEL and + * control flow doesn't update flags. 
+ */ + if (inst->header.destreg__conditionalmod && + (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL && + inst->header.opcode != BRW_OPCODE_IF && + inst->header.opcode != BRW_OPCODE_WHILE))) { + format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0); + if (inst->bits2.da1.flag_subreg_nr) + format (file, ".%d", inst->bits2.da1.flag_subreg_nr); + } + } + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].nsrc == 3) { + pad (file, 16); + err |= dest_3src (file, inst); + + pad (file, 32); + err |= src0_3src (file, inst); + + pad (file, 48); + err |= src1_3src (file, inst); + + pad (file, 64); + err |= src2_3src (file, inst); + } else { + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format (file, " %d", inst->bits3.break_cont.jip); + } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF || + inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { + format (file, " %d", inst->bits1.branch_gen6.jump_count); + } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK || + inst->header.opcode == BRW_OPCODE_CONTINUE || + inst->header.opcode == BRW_OPCODE_HALT)) || + (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) { + format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip); + } else if (inst->header.opcode == BRW_OPCODE_JMPI) { + format (file, " %d", inst->bits3.d); + } + + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, 
inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + } + + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) { + enum brw_message_target target; + + if (gen >= 6) + target = inst->header.destreg__conditionalmod; + else if (gen == 5) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + + newline (file); + pad (file, 16); + space = 0; + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + } else { + err |= control (file, "target function", target_function, + target, &space); + } + + switch (target) { + case BRW_SFID_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_SFID_SAMPLER: + if (gen >= 7) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen7.binding_table_index, + inst->bits3.sampler_gen7.sampler, + inst->bits3.sampler_gen7.msg_type, + inst->bits3.sampler_gen7.simd_mode); + } else if (gen >= 5) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.sampler_gen5.binding_table_index, + inst->bits3.sampler_gen5.sampler, + inst->bits3.sampler_gen5.msg_type, + inst->bits3.sampler_gen5.simd_mode); + } else if (0 /* FINISHME: is_g4x */) { + format (file, " (%d, %d)", + inst->bits3.sampler_g4x.binding_table_index, + inst->bits3.sampler_g4x.sampler); + } else { + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", + sampler_target_format, + 
inst->bits3.sampler.return_format, NULL); + string (file, ")"); + } + break; + case BRW_SFID_DATAPORT_READ: + if (gen >= 6) { + format (file, " (%d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else if (gen >= 5 /* FINISHME: || is_g4x */) { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read_gen5.binding_table_index, + inst->bits3.dp_read_gen5.msg_control, + inst->bits3.dp_read_gen5.msg_type); + } else { + format (file, " (%d, %d, %d)", + inst->bits3.dp_read.binding_table_index, + inst->bits3.dp_read.msg_control, + inst->bits3.dp_read.msg_type); + } + break; + + case BRW_SFID_DATAPORT_WRITE: + if (gen >= 7) { + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen7_dp.msg_type, &space); + + format (file, ", %d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + } else if (gen == 6) { + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d)", + inst->bits3.gen6_dp.binding_table_index, + inst->bits3.gen6_dp.msg_control, + inst->bits3.gen6_dp.msg_type, + inst->bits3.gen6_dp.send_commit_msg); + } else { + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.last_render_target << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + } + break; + + case BRW_SFID_URB: + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + + space = 1; + if (gen >= 5) { + err |= control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } + err |= control (file, "urb swizzle", urb_swizzle, + 
inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_SFID_THREAD_SPAWNER: + break; + case GEN7_SFID_DATAPORT_DATA_CACHE: + format (file, " (%d, %d, %d)", + inst->bits3.gen7_dp.binding_table_index, + inst->bits3.gen7_dp.msg_control, + inst->bits3.gen7_dp.msg_type); + break; + + + default: + format (file, "unsupported target %d", target); + break; + } + if (space) + string (file, " "); + if (gen >= 5) { + format (file, "mlen %d", + inst->bits3.generic_gen5.msg_length); + format (file, " rlen %d", + inst->bits3.generic_gen5.response_length); + } else { + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + if (gen >= 6) + err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space); + else + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + + if (gen >= 6) + err |= qtr_ctrl (file, inst); + else { + if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED && + opcode[inst->header.opcode].ndst > 0 && + inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE && + inst->bits1.da1.dest_reg_nr & (1 << 7)) { + format (file, " compr4"); + } else { + err |= control (file, "compression control", compr_ctrl, + inst->header.compression_control, &space); + } + } + + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (gen >= 6) + err 
|= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND || + inst->header.opcode == BRW_OPCODE_SENDC) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} diff --git a/assembler/brw_eu.c b/assembler/brw_eu.c new file mode 100644 index 0000000..d874b79 --- /dev/null +++ b/assembler/brw_eu.c @@ -0,0 +1,268 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include <string.h> + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + +#include "ralloc.h" + +/* Returns the corresponding conditional mod for swapping src0 and + * src1 in e.g. CMP. + */ +uint32_t +brw_swap_cmod(uint32_t cmod) +{ + switch (cmod) { + case BRW_CONDITIONAL_Z: + case BRW_CONDITIONAL_NZ: + return cmod; + case BRW_CONDITIONAL_G: + return BRW_CONDITIONAL_L; + case BRW_CONDITIONAL_GE: + return BRW_CONDITIONAL_LE; + case BRW_CONDITIONAL_L: + return BRW_CONDITIONAL_G; + case BRW_CONDITIONAL_LE: + return BRW_CONDITIONAL_GE; + default: + return ~0; + } +} + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? + */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) +{ + p->current->header.predicate_inverse = predicate_inverse; +} + +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditionalmod = conditional; +} + +void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg) +{ + p->current->bits2.da1.flag_reg_nr = reg; + p->current->bits2.da1.flag_subreg_nr = subreg; +} + +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) +{ + p->current->header.access_mode = access_mode; 
+} + +void +brw_set_compression_control(struct brw_compile *p, + enum brw_compression compression_control) +{ + p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); + + if (p->brw->intel.gen >= 6) { + /* Since we don't use the 32-wide support in gen6, we translate + * the pre-gen6 compression control here. + */ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1Q; + break; + case BRW_COMPRESSION_2NDHALF: + /* For 8-wide, this is "use the second set of 8 bits." */ + p->current->header.compression_control = GEN6_COMPRESSION_2Q; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For 16-wide instruction compression, use the first set of 16 bits + * since we don't do 32-wide dispatch. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + default: + assert(!"not reached"); + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + } + } else { + p->current->header.compression_control = compression_control; + } +} + +void brw_set_mask_control( struct brw_compile *p, unsigned value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, bool enable ) +{ + p->current->header.saturate = enable; +} + +void brw_set_acc_write_control(struct brw_compile *p, unsigned value) +{ + if (p->brw->intel.gen >= 6) + p->current->header.acc_wr_control = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->compressed_stack[p->current - p->stack] = p->compressed; + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; + p->compressed = p->compressed_stack[p->current - p->stack]; +} + + 
+/*********************************************************************** + */ +void +brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) +{ + memset(p, 0, sizeof(*p)); + + p->brw = brw; + /* + * Set the initial instruction store array size to 1024, if found that + * isn't enough, then it will double the store size at brw_next_insn() + * until out of memory. + */ + p->store_size = 1024; + p->store = rzalloc_array(mem_ctx, struct brw_instruction, p->store_size); + p->nr_insn = 0; + p->current = p->stack; + p->compressed = false; + memset(p->current, 0, sizeof(p->current[0])); + + p->mem_ctx = mem_ctx; + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); + + /* Set up control flow stack */ + p->if_stack_depth = 0; + p->if_stack_array_size = 16; + p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size); + + p->loop_stack_depth = 0; + p->loop_stack_array_size = 16; + p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); + p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); + + brw_init_compaction_tables(&brw->intel); +} + + +const unsigned *brw_get_program( struct brw_compile *p, + unsigned *sz ) +{ + brw_compact_instructions(p); + + *sz = p->next_insn_offset; + return (const unsigned *)p->store; +} + +void +brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end) +{ + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + void *store = p->store; + bool dump_hex = false; + + for (int offset = start; offset < end;) { + struct brw_instruction *insn = store + offset; + struct brw_instruction uncompacted; + printf("0x%08x: ", offset); + + if (insn->header.cmpt_control) { + struct brw_compact_instruction *compacted = (void *)insn; + if (dump_hex) { + printf("0x%08x 0x%08x ", + 
((uint32_t *)insn)[1], + ((uint32_t *)insn)[0]); + } + + brw_uncompact_instruction(intel, &uncompacted, compacted); + insn = &uncompacted; + offset += 8; + } else { + if (dump_hex) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)insn)[3], + ((uint32_t *)insn)[2], + ((uint32_t *)insn)[1], + ((uint32_t *)insn)[0]); + } + offset += 16; + } + + brw_disasm(stdout, insn, p->brw->intel.gen); + } +} diff --git a/assembler/brw_eu.h b/assembler/brw_eu.h new file mode 100644 index 0000000..427db37 --- /dev/null +++ b/assembler/brw_eu.h @@ -0,0 +1,427 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
  */


#ifndef BRW_EU_H
#define BRW_EU_H

#include <stdbool.h>
#include <stdio.h>
#include "brw_context.h"
#include "brw_structs.h"
#include "brw_defines.h"
#include "brw_reg.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Number of slots in the instruction-state stack of struct brw_compile. */
#define BRW_EU_MAX_INSN_STACK 5

/* State of one instruction-emission session: the instruction store, the
 * push/pop stack of default instruction state, and the control-flow
 * bookkeeping stacks.
 */
struct brw_compile {
   struct brw_instruction *store;   /* array of emitted instructions */
   int store_size;                  /* presumably the capacity of store, in instructions -- TODO confirm */
   unsigned nr_insn;                /* index into store used by current_insn() */
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
   bool compressed_stack[BRW_EU_MAX_INSN_STACK];
   struct brw_instruction *current; /* points at the active slot of stack[] */

   unsigned flag_value;
   bool single_program_flow;
   bool compressed;
   struct brw_context *brw;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer(an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   /**
    * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack. This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;
   int loop_stack_array_size;
};

/* Returns the address of store[nr_insn]. */
static inline struct brw_instruction *current_insn( struct brw_compile *p)
{
   return &p->store[p->nr_insn];
}

/* Default instruction state: push/pop and individual field setters. */
void brw_pop_insn_state( struct brw_compile *p );
void brw_push_insn_state( struct brw_compile *p );
void brw_set_mask_control( struct brw_compile *p, unsigned value );
void brw_set_saturate( struct brw_compile *p, bool enable );
void brw_set_access_mode( struct brw_compile *p, unsigned access_mode );
void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
void brw_set_predicate_control( struct brw_compile *p, unsigned pc );
void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional );
void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
void brw_set_acc_write_control(struct brw_compile *p, unsigned value);

/* Session setup, disassembly dump and program retrieval. */
void brw_init_compile(struct brw_context *, struct brw_compile *p,
                      void *mem_ctx);
void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end);
const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz );

struct brw_instruction *brw_next_insn(struct brw_compile *p, unsigned opcode);
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
                  struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
                  struct brw_reg reg);

void gen6_resolve_implied_move(struct brw_compile *p,
                               struct brw_reg *src,
                               unsigned msg_reg_nr);

/* Helpers for regular instructions:
 */
/* ALU1(OP) declares brw_<OP>(p, dest, src0). */
#define ALU1(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
              struct brw_reg dest, \
              struct brw_reg src0);

/* ALU2(OP) declares brw_<OP>(p, dest, src0, src1). */
#define ALU2(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
              struct brw_reg dest, \
              struct brw_reg src0, \
              struct brw_reg src1);

/* ALU3(OP) declares brw_<OP>(p, dest, src0, src1, src2). */
#define ALU3(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
              struct brw_reg dest, \
              struct brw_reg src0, \
              struct brw_reg src1, \
              struct brw_reg src2);

/* ROUND(OP) declares void brw_<OP>(p, dest, src0). */
#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);

/* One prototype per opcode, generated through the macros above. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU2(JMPI)
ALU2(ADD)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)

ROUND(RNDZ)
ROUND(RNDE)

/* The generator macros are only needed for the declarations above. */
#undef ALU1
#undef ALU2
#undef ALU3
#undef ROUND


/* Helpers for SEND instruction:
 */
void brw_set_sampler_message(struct brw_compile *p,
                             struct brw_instruction *insn,
                             unsigned binding_table_index,
                             unsigned sampler,
                             unsigned msg_type,
                             unsigned response_length,
                             unsigned msg_length,
                             unsigned header_present,
                             unsigned simd_mode,
                             unsigned return_format);

void brw_set_dp_read_message(struct brw_compile *p,
                             struct brw_instruction *insn,
                             unsigned binding_table_index,
                             unsigned msg_control,
                             unsigned msg_type,
                             unsigned target_cache,
                             unsigned msg_length,
                             bool header_present,
                             unsigned response_length);

void brw_set_dp_write_message(struct brw_compile *p,
                              struct brw_instruction *insn,
                              unsigned binding_table_index,
                              unsigned msg_control,
                              unsigned msg_type,
                              unsigned msg_length,
                              bool header_present,
                              unsigned last_render_target,
                              unsigned response_length,
                              unsigned end_of_thread,
                              unsigned send_commit_msg);

void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   bool allocate,
                   bool used,
                   unsigned msg_length,
                   unsigned response_length,
                   bool eot,
                   bool writes_complete,
                   unsigned offset,
                   unsigned swizzle);

void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 unsigned msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void brw_svb_write(struct brw_compile *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  unsigned msg_reg_nr,
                  struct brw_reg src0,
                  unsigned msg_control,
                  unsigned binding_table_index,
                  unsigned msg_length,
                  unsigned response_length,
                  bool eot,
                  bool header_present);

void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned writemask,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned data_type,
               unsigned precision );

void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1);

void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

void brw_oword_block_read_scratch(struct brw_compile *p,
                                  struct brw_reg dest,
                                  struct brw_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void brw_shader_time_add(struct brw_compile *p,
                         int mrf,
                         uint32_t surf_index);

/* If/else/endif. Works by manipulating the execution flags on each
 * channel.
 */
struct brw_instruction *brw_IF(struct brw_compile *p,
                               unsigned execute_size);
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
                                struct brw_reg src0, struct brw_reg src1);

void brw_ELSE(struct brw_compile *p);
void brw_ENDIF(struct brw_compile *p);

/* DO/WHILE loops:
 */
struct brw_instruction *brw_DO(struct brw_compile *p,
                               unsigned execute_size);

struct brw_instruction *brw_WHILE(struct brw_compile *p);

struct brw_instruction *brw_BREAK(struct brw_compile *p);
struct brw_instruction *brw_CONT(struct brw_compile *p);
struct brw_instruction *gen6_CONT(struct brw_compile *p);
struct brw_instruction *gen6_HALT(struct brw_compile *p);
/* Forward jumps:
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);



void brw_NOP(struct brw_compile *p);

void brw_WAIT(struct brw_compile *p);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
/* NOTE(review): the prototype still takes a dest argument even though the
 * comment above says there is never a destination -- confirm against the
 * implementation.
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1);

/***********************************************************************
 * brw_eu_util.c:
 */

void brw_copy_indirect_to_indirect(struct brw_compile *p,
                                   struct brw_indirect dst_ptr,
                                   struct brw_indirect src_ptr,
                                   unsigned count);

void brw_copy_from_indirect(struct brw_compile *p,
                            struct brw_reg dst,
                            struct brw_indirect ptr,
                            unsigned count);

void brw_copy4(struct brw_compile *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_copy8(struct brw_compile *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_math_invert( struct brw_compile *p,
                      struct brw_reg dst,
                      struct brw_reg src);

void brw_set_src1(struct brw_compile *p,
                  struct brw_instruction *insn,
                  struct brw_reg reg);

void brw_set_uip_jip(struct brw_compile *p);

uint32_t brw_swap_cmod(uint32_t cmod);

/* Operand setters for three-source instructions. */
void
brw_set_3src_dest(struct brw_compile *p,
                  struct brw_instruction *insn,
                  struct brw_reg dest);
void
brw_set_3src_src0(struct brw_compile *p,
                  struct brw_instruction *insn,
                  struct brw_reg src0);
void
brw_set_3src_src1(struct brw_compile *p,
                  struct brw_instruction *insn,
                  struct brw_reg src1);
void
brw_set_3src_src2(struct brw_compile *p,
                  struct brw_instruction *insn,
                  struct brw_reg src2);

/* brw_eu_compact.c */
void brw_init_compaction_tables(struct intel_context *intel);
void brw_compact_instructions(struct brw_compile *p);
void brw_uncompact_instruction(struct intel_context *intel,
                               struct brw_instruction *dst,
                               struct brw_compact_instruction *src);
bool brw_try_compact_instruction(struct brw_compile *p,
                                 struct brw_compact_instruction *dst,
                                 struct brw_instruction *src);

void brw_debug_compact_uncompact(struct intel_context *intel,
                                 struct brw_instruction *orig,
                                 struct brw_instruction *uncompacted);

/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
void brw_remove_grf_to_mrf_moves(struct brw_compile *p);

#ifdef __cplusplus
}
#endif

#endif
diff --git a/assembler/brw_eu_compact.c b/assembler/brw_eu_compact.c new file mode 100644 index 0000000..d362ed3 --- /dev/null +++ b/assembler/brw_eu_compact.c @@ -0,0 +1,810 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_eu_compact.c + * + * Instruction compaction is a feature of gm45 and newer hardware that allows + * for a smaller instruction encoding. + * + * The instruction cache is on the order of 32KB, and many programs generate + * far more instructions than that. The instruction cache is built to barely + * keep up with instruction dispatch ability in cache hit cases -- L1 + * instruction cache misses that still hit in the next level could limit + * throughput by around 50%. + * + * The idea of instruction compaction is that most instructions use a tiny + * subset of the GPU functionality, so we can encode what would be a 16 byte + * instruction in 8 bytes using some lookup tables for various fields. 
+ */ + +#include <string.h> + +#include "brw_compat.h" +#include "brw_context.h" +#include "brw_eu.h" + +static const uint32_t gen6_control_index_table[32] = { + 0b00000000000000000, + 0b01000000000000000, + 0b00110000000000000, + 0b00000000100000000, + 0b00010000000000000, + 0b00001000100000000, + 0b00000000100000010, + 0b00000000000000010, + 0b01000000100000000, + 0b01010000000000000, + 0b10110000000000000, + 0b00100000000000000, + 0b11010000000000000, + 0b11000000000000000, + 0b01001000100000000, + 0b01000000000001000, + 0b01000000000000100, + 0b00000000000001000, + 0b00000000000000100, + 0b00111000100000000, + 0b00001000100000010, + 0b00110000100000000, + 0b00110000000000001, + 0b00100000000000001, + 0b00110000000000010, + 0b00110000000000101, + 0b00110000000001001, + 0b00110000000010000, + 0b00110000000000011, + 0b00110000000000100, + 0b00110000100001000, + 0b00100000000001001 +}; + +static const uint32_t gen6_datatype_table[32] = { + 0b001001110000000000, + 0b001000110000100000, + 0b001001110000000001, + 0b001000000001100000, + 0b001010110100101001, + 0b001000000110101101, + 0b001100011000101100, + 0b001011110110101101, + 0b001000000111101100, + 0b001000000001100001, + 0b001000110010100101, + 0b001000000001000001, + 0b001000001000110001, + 0b001000001000101001, + 0b001000000000100000, + 0b001000001000110010, + 0b001010010100101001, + 0b001011010010100101, + 0b001000000110100101, + 0b001100011000101001, + 0b001011011000101100, + 0b001011010110100101, + 0b001011110110100101, + 0b001111011110111101, + 0b001111011110111100, + 0b001111011110111101, + 0b001111011110011101, + 0b001111011110111110, + 0b001000000000100001, + 0b001000000000100010, + 0b001001111111011101, + 0b001000001110111110, +}; + +static const uint32_t gen6_subreg_table[32] = { + 0b000000000000000, + 0b000000000000100, + 0b000000110000000, + 0b111000000000000, + 0b011110000001000, + 0b000010000000000, + 0b000000000010000, + 0b000110000001100, + 0b001000000000000, + 0b000001000000000, + 
0b000001010010100, + 0b000000001010110, + 0b010000000000000, + 0b110000000000000, + 0b000100000000000, + 0b000000010000000, + 0b000000000001000, + 0b100000000000000, + 0b000001010000000, + 0b001010000000000, + 0b001100000000000, + 0b000000001010100, + 0b101101010010100, + 0b010100000000000, + 0b000000010001111, + 0b011000000000000, + 0b111110000000000, + 0b101000000000000, + 0b000000000001111, + 0b000100010001111, + 0b001000010001111, + 0b000110000000000, +}; + +static const uint32_t gen6_src_index_table[32] = { + 0b000000000000, + 0b010110001000, + 0b010001101000, + 0b001000101000, + 0b011010010000, + 0b000100100000, + 0b010001101100, + 0b010101110000, + 0b011001111000, + 0b001100101000, + 0b010110001100, + 0b001000100000, + 0b010110001010, + 0b000000000010, + 0b010101010000, + 0b010101101000, + 0b111101001100, + 0b111100101100, + 0b011001110000, + 0b010110001001, + 0b010101011000, + 0b001101001000, + 0b010000101100, + 0b010000000000, + 0b001101110000, + 0b001100010000, + 0b001100000000, + 0b010001101010, + 0b001101111000, + 0b000001110000, + 0b001100100000, + 0b001101010000, +}; + +static const uint32_t gen7_control_index_table[32] = { + 0b0000000000000000010, + 0b0000100000000000000, + 0b0000100000000000001, + 0b0000100000000000010, + 0b0000100000000000011, + 0b0000100000000000100, + 0b0000100000000000101, + 0b0000100000000000111, + 0b0000100000000001000, + 0b0000100000000001001, + 0b0000100000000001101, + 0b0000110000000000000, + 0b0000110000000000001, + 0b0000110000000000010, + 0b0000110000000000011, + 0b0000110000000000100, + 0b0000110000000000101, + 0b0000110000000000111, + 0b0000110000000001001, + 0b0000110000000001101, + 0b0000110000000010000, + 0b0000110000100000000, + 0b0001000000000000000, + 0b0001000000000000010, + 0b0001000000000000100, + 0b0001000000100000000, + 0b0010110000000000000, + 0b0010110000000010000, + 0b0011000000000000000, + 0b0011000000100000000, + 0b0101000000000000000, + 0b0101000000100000000 +}; + +static const uint32_t 
gen7_datatype_table[32] = { + 0b001000000000000001, + 0b001000000000100000, + 0b001000000000100001, + 0b001000000001100001, + 0b001000000010111101, + 0b001000001011111101, + 0b001000001110100001, + 0b001000001110100101, + 0b001000001110111101, + 0b001000010000100001, + 0b001000110000100000, + 0b001000110000100001, + 0b001001010010100101, + 0b001001110010100100, + 0b001001110010100101, + 0b001111001110111101, + 0b001111011110011101, + 0b001111011110111100, + 0b001111011110111101, + 0b001111111110111100, + 0b000000001000001100, + 0b001000000000111101, + 0b001000000010100101, + 0b001000010000100000, + 0b001001010010100100, + 0b001001110010000100, + 0b001010010100001001, + 0b001101111110111101, + 0b001111111110111101, + 0b001011110110101100, + 0b001010010100101000, + 0b001010110100101000 +}; + +static const uint32_t gen7_subreg_table[32] = { + 0b000000000000000, + 0b000000000000001, + 0b000000000001000, + 0b000000000001111, + 0b000000000010000, + 0b000000010000000, + 0b000000100000000, + 0b000000110000000, + 0b000001000000000, + 0b000001000010000, + 0b000010100000000, + 0b001000000000000, + 0b001000000000001, + 0b001000010000001, + 0b001000010000010, + 0b001000010000011, + 0b001000010000100, + 0b001000010000111, + 0b001000010001000, + 0b001000010001110, + 0b001000010001111, + 0b001000110000000, + 0b001000111101000, + 0b010000000000000, + 0b010000110000000, + 0b011000000000000, + 0b011110010000111, + 0b100000000000000, + 0b101000000000000, + 0b110000000000000, + 0b111000000000000, + 0b111000000011100 +}; + +static const uint32_t gen7_src_index_table[32] = { + 0b000000000000, + 0b000000000010, + 0b000000010000, + 0b000000010010, + 0b000000011000, + 0b000000100000, + 0b000000101000, + 0b000001001000, + 0b000001010000, + 0b000001110000, + 0b000001111000, + 0b001100000000, + 0b001100000010, + 0b001100001000, + 0b001100010000, + 0b001100010010, + 0b001100100000, + 0b001100101000, + 0b001100111000, + 0b001101000000, + 0b001101000010, + 0b001101001000, + 0b001101010000, + 
0b001101100000, + 0b001101101000, + 0b001101110000, + 0b001101110001, + 0b001101111000, + 0b010001101000, + 0b010001101001, + 0b010001101010, + 0b010110001000 +}; + +static const uint32_t *control_index_table; +static const uint32_t *datatype_table; +static const uint32_t *subreg_table; +static const uint32_t *src_index_table; + +static bool +set_control_index(struct intel_context *intel, + struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t *src_u32 = (uint32_t *)src; + uint32_t uncompacted = 0; + + uncompacted |= ((src_u32[0] >> 8) & 0xffff) << 0; + uncompacted |= ((src_u32[0] >> 31) & 0x1) << 16; + /* On gen7, the flag register number gets integrated into the control + * index. + */ + if (intel->gen >= 7) + uncompacted |= ((src_u32[2] >> 25) & 0x3) << 17; + + for (int i = 0; i < 32; i++) { + if (control_index_table[i] == uncompacted) { + dst->dw0.control_index = i; + return true; + } + } + + return false; +} + +static bool +set_datatype_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t uncompacted = 0; + + uncompacted |= src->bits1.ud & 0x7fff; + uncompacted |= (src->bits1.ud >> 29) << 15; + + for (int i = 0; i < 32; i++) { + if (datatype_table[i] == uncompacted) { + dst->dw0.data_type_index = i; + return true; + } + } + + return false; +} + +static bool +set_subreg_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t uncompacted = 0; + + uncompacted |= src->bits1.da1.dest_subreg_nr << 0; + uncompacted |= src->bits2.da1.src0_subreg_nr << 5; + uncompacted |= src->bits3.da1.src1_subreg_nr << 10; + + for (int i = 0; i < 32; i++) { + if (subreg_table[i] == uncompacted) { + dst->dw0.sub_reg_index = i; + return true; + } + } + + return false; +} + +static bool +get_src_index(uint32_t uncompacted, + uint32_t *compacted) +{ + for (int i = 0; i < 32; i++) { + if (src_index_table[i] == uncompacted) { + *compacted = i; + return true; + } + } + + return false; +} + +static 
bool +set_src0_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t compacted, uncompacted = 0; + + uncompacted |= (src->bits2.ud >> 13) & 0xfff; + + if (!get_src_index(uncompacted, &compacted)) + return false; + + dst->dw0.src0_index = compacted & 0x3; + dst->dw1.src0_index = compacted >> 2; + + return true; +} + +static bool +set_src1_index(struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + uint32_t compacted, uncompacted = 0; + + uncompacted |= (src->bits3.ud >> 13) & 0xfff; + + if (!get_src_index(uncompacted, &compacted)) + return false; + + dst->dw1.src1_index = compacted; + + return true; +} + +/** + * Tries to compact instruction src into dst. + * + * It doesn't modify dst unless src is compactable, which is relied on by + * brw_compact_instructions(). + */ +bool +brw_try_compact_instruction(struct brw_compile *p, + struct brw_compact_instruction *dst, + struct brw_instruction *src) +{ + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + struct brw_compact_instruction temp; + + if (src->header.opcode == BRW_OPCODE_IF || + src->header.opcode == BRW_OPCODE_ELSE || + src->header.opcode == BRW_OPCODE_ENDIF || + src->header.opcode == BRW_OPCODE_HALT || + src->header.opcode == BRW_OPCODE_DO || + src->header.opcode == BRW_OPCODE_WHILE) { + /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs + * to be able to handle compacted flow control instructions.. 
+ */ + return false; + } + + /* FINISHME: immediates */ + if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE || + src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return false; + + memset(&temp, 0, sizeof(temp)); + + temp.dw0.opcode = src->header.opcode; + temp.dw0.debug_control = src->header.debug_control; + if (!set_control_index(intel, &temp, src)) + return false; + if (!set_datatype_index(&temp, src)) + return false; + if (!set_subreg_index(&temp, src)) + return false; + temp.dw0.acc_wr_control = src->header.acc_wr_control; + temp.dw0.conditionalmod = src->header.destreg__conditionalmod; + if (intel->gen <= 6) + temp.dw0.flag_subreg_nr = src->bits2.da1.flag_subreg_nr; + temp.dw0.cmpt_ctrl = 1; + if (!set_src0_index(&temp, src)) + return false; + if (!set_src1_index(&temp, src)) + return false; + temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr; + temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr; + temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr; + + *dst = temp; + + return true; +} + +static void +set_uncompacted_control(struct intel_context *intel, + struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t *dst_u32 = (uint32_t *)dst; + uint32_t uncompacted = control_index_table[src->dw0.control_index]; + + dst_u32[0] |= ((uncompacted >> 0) & 0xffff) << 8; + dst_u32[0] |= ((uncompacted >> 16) & 0x1) << 31; + + if (intel->gen >= 7) + dst_u32[2] |= ((uncompacted >> 17) & 0x3) << 25; +} + +static void +set_uncompacted_datatype(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t uncompacted = datatype_table[src->dw0.data_type_index]; + + /* Unsigned constant: 0x7 << 29 does not fit in a signed int. */ + dst->bits1.ud &= ~(0x7u << 29); + dst->bits1.ud |= ((uncompacted >> 15) & 0x7) << 29; + dst->bits1.ud &= ~0x7fff; + dst->bits1.ud |= uncompacted & 0x7fff; +} + +static void +set_uncompacted_subreg(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t uncompacted = subreg_table[src->dw0.sub_reg_index]; + + dst->bits1.da1.dest_subreg_nr = 
(uncompacted >> 0) & 0x1f; + dst->bits2.da1.src0_subreg_nr = (uncompacted >> 5) & 0x1f; + dst->bits3.da1.src1_subreg_nr = (uncompacted >> 10) & 0x1f; +} + +static void +set_uncompacted_src0(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t compacted = src->dw0.src0_index | src->dw1.src0_index << 2; + uint32_t uncompacted = src_index_table[compacted]; + + dst->bits2.ud |= uncompacted << 13; +} + +static void +set_uncompacted_src1(struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + uint32_t uncompacted = src_index_table[src->dw1.src1_index]; + + dst->bits3.ud |= uncompacted << 13; +} + +void +brw_uncompact_instruction(struct intel_context *intel, + struct brw_instruction *dst, + struct brw_compact_instruction *src) +{ + memset(dst, 0, sizeof(*dst)); + + dst->header.opcode = src->dw0.opcode; + dst->header.debug_control = src->dw0.debug_control; + + set_uncompacted_control(intel, dst, src); + set_uncompacted_datatype(dst, src); + set_uncompacted_subreg(dst, src); + dst->header.acc_wr_control = src->dw0.acc_wr_control; + dst->header.destreg__conditionalmod = src->dw0.conditionalmod; + if (intel->gen <= 6) + dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr; + set_uncompacted_src0(dst, src); + set_uncompacted_src1(dst, src); + dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr; + dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr; + dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr; +} + +void brw_debug_compact_uncompact(struct intel_context *intel, + struct brw_instruction *orig, + struct brw_instruction *uncompacted) +{ + fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n", + intel->gen); + + fprintf(stderr, " before: "); + brw_disasm(stderr, orig, intel->gen); + + fprintf(stderr, " after: "); + brw_disasm(stderr, uncompacted, intel->gen); + + uint32_t *before_bits = (uint32_t *)orig; + uint32_t *after_bits = (uint32_t *)uncompacted; + printf(" changed bits:\n"); + for (int i = 0; i < 128; i++) { + 
uint32_t before = before_bits[i / 32] & (1 << (i & 31)); + uint32_t after = after_bits[i / 32] & (1 << (i & 31)); + + if (before != after) { + printf(" bit %d, %s to %s\n", i, + before ? "set" : "unset", + after ? "set" : "unset"); + } + } +} + +static int +compacted_between(int old_ip, int old_target_ip, int *compacted_counts) +{ + int this_compacted_count = compacted_counts[old_ip]; + int target_compacted_count = compacted_counts[old_target_ip]; + return target_compacted_count - this_compacted_count; +} + +static void +update_uip_jip(struct brw_instruction *insn, int this_old_ip, + int *compacted_counts) +{ + int target_old_ip; + + target_old_ip = this_old_ip + insn->bits3.break_cont.jip; + insn->bits3.break_cont.jip -= compacted_between(this_old_ip, + target_old_ip, + compacted_counts); + + target_old_ip = this_old_ip + insn->bits3.break_cont.uip; + insn->bits3.break_cont.uip -= compacted_between(this_old_ip, + target_old_ip, + compacted_counts); +} + +void +brw_init_compaction_tables(struct intel_context *intel) +{ + assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0); + assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); + assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); + assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0); + assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0); + assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); + assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); + assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0); + + switch (intel->gen) { + case 7: + control_index_table = gen7_control_index_table; + datatype_table = gen7_datatype_table; + subreg_table = gen7_subreg_table; + src_index_table = gen7_src_index_table; + break; + case 6: + control_index_table = gen6_control_index_table; + datatype_table = gen6_datatype_table; + subreg_table = gen6_subreg_table; + 
src_index_table = gen6_src_index_table; + break; + default: + return; + } +} + +void +brw_compact_instructions(struct brw_compile *p) +{ + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + void *store = p->store; + /* For an instruction at byte offset 8*i before compaction, this is the number + * of compacted instructions that preceded it. + */ + int compacted_counts[p->next_insn_offset / 8]; + /* For an instruction at byte offset 8*i after compaction, this is the + * 8-byte offset it was at before compaction. + */ + int old_ip[p->next_insn_offset / 8]; + + if (intel->gen < 6) + return; + + int src_offset; + int offset = 0; + int compacted_count = 0; + for (src_offset = 0; src_offset < p->nr_insn * 16;) { + struct brw_instruction *src = store + src_offset; + void *dst = store + offset; + + old_ip[offset / 8] = src_offset / 8; + compacted_counts[src_offset / 8] = compacted_count; + + struct brw_instruction saved = *src; + + if (!src->header.cmpt_control && + brw_try_compact_instruction(p, dst, src)) { + compacted_count++; + + if (INTEL_DEBUG) { + struct brw_instruction uncompacted; + brw_uncompact_instruction(intel, &uncompacted, dst); + if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) { + brw_debug_compact_uncompact(intel, &saved, &uncompacted); + } + } + + offset += 8; + src_offset += 16; + } else { + int size = src->header.cmpt_control ? 8 : 16; + + /* It appears that the end of thread SEND instruction needs to be + * aligned, or the GPU hangs. + */ + if ((src->header.opcode == BRW_OPCODE_SEND || + src->header.opcode == BRW_OPCODE_SENDC) && + src->bits3.generic.end_of_thread && + (offset & 8) != 0) { + struct brw_compact_instruction *align = store + offset; + memset(align, 0, sizeof(*align)); + align->dw0.opcode = BRW_OPCODE_NOP; + align->dw0.cmpt_ctrl = 1; + offset += 8; + old_ip[offset / 8] = src_offset / 8; + dst = store + offset; + } + + /* If we didn't compact this instruction, we need to move it down into + * place. 
+ */ + if (offset != src_offset) { + memmove(dst, src, size); + } + offset += size; + src_offset += size; + } + } + + /* Fix up control flow offsets. */ + p->next_insn_offset = offset; + for (offset = 0; offset < p->next_insn_offset;) { + struct brw_instruction *insn = store + offset; + int this_old_ip = old_ip[offset / 8]; + int this_compacted_count = compacted_counts[this_old_ip]; + int target_old_ip, target_compacted_count; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + case BRW_OPCODE_CONTINUE: + case BRW_OPCODE_HALT: + update_uip_jip(insn, this_old_ip, compacted_counts); + break; + + case BRW_OPCODE_IF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_WHILE: + if (intel->gen == 6) { + target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count; + target_compacted_count = compacted_counts[target_old_ip]; + insn->bits1.branch_gen6.jump_count -= (target_compacted_count - + this_compacted_count); + } else { + update_uip_jip(insn, this_old_ip, compacted_counts); + } + break; + } + + if (insn->header.cmpt_control) { + offset += 8; + } else { + offset += 16; + } + } + + /* p->nr_insn is counting the number of uncompacted instructions still, so + * divide. We do want to be sure there's a valid instruction in any + * alignment padding, so that the next compression pass (for the FS 8/16 + * compile passes) parses correctly. 
+ */ + if (p->next_insn_offset & 8) { + struct brw_compact_instruction *align = store + offset; + memset(align, 0, sizeof(*align)); + align->dw0.opcode = BRW_OPCODE_NOP; + align->dw0.cmpt_ctrl = 1; + p->next_insn_offset += 8; + } + p->nr_insn = p->next_insn_offset / 16; + + if (0) { + fprintf(stdout, "dumping compacted program\n"); + brw_dump_compile(p, stdout, 0, p->next_insn_offset); + + int cmp = 0; + for (offset = 0; offset < p->next_insn_offset;) { + struct brw_instruction *insn = store + offset; + + if (insn->header.cmpt_control) { + offset += 8; + cmp++; + } else { + offset += 16; + } + } + fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8, + cmp * 8 * 100 / (offset + cmp * 8)); + } +} diff --git a/assembler/brw_eu_debug.c b/assembler/brw_eu_debug.c new file mode 100644 index 0000000..b446007 --- /dev/null +++ b/assembler/brw_eu_debug.c @@ -0,0 +1,92 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ + printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ + printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + printf("imm %f", hwreg.dw1.f); + } + else { + printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/assembler/brw_eu_emit.c b/assembler/brw_eu_emit.c new file mode 100644 index 0000000..23f0da5 --- /dev/null +++ b/assembler/brw_eu_emit.c @@ -0,0 +1,2627 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <string.h> + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + +#include "ralloc.h" + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) +{ + if (reg.width == BRW_WIDTH_8 && p->compressed) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +/** + * Prior to Sandybridge, the SEND instruction accepted non-MRF source + * registers, implicitly moving the operand to a message register. + * + * On Sandybridge, this is no longer the case. This function performs the + * explicit move; it should be called before emitting a SEND instruction. + */ +void +gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + unsigned msg_reg_nr) +{ + struct intel_context *intel = &p->brw->intel; + if (intel->gen < 6) + return; + + if (src->file == BRW_MESSAGE_REGISTER_FILE) + return; + + if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), + retype(*src, BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + } + *src = brw_message_reg(msg_reg_nr); +} + +static void +gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) +{ + /* From the BSpec / ISA Reference / send - [DevIVB+]: + * "The send with EOT should use register space R112-R127 for <src>. This is + * to enable loading of a new thread into the same slot while the message + * with EOT for current thread is pending dispatch." 
+ * + * Since we're pretending to have 16 MRFs anyway, we may as well use the + * registers required for messages with EOT. + */ + struct intel_context *intel = &p->brw->intel; + if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + reg->file = BRW_GENERAL_REGISTER_FILE; + reg->nr += GEN7_MRF_HACK_START; + } +} + + +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) +{ + if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && + dest.file != BRW_MESSAGE_REGISTER_FILE) + assert(dest.nr < 128); + + gen7_convert_mrf_to_grf(p, &dest); + + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.da16.dest_horiz_stride = 1; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + /* even ignored in da16, still need to set as '01' */ + insn->bits1.ia16.dest_horiz_stride = 1; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(p, insn, 
dest); +} + +extern int reg_type_size[]; + +static void +validate_reg(struct brw_instruction *insn, struct brw_reg reg) +{ + int hstride_for_reg[] = {0, 1, 2, 4}; + int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; + int width_for_reg[] = {1, 2, 4, 8, 16}; + int execsize_for_reg[] = {1, 2, 4, 8, 16, 32}; + int width, hstride, vstride, execsize; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* 3.3.6: Region Parameters. Restriction: Immediate vectors + * mean the destination has to be 128-bit aligned and the + * destination horiz stride has to be a word. + */ + if (reg.type == BRW_REGISTER_TYPE_V) { + assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * + reg_type_size[insn->bits1.da1.dest_reg_type] == 2); + } + + return; + } + + /* Nothing to validate for the ARF null register, which is identified by + * its register number (not by its register file). + */ + if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL) + return; + + assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); + hstride = hstride_for_reg[reg.hstride]; + + if (reg.vstride == 0xf) { + vstride = -1; + } else { + assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); + vstride = vstride_for_reg[reg.vstride]; + } + + assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); + width = width_for_reg[reg.width]; + + assert(insn->header.execution_size >= 0 && + insn->header.execution_size < Elements(execsize_for_reg)); + execsize = execsize_for_reg[insn->header.execution_size]; + + /* Restrictions from 3.3.10: Register Region Restrictions. */ + /* 3. */ + assert(execsize >= width); + + /* FIXME: the assembler has a lot of code written that triggers the + * assertions commented out below. Let's paper over it (for now!) until we + * can re-validate the shaders with those little inconsistencies fixed. */ + + /* 4. */ +#if 0 + if (execsize == width && hstride != 0) { + assert(vstride == -1 || vstride == width * hstride); + } +#endif + + /* 5. */ + if (execsize == width && hstride == 0) { + /* no restriction on vstride. */ + } + + /* 6. 
*/ +#if 0 + if (width == 1) { + assert(hstride == 0); + } +#endif + + /* 7. */ +#if 0 + if (execsize == 1 && width == 1) { + assert(hstride == 0); + assert(vstride == 0); + } +#endif + + /* 8. */ + if (vstride == 0 && hstride == 0) { + assert(width == 1); + } + + /* 10. Check destination issues. */ +} + +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) +{ + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + + if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + + gen7_convert_mrf_to_grf(p, &reg); + + if (intel->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND || + insn->header.opcode == BRW_OPCODE_SENDC)) { + /* Any source modifiers or regions will be ignored, since this just + * identifies the MRF/GRF to start reading the message contents from. + * Check for some likely failures. + */ + assert(!reg.negate); + assert(!reg.abs); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + } + + validate_reg(insn, reg); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + + /* FIXME: This looks quite wrong, tampering with src1. I did not find + * anything in the bspec that was hinting it would be needed when setting + * src0. Before removing this, one needs to run piglit. 
+
+      insn->bits1.da1.src1_reg_file = 0;
+      insn->bits1.da1.src1_reg_type = reg.type;
+       */
+   }
+   else
+   {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.da1.src0_subreg_nr = reg.subnr;
+            insn->bits2.da1.src0_reg_nr = reg.nr;
+         }
+         else {
+            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+            insn->bits2.da16.src0_reg_nr = reg.nr;
+         }
+      }
+      else {
+         insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         }
+         else {
+            insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; /* NOTE(review):
+             * the matching branch of brw_set_src1() stores
+             * indirect_offset / 16 into ia16.src1_indirect_offset; here the
+             * raw offset goes into a subreg_nr field instead.  Confirm
+             * whether that difference is intended.
+             */
+         }
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+
+         /* FIXME: While this is correct, if the assembler uses that code path
+          * the opcode generated are different and thus needs a validation
+          * pass.
+         if (reg.width == BRW_WIDTH_1 &&
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits2.da1.src0_width = BRW_WIDTH_1;
+            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else {
+         */
+            insn->bits2.da1.src0_horiz_stride = reg.hstride;
+            insn->bits2.da1.src0_width = reg.width;
+            insn->bits2.da1.src0_vert_stride = reg.vstride;
+         /* } */
+      }
+      else {
+         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits2.da16.src0_vert_stride = reg.vstride;
+      }
+   }
+}
+
+/**
+ * Encode \p reg as source operand 1 of \p insn.
+ *
+ * Mirrors brw_set_src0() but writes the src1 fields, which live in bits1 and
+ * bits3.  An immediate operand is stored in bits3.ud.  Message registers are
+ * rejected, and src0 must not already be an immediate.
+ */
+void brw_set_src1(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg reg)
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(reg.nr < 128);
+
+   /* Takes the operand by address so it can be rewritten in place. */
+   gen7_convert_mrf_to_grf(p, &reg);
+
+   validate_reg(insn, reg);
+
+   insn->bits1.da1.src1_reg_file = reg.file;
+   insn->bits1.da1.src1_reg_type = reg.type;
+   insn->bits3.da1.src1_abs = reg.abs;
+   insn->bits3.da1.src1_negate = reg.negate;
+   insn->bits3.da1.src1_address_mode = reg.address_mode;
+
+   /* Only src1 can be immediate in two-argument instructions.
+    */
+   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      /* The immediate value itself is stored in bits3. */
+      insn->bits3.ud = reg.dw1.ud;
+   }
+   else {
+      /* It's only BRW that does not support register-indirect addressing on
+       * src1 */
+      assert (intel->gen >= 4 || reg.address_mode == BRW_ADDRESS_DIRECT);
+
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits3.da1.src1_subreg_nr = reg.subnr;
+            insn->bits3.da1.src1_reg_nr = reg.nr;
+         }
+         else {
+            /* The align16 encoding stores the sub-register number / 16. */
+            insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+            insn->bits3.da16.src1_reg_nr = reg.nr;
+         }
+      }
+      else {
+         insn->bits3.ia1.src1_subreg_nr = reg.subnr;
+
+         if (insn->header.access_mode == BRW_ALIGN_1)
+            insn->bits3.ia1.src1_indirect_offset = reg.dw1.bits.indirect_offset;
+         else
+            insn->bits3.ia16.src1_indirect_offset = reg.dw1.bits.indirect_offset / 16;
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         /* FIXME: While this is correct, if the assembler uses that code path
+          * the opcode generated are different and thus needs a validation
+          * pass.
+         if (reg.width == BRW_WIDTH_1 &&
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits3.da1.src1_width = BRW_WIDTH_1;
+            insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else { */
+            insn->bits3.da1.src1_horiz_stride = reg.hstride;
+            insn->bits3.da1.src1_width = reg.width;
+            insn->bits3.da1.src1_vert_stride = reg.vstride;
+         /* } */
+      }
+      else { /* access mode other than align1: encode the four swizzle
+              * selectors and a vertical stride instead of a full region */
+         insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits3.da16.src1_vert_stride = reg.vstride;
+      }
+   }
+}
+
+/**
+ * Set the Message Descriptor and Extended Message Descriptor fields
+ * for SEND messages.
+ *
+ * \note This zeroes out the Function Control bits, so it must be called
+ * \b before filling out any message-specific data.  Callers can
+ * choose not to fill in irrelevant bits; they will be zero.
+ */
+static void
+brw_set_message_descriptor(struct brw_compile *p,
+                           struct brw_instruction *inst,
+                           enum brw_message_target sfid,
+                           unsigned msg_length,
+                           unsigned response_length,
+                           bool header_present,
+                           bool end_of_thread)
+{
+   struct intel_context *intel = &p->brw->intel;
+
+   brw_set_src1(p, inst, brw_imm_d(0)); /* immediate src1: brw_set_src1() stores its value in bits3.ud */
+
+   if (intel->gen >= 5) {
+      inst->bits3.generic_gen5.header_present = header_present;
+      inst->bits3.generic_gen5.response_length = response_length;
+      inst->bits3.generic_gen5.msg_length = msg_length;
+      inst->bits3.generic_gen5.end_of_thread = end_of_thread;
+
+      if (intel->gen >= 6) {
+         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
+         inst->header.destreg__conditionalmod = sfid;
+      } else {
+         /* Set Extended Message Descriptor (ex_desc) */
+         inst->bits2.send_gen5.sfid = sfid;
+         inst->bits2.send_gen5.end_of_thread = end_of_thread;
+      }
+   } else { /* gen < 5: the target is part of bits3 and there is no header_present field */
+      inst->bits3.generic.response_length = response_length;
+      inst->bits3.generic.msg_length = msg_length;
+      inst->bits3.generic.msg_target = sfid;
+      inst->bits3.generic.end_of_thread = end_of_thread;
+   }
+}
+/**
+ * Fill in the message descriptor of \p insn for a math message.
+ *
+ * The message length and response length are not passed in; they are
+ * derived from \p function by the two switch statements below.
+ */
+static void brw_set_math_message( struct brw_compile *p,
+                                  struct brw_instruction *insn,
+                                  unsigned function,
+                                  unsigned integer_type,
+                                  bool low_precision,
+                                  unsigned dataType )
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   unsigned msg_length;
+   unsigned response_length;
+
+   /* Infer message length from the function */
+   switch (function) {
+   case BRW_MATH_FUNCTION_POW:
+   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
+   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
+   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
+      msg_length = 2;
+      break;
+   default:
+      msg_length = 1;
+      break;
+   }
+
+   /* Infer response length from the function */
+   switch (function) {
+   case BRW_MATH_FUNCTION_SINCOS:
+   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
+      response_length = 2;
+      break;
+   default:
+      response_length = 1;
+      break;
+   }
+
+
+
brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
+                              msg_length, response_length, false, false);
+   if (intel->gen == 5) {
+      insn->bits3.math_gen5.function = function;
+      insn->bits3.math_gen5.int_type = integer_type;
+      insn->bits3.math_gen5.precision = low_precision;
+      insn->bits3.math_gen5.saturate = insn->header.saturate;
+      insn->bits3.math_gen5.data_type = dataType;
+      insn->bits3.math_gen5.snapshot = 0;
+   } else {
+      insn->bits3.math.function = function;
+      insn->bits3.math.int_type = integer_type;
+      insn->bits3.math.precision = low_precision;
+      insn->bits3.math.saturate = insn->header.saturate;
+      insn->bits3.math.data_type = dataType;
+   }
+   insn->header.saturate = 0; /* the saturate flag was copied into the message descriptor above */
+}
+
+/**
+ * Fill in the message descriptor of \p insn for a URB FF_SYNC message.
+ *
+ * The message length is fixed at 1 and a header is always flagged as
+ * present; only \p allocate, \p response_length and \p end_of_thread come
+ * from the caller.  The urb_gen5 layout is used unconditionally.
+ */
+static void brw_set_ff_sync_message(struct brw_compile *p,
+                                    struct brw_instruction *insn,
+                                    bool allocate,
+                                    unsigned response_length,
+                                    bool end_of_thread)
+{
+   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
+                              1, response_length, true, end_of_thread);
+   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
+   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
+   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
+   insn->bits3.urb_gen5.allocate = allocate;
+   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
+   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
+}
+/**
+ * Fill in the message descriptor of \p insn for a URB write message.
+ *
+ * A header is always flagged as present.  The descriptor layout is chosen
+ * by hardware generation; the gen7 layout has no allocate/used fields, so
+ * \p allocate and \p used are ignored there.
+ */
+static void brw_set_urb_message( struct brw_compile *p,
+                                 struct brw_instruction *insn,
+                                 bool allocate,
+                                 bool used,
+                                 unsigned msg_length,
+                                 unsigned response_length,
+                                 bool end_of_thread,
+                                 bool complete,
+                                 unsigned offset,
+                                 unsigned swizzle_control )
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+
+   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
+                              msg_length, response_length, true, end_of_thread);
+   if (intel->gen == 7) {
+      insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
+      insn->bits3.urb_gen7.offset = offset;
+      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
+      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
+      /* per_slot_offset =
0 makes it ignore offsets in message header */
+      insn->bits3.urb_gen7.per_slot_offset = 0;
+      insn->bits3.urb_gen7.complete = complete;
+   } else if (intel->gen >= 5) {
+      insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
+      insn->bits3.urb_gen5.offset = offset;
+      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+      insn->bits3.urb_gen5.allocate = allocate;
+      insn->bits3.urb_gen5.used = used;	/* ? */
+      insn->bits3.urb_gen5.complete = complete;
+   } else {
+      insn->bits3.urb.opcode = 0;	/* ? */
+      insn->bits3.urb.offset = offset;
+      insn->bits3.urb.swizzle_control = swizzle_control;
+      insn->bits3.urb.allocate = allocate;
+      insn->bits3.urb.used = used;	/* ? */
+      insn->bits3.urb.complete = complete;
+   }
+}
+/**
+ * Fill in the message descriptor of \p insn for a data port write.
+ *
+ * The shared function ID is picked from the hardware generation (and, on
+ * gen7+, from \p msg_type).  The common descriptor is then written through
+ * brw_set_message_descriptor(), followed by the generation-specific data
+ * port fields.
+ */
+void
+brw_set_dp_write_message(struct brw_compile *p,
+                         struct brw_instruction *insn,
+                         unsigned binding_table_index,
+                         unsigned msg_control,
+                         unsigned msg_type,
+                         unsigned msg_length,
+                         bool header_present,
+                         unsigned last_render_target,
+                         unsigned response_length,
+                         unsigned end_of_thread,
+                         unsigned send_commit_msg)
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   unsigned sfid;
+
+   if (intel->gen >= 7) {
+      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
+      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
+         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+      else
+         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+   } else if (intel->gen == 6) {
+      /* Use the render cache for all write messages.
*/
+      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+   } else {
+      sfid = BRW_SFID_DATAPORT_WRITE;
+   }
+
+   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
+                              header_present, end_of_thread);
+
+   if (intel->gen >= 7) { /* gen6/gen7 layouts have no separate last_render_target field: it is OR-ed into msg_control */
+      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen7_dp.msg_control = msg_control |
+                                        last_render_target << 6;
+      insn->bits3.gen7_dp.msg_type = msg_type;
+   } else if (intel->gen == 6) {
+      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen6_dp.msg_control = msg_control |
+                                        last_render_target << 5;
+      insn->bits3.gen6_dp.msg_type = msg_type;
+      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
+   } else if (intel->gen == 5) {
+      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
+      insn->bits3.dp_write_gen5.msg_control = msg_control;
+      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
+      insn->bits3.dp_write_gen5.msg_type = msg_type;
+      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
+   } else {
+      insn->bits3.dp_write.binding_table_index = binding_table_index;
+      insn->bits3.dp_write.msg_control = msg_control;
+      insn->bits3.dp_write.last_render_target = last_render_target;
+      insn->bits3.dp_write.msg_type = msg_type;
+      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
+   }
+}
+/**
+ * Fill in the message descriptor of \p insn for a data port read.
+ *
+ * The shared function ID is picked from the hardware generation (and, on
+ * gen6, from \p target_cache).  The message is never marked end-of-thread.
+ */
+void
+brw_set_dp_read_message(struct brw_compile *p,
+                        struct brw_instruction *insn,
+                        unsigned binding_table_index,
+                        unsigned msg_control,
+                        unsigned msg_type,
+                        unsigned target_cache,
+                        unsigned msg_length,
+                        bool header_present,
+                        unsigned response_length)
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   unsigned sfid;
+
+   if (intel->gen >= 7) {
+      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+   } else if (intel->gen == 6) {
+      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
+         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+      else
+         sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
+   } else {
+      sfid = BRW_SFID_DATAPORT_READ;
+   }
+
+
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
+                              header_present, false);
+
+   if (intel->gen >= 7) { /* gen6+ layouts carry no target_cache field; it was only used to pick the SFID */
+      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen7_dp.msg_control = msg_control;
+      insn->bits3.gen7_dp.msg_type = msg_type;
+   } else if (intel->gen == 6) {
+      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen6_dp.msg_control = msg_control;
+      insn->bits3.gen6_dp.msg_type = msg_type;
+      insn->bits3.gen6_dp.send_commit_msg = 0;
+   } else if (intel->gen == 5) {
+      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
+      insn->bits3.dp_read_gen5.msg_control = msg_control;
+      insn->bits3.dp_read_gen5.msg_type = msg_type;
+      insn->bits3.dp_read_gen5.target_cache = target_cache;
+   } else if (intel->is_g4x) {
+      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
+      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
+      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
+      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
+   } else {
+      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
+      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
+      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
+   }
+}
+/**
+ * Fill in the message descriptor of \p insn for a sampler message.
+ *
+ * The common descriptor is written through brw_set_message_descriptor()
+ * with end-of-thread cleared, then the sampler fields are written using the
+ * layout that matches the hardware generation.  Not every layout has a
+ * field for \p simd_mode or \p return_format; those arguments are ignored
+ * where the field does not exist.
+ */
+void
+brw_set_sampler_message(struct brw_compile *p,
+                        struct brw_instruction *insn,
+                        unsigned binding_table_index,
+                        unsigned sampler,
+                        unsigned msg_type,
+                        unsigned response_length,
+                        unsigned msg_length,
+                        unsigned header_present,
+                        unsigned simd_mode,
+                        unsigned return_format)
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+
+   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
+                              response_length, header_present, false);
+
+   if (intel->gen >= 7) {
+      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
+      insn->bits3.sampler_gen7.sampler = sampler;
+      insn->bits3.sampler_gen7.msg_type =
msg_type;
+      insn->bits3.sampler_gen7.simd_mode = simd_mode;
+   } else if (intel->gen >= 5) {
+      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
+      insn->bits3.sampler_gen5.sampler = sampler;
+      insn->bits3.sampler_gen5.msg_type = msg_type;
+      insn->bits3.sampler_gen5.simd_mode = simd_mode;
+   } else if (intel->is_g4x) {
+      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+      insn->bits3.sampler_g4x.sampler = sampler;
+      insn->bits3.sampler_g4x.msg_type = msg_type;
+   } else {
+      insn->bits3.sampler.binding_table_index = binding_table_index;
+      insn->bits3.sampler.sampler = sampler;
+      insn->bits3.sampler.msg_type = msg_type;
+      insn->bits3.sampler.return_format = return_format;
+   }
+}
+
+/**
+ * Append a new instruction to the instruction store and return it.
+ *
+ * The store is doubled with reralloc() when it is full.  The new slot is
+ * initialised as a copy of p->current and then given \p opcode.  If
+ * p->current carries a conditional modifier, that modifier is cleared on
+ * p->current (after the copy) and its predicate control is set to
+ * BRW_PREDICATE_NORMAL.  p->next_insn_offset advances by 16 per instruction.
+ *
+ * The rest of this file calls it through the next_insn alias.
+ */
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, unsigned opcode)
+{
+   struct brw_instruction *insn;
+
+   if (p->nr_insn + 1 > p->store_size) {
+      if (0)
+         printf("incresing the store size to %d\n", p->store_size << 1);
+      p->store_size <<= 1;
+      p->store = reralloc(p->mem_ctx, p->store,
+                          struct brw_instruction, p->store_size);
+      if (!p->store)
+         assert(!"realloc eu store memeory failed");
+   }
+
+   p->next_insn_offset += 16;
+   insn = &p->store[p->nr_insn++];
+   memcpy(insn, p->current, sizeof(*insn));
+
+   /* Reset this one-shot flag:
+    */
+
+   if (p->current->header.destreg__conditionalmod) {
+      p->current->header.destreg__conditionalmod = 0;
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+
+   insn->header.opcode = opcode;
+   return insn;
+}
+/**
+ * Emit a one-source instruction: allocate it with next_insn() and encode
+ * \p dest and \p src.
+ */
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+                                         unsigned opcode,
+                                         struct brw_reg dest,
+                                         struct brw_reg src )
+{
+   struct brw_instruction *insn = next_insn(p, opcode);
+   brw_set_dest(p, insn, dest);
+   brw_set_src0(p, insn, src);
+   return insn;
+}
+/**
+ * Emit a two-source instruction: allocate it with next_insn() and encode
+ * \p dest, \p src0 and \p src1, in that order.
+ */
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                        unsigned opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1 )
+{
+   struct brw_instruction *insn =
next_insn(p, opcode);
+   brw_set_dest(p, insn, dest);
+   brw_set_src0(p, insn, src0);
+   brw_set_src1(p, insn, src1);
+   return insn;
+}
+/**
+ * Return the sub-register number to encode for a three-source operand.
+ *
+ * This is reg.subnr / 4.  When the vertical stride is 0, the operand must
+ * have a single-value swizzle, and component 0 of that swizzle is added.
+ */
+static int
+get_3src_subreg_nr(struct brw_reg reg)
+{
+   if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
+      assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
+      return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
+   } else {
+      return reg.subnr / 4;
+   }
+}
+/**
+ * Translate a BRW_REGISTER_TYPE_* value into the matching
+ * BRW_REGISTER_3SRC_TYPE_* value.
+ *
+ * Only F, D and UD are accepted (asserted).  The trailing return is only
+ * reached for other types when assertions are compiled out.
+ */
+static int get_3src_type(int type)
+{
+   assert(type == BRW_REGISTER_TYPE_F ||
+          type == BRW_REGISTER_TYPE_D ||
+          type == BRW_REGISTER_TYPE_UD);
+
+   switch(type) {
+   case BRW_REGISTER_TYPE_F: return BRW_REGISTER_3SRC_TYPE_F;
+   case BRW_REGISTER_TYPE_D: return BRW_REGISTER_3SRC_TYPE_D;
+   case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_3SRC_TYPE_UD;
+   }
+
+   return BRW_REGISTER_3SRC_TYPE_F;
+}
+/**
+ * Encode \p dest as the destination of a three-source instruction.
+ *
+ * The instruction must be in align16 mode, and the destination must be a
+ * directly addressed general or message register with nr < 128.
+ */
+void
+brw_set_3src_dest(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg dest)
+{
+   gen7_convert_mrf_to_grf(p, &dest);
+
+   assert(insn->header.access_mode == BRW_ALIGN_16);
+
+   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+          dest.file == BRW_MESSAGE_REGISTER_FILE);
+   assert(dest.nr < 128);
+   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+   insn->bits1.da3src.dest_reg_type = get_3src_type(dest.type);
+   insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
+   insn->bits1.da3src.dest_reg_nr = dest.nr;
+   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
+   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
+   guess_execution_size(p, insn, dest);
+}
+/**
+ * Encode \p src0 as source 0 of a three-source instruction.
+ *
+ * This also sets da3src.src_reg_type, the single source-type field that
+ * the src1 and src2 setters check their operands against.
+ */
+void
+brw_set_3src_src0(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg src0)
+{
+   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src0.nr < 128);
+   insn->bits1.da3src.src_reg_type = get_3src_type(src0.type);
+   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
+   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
+   insn->bits2.da3src.src0_reg_nr = src0.nr;
+
insn->bits1.da3src.src0_abs = src0.abs; + insn->bits1.da3src.src0_negate = src0.negate; + insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0; +} + +void +brw_set_3src_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg src1) +{ + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + assert(src1.address_mode == BRW_ADDRESS_DIRECT); + assert(src1.nr < 128); + assert(src1.type == insn->bits1.da3src.src_reg_type); + insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle; + insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3; + insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2; + insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0; + insn->bits3.da3src.src1_reg_nr = src1.nr; + insn->bits1.da3src.src1_abs = src1.abs; + insn->bits1.da3src.src1_negate = src1.negate; +} + +void +brw_set_3src_src2(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg src2) +{ + assert(src2.file == BRW_GENERAL_REGISTER_FILE); + assert(src2.address_mode == BRW_ADDRESS_DIRECT); + assert(src2.nr < 128); + assert(src2.type == insn->bits1.da3src.src_reg_type); + insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle; + insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2); + insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0; + insn->bits3.da3src.src2_reg_nr = src2.nr; + insn->bits1.da3src.src2_abs = src2.abs; + insn->bits1.da3src.src2_negate = src2.negate; +} + +static struct brw_instruction *brw_alu3(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1, + struct brw_reg src2) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_3src_dest(p, insn, dest); + brw_set_3src_src0(p, insn, src0); + brw_set_3src_src1(p, insn, src1); + brw_set_3src_src2(p, insn, src2); + return insn; +} + + +/*********************************************************************** + * Convenience 
routines.
+ */
+#define ALU1(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0)   			\
+{							\
+   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
+} /* ALU1(OP) defines brw_<OP>(p, dest, src0) as a call to brw_alu1() with BRW_OPCODE_<OP>. */
+/* ALU2(OP) defines brw_<OP>(p, dest, src0, src1) as a call to brw_alu2()
+ * with BRW_OPCODE_<OP>.
+ */
+#define ALU2(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1)   			\
+{							\
+   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
+}
+/* ALU3(OP) defines brw_<OP>(p, dest, src0, src1, src2) as a call to
+ * brw_alu3() with BRW_OPCODE_<OP>.
+ */
+#define ALU3(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1,			\
+	      struct brw_reg src2)   			\
+{							\
+   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
+}
+
+/* Rounding operations (other than RNDD) require two instructions - the first
+ * stores a rounded value (possibly the wrong way) in the dest register, but
+ * also sets a per-channel "increment bit" in the flag register.  A predicated
+ * add of 1.0 fixes dest to contain the desired result.
+ *
+ * Sandybridge and later appear to round correctly without an ADD.
+ */
+#define ROUND(OP)							      \
+void brw_##OP(struct brw_compile *p,					      \
+	      struct brw_reg dest,					      \
+	      struct brw_reg src)					      \
+{									      \
+   struct brw_instruction *rnd, *add;					      \
+   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
+   brw_set_dest(p, rnd, dest);						      \
+   brw_set_src0(p, rnd, src);						      \
+									      \
+   if (p->brw->intel.gen < 6) {						      \
+      /* turn on round-increments */					      \
+      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
+      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
+      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
+   }									      \
+}
+
+/* Instantiate the simple emitters; each line defines one brw_<OP>() function. */
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+ALU2(PLN)
+ALU3(MAD)
+
+ROUND(RNDZ)
+ROUND(RNDE)
+
+/**
+ * Emit an ADD.
+ *
+ * Asserts that a float (or vector-float immediate) source is not combined
+ * with a D or UD source, then defers to brw_alu2().
+ */
+struct brw_instruction *brw_ADD(struct brw_compile *p,
+				struct brw_reg dest,
+				struct brw_reg src0,
+				struct brw_reg src1)
+{
+   /* 6.2.2: add */
+   if (src0.type == BRW_REGISTER_TYPE_F ||
+       (src0.file == BRW_IMMEDIATE_VALUE &&
+	src0.type == BRW_REGISTER_TYPE_VF)) {
+      assert(src1.type != BRW_REGISTER_TYPE_UD);
+      assert(src1.type != BRW_REGISTER_TYPE_D);
+   }
+
+   if (src1.type == BRW_REGISTER_TYPE_F ||
+       (src1.file == BRW_IMMEDIATE_VALUE &&
+	src1.type == BRW_REGISTER_TYPE_VF)) {
+      assert(src0.type != BRW_REGISTER_TYPE_UD);
+      assert(src0.type != BRW_REGISTER_TYPE_D);
+   }
+
+   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
+}
+/**
+ * Emit an AVG.
+ *
+ * Asserts that the destination and both sources share one type and that
+ * the type is one of B, UB, W, UW, D or UD, then defers to brw_alu2().
+ */
+struct brw_instruction *brw_AVG(struct brw_compile *p,
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1)
+{
+   assert(dest.type == src0.type);
+   assert(src0.type == src1.type);
+   switch (src0.type) {
+   case BRW_REGISTER_TYPE_B:
+   case BRW_REGISTER_TYPE_UB:
+   case BRW_REGISTER_TYPE_W:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_UD:
+      break;
+   default:
+      assert(!"Bad type for brw_AVG");
+   }
+
+   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
+}
+/**
+ * Emit a MUL.
+ *
+ * Asserts the operand type combinations checked in the body and that
+ * neither source is the accumulator, then defers to brw_alu2().
+ */
+struct brw_instruction *brw_MUL(struct
brw_compile *p,
+				struct brw_reg dest,
+				struct brw_reg src0,
+				struct brw_reg src1)
+{
+   /* 6.32.38: mul */
+   if (src0.type == BRW_REGISTER_TYPE_D ||
+       src0.type == BRW_REGISTER_TYPE_UD ||
+       src1.type == BRW_REGISTER_TYPE_D ||
+       src1.type == BRW_REGISTER_TYPE_UD) {
+      assert(dest.type != BRW_REGISTER_TYPE_F);
+   }
+
+   if (src0.type == BRW_REGISTER_TYPE_F ||
+       (src0.file == BRW_IMMEDIATE_VALUE &&
+	src0.type == BRW_REGISTER_TYPE_VF)) {
+      assert(src1.type != BRW_REGISTER_TYPE_UD);
+      assert(src1.type != BRW_REGISTER_TYPE_D);
+   }
+
+   if (src1.type == BRW_REGISTER_TYPE_F ||
+       (src1.file == BRW_IMMEDIATE_VALUE &&
+	src1.type == BRW_REGISTER_TYPE_VF)) {
+      assert(src0.type != BRW_REGISTER_TYPE_UD);
+      assert(src0.type != BRW_REGISTER_TYPE_D);
+   }
+
+   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+	  src0.nr != BRW_ARF_ACCUMULATOR);
+   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+	  src1.nr != BRW_ARF_ACCUMULATOR);
+
+   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
+}
+
+/**
+ * Emit a NOP.  Its destination and src0 are encoded as GRF 0 retyped to UD
+ * and src1 as an immediate 0.
+ */
+void brw_NOP(struct brw_compile *p)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
+   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, insn, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+/**
+ * Emit a JMPI.
+ *
+ * The instruction is built by brw_alu2(), then forced to execution size 1,
+ * no compression and mask disabled.  As a side effect the default
+ * instruction's predicate control is reset to BRW_PREDICATE_NONE.
+ */
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+                                 struct brw_reg dest,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+   insn->header.execution_size = 1;
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.mask_control = BRW_MASK_DISABLE;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return insn;
+}
+/**
+ * Push \p inst onto the if-stack.
+ *
+ * The stack stores the instruction's index in p->store, not the pointer.
+ * The stack array is doubled once the new depth reaches its size.
+ */
+static void
+push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+{
+   p->if_stack[p->if_stack_depth] = inst - p->store;
+
+
p->if_stack_depth++; + if (p->if_stack_array_size <= p->if_stack_depth) { + p->if_stack_array_size *= 2; + p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, + p->if_stack_array_size); + } +} + +static struct brw_instruction * +pop_if_stack(struct brw_compile *p) +{ + p->if_stack_depth--; + return &p->store[p->if_stack[p->if_stack_depth]]; +} + +static void +push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) +{ + if (p->loop_stack_array_size < p->loop_stack_depth) { + p->loop_stack_array_size *= 2; + p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, + p->loop_stack_array_size); + p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, + p->loop_stack_array_size); + } + + p->loop_stack[p->loop_stack_depth] = inst - p->store; + p->loop_stack_depth++; + p->if_depth_in_loop[p->loop_stack_depth] = 0; +} + +static struct brw_instruction * +get_inner_do_insn(struct brw_compile *p) +{ + return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. 
+ */
+struct brw_instruction *
+brw_IF(struct brw_compile *p, unsigned execute_size)
+{
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_instruction *insn;
+
+   insn = next_insn(p, BRW_OPCODE_IF);
+
+   /* Override the defaults for this instruction:
+    */
+   if (intel->gen < 6) {
+      brw_set_dest(p, insn, brw_ip_reg());
+      brw_set_src0(p, insn, brw_ip_reg());
+      brw_set_src1(p, insn, brw_imm_d(0x0));
+   } else if (intel->gen == 6) {
+      brw_set_dest(p, insn, brw_imm_w(0));
+      insn->bits1.branch_gen6.jump_count = 0; /* jump targets are left at 0 here and patched later (see patch_IF_ELSE) */
+      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+   } else {
+      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+      brw_set_src1(p, insn, brw_imm_ud(0));
+      insn->bits3.break_cont.jip = 0;
+      insn->bits3.break_cont.uip = 0;
+   }
+
+   insn->header.execution_size = execute_size;
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+   if (!p->single_program_flow)
+      insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE; /* the IF used up the predicate; reset the default */
+
+   push_if_stack(p, insn); /* records the IF's index in p->store */
+   p->if_depth_in_loop[p->loop_stack_depth]++; /* one more open IF at the current loop nesting depth */
+   return insn;
+}
+
+/* This function is only used for gen6-style IF instructions with an
+ * embedded comparison (conditional modifier).  It is not used on gen7.
+ */
+struct brw_instruction *
+gen6_IF(struct brw_compile *p, uint32_t conditional,
+	struct brw_reg src0, struct brw_reg src1)
+{
+   struct brw_instruction *insn;
+
+   insn = next_insn(p, BRW_OPCODE_IF);
+
+   brw_set_dest(p, insn, brw_imm_w(0));
+   if (p->compressed) { /* execution size follows the compile state's compressed flag */
+      insn->header.execution_size = BRW_EXECUTE_16;
+   } else {
+      insn->header.execution_size = BRW_EXECUTE_8;
+   }
+   insn->bits1.branch_gen6.jump_count = 0;
+   brw_set_src0(p, insn, src0);
+   brw_set_src1(p, insn, src1);
+
+   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
+   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
+   insn->header.destreg__conditionalmod = conditional; /* the comparison is carried by the IF itself */
+
+   if (!p->single_program_flow)
+      insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   push_if_stack(p, insn); /* records the IF's index in p->store; unlike brw_IF(), if_depth_in_loop is not touched */
+   return insn;
+}
+
+/**
+ * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
+ */
+static void
+convert_IF_ELSE_to_ADD(struct brw_compile *p,
+		       struct brw_instruction *if_inst,
+		       struct brw_instruction *else_inst)
+{
+   /* The next instruction (where the ENDIF would be, if it existed) */
+   struct brw_instruction *next_inst = &p->store[p->nr_insn];
+
+   assert(p->single_program_flow);
+   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+   assert(if_inst->header.execution_size == BRW_EXECUTE_1);
+
+   /* Convert IF to an ADD instruction that moves the instruction pointer
+    * to the first instruction of the ELSE block.  If there is no ELSE
+    * block, point to where ENDIF would be.  Reverse the predicate.
+    *
+    * There's no need to execute an ENDIF since we don't need to do any
+    * stack operations, and if we're currently executing, we just want to
+    * continue normally.
+    */
+   if_inst->header.opcode = BRW_OPCODE_ADD;
+   if_inst->header.predicate_inverse = 1;
+
+   if (else_inst != NULL) {
+      /* Convert ELSE to an ADD instruction that points where the ENDIF
+       * would be.
+       */
+      else_inst->header.opcode = BRW_OPCODE_ADD;
+
+      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; /* distance to the instruction after the ELSE, at 16 per instruction */
+      else_inst->bits3.ud = (next_inst - else_inst) * 16;
+   } else {
+      if_inst->bits3.ud = (next_inst - if_inst) * 16;
+   }
+}
+
+/**
+ * Patch IF and ELSE instructions with appropriate jump targets.
+ */
+static void
+patch_IF_ELSE(struct brw_compile *p,
+	      struct brw_instruction *if_inst,
+	      struct brw_instruction *else_inst,
+	      struct brw_instruction *endif_inst)
+{
+   struct intel_context *intel = &p->brw->intel;
+
+   /* We shouldn't be patching IF and ELSE instructions in single program flow
+    * mode when gen < 6, because in single program flow mode on those
+    * platforms, we convert flow control instructions to conditional ADDs that
+    * operate on IP (see brw_ENDIF).
+    *
+    * However, on Gen6, writing to IP doesn't work in single program flow mode
+    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
+    * not be updated by non-flow control instructions.").  And on later
+    * platforms, there is no significant benefit to converting control flow
+    * instructions to conditional ADDs.  So we do patch IF and ELSE
+    * instructions in single program flow mode on those platforms.
+    */
+   if (intel->gen < 6)
+      assert(!p->single_program_flow);
+
+   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+   assert(endif_inst != NULL);
+   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+
+   unsigned br = 1; /* jump-count units per instruction */
+   /* Jump count is for 64bit data chunk each, so one 128bit instruction
+    * requires 2 chunks.
+    */
+   if (intel->gen >= 5)
+      br = 2;
+
+   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
+   endif_inst->header.execution_size = if_inst->header.execution_size; /* the ENDIF takes the IF's execution size */
+
+   if (else_inst == NULL) {
+      /* Patch IF -> ENDIF */
+      if (intel->gen < 6) {
+	 /* Turn it into an IFF, which means no mask stack operations for
+	  * all-false and jumping past the ENDIF.
+ */ + if_inst->header.opcode = BRW_OPCODE_IFF; + if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + /* As of gen6, there is no IFF and IF must point to the ENDIF. */ + if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); + } else { + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); + } + } else { + else_inst->header.execution_size = if_inst->header.execution_size; + + /* Patch IF -> ELSE */ + if (intel->gen < 6) { + if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); + } + + /* Patch ELSE -> ENDIF */ + if (intel->gen < 6) { + /* BRW_OPCODE_ELSE pre-gen6 should point just past the + * matching ENDIF. + */ + else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); + else_inst->bits3.if_else.pop_count = 1; + else_inst->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ + else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); + } else { + /* The IF instruction's JIP should point just past the ELSE */ + if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); + /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); + } + } +} + +void +brw_ELSE(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + insn = next_insn(p, BRW_OPCODE_ELSE); + + if (intel->gen < 6) { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (intel->gen == 6) { + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = 0; + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } else { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = 0; + insn->bits3.break_cont.uip = 0; + } + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + push_if_stack(p, insn); +} + +void +brw_ENDIF(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn = NULL; + struct brw_instruction *else_inst = NULL; + struct brw_instruction *if_inst = NULL; + struct brw_instruction *tmp; + bool emit_endif = true; + + /* In single program flow mode, we can express IF and ELSE instructions + * equivalently as ADD instructions that operate on IP. 
On platforms prior + * to Gen6, flow control instructions cause an implied thread switch, so + * this is a significant savings. + * + * However, on Gen6, writing to IP doesn't work in single program flow mode + * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may + * not be updated by non-flow control instructions."). And on later + * platforms, there is no significant benefit to converting control flow + * instructions to conditional ADDs. So we only do this trick on Gen4 and + * Gen5. + */ + if (intel->gen < 6 && p->single_program_flow) + emit_endif = false; + + /* + * A single next_insn() may change the base address of instruction store + * memory(p->store), so call it first before referencing the instruction + * store pointer from an index + */ + if (emit_endif) + insn = next_insn(p, BRW_OPCODE_ENDIF); + + /* Pop the IF and (optional) ELSE instructions from the stack */ + p->if_depth_in_loop[p->loop_stack_depth]--; + tmp = pop_if_stack(p); + if (tmp->header.opcode == BRW_OPCODE_ELSE) { + else_inst = tmp; + tmp = pop_if_stack(p); + } + if_inst = tmp; + + if (!emit_endif) { + /* ENDIF is useless; don't bother emitting it. 
*/ + convert_IF_ELSE_to_ADD(p, if_inst, else_inst); + return; + } + + if (intel->gen < 6) { + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (intel->gen == 6) { + brw_set_dest(p, insn, brw_imm_w(0)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } else { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_ud(0)); + } + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Also pop item off the stack in the endif instruction: */ + if (intel->gen < 6) { + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + insn->bits1.branch_gen6.jump_count = 2; + } else { + insn->bits3.break_cont.jip = 2; + } + patch_IF_ELSE(p, if_inst, else_inst, insn); +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + insn = next_insn(p, BRW_OPCODE_BREAK); + if (intel->gen >= 6) { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction 
*gen6_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; + return insn; +} + +struct brw_instruction *gen6_HALT(struct brw_compile *p) +{ + struct brw_instruction *insn; + + insn = next_insn(p, BRW_OPCODE_HALT); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ + + if (p->compressed) { + insn->header.execution_size = BRW_EXECUTE_16; + } else { + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + } + return insn; +} + +/* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. 
+ * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. + */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { + push_loop_stack(p, &p->store[p->nr_insn]); + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + push_loop_stack(p, insn); + + /* Override the defaults for this instruction: + */ + brw_set_dest(p, insn, brw_null_reg()); + brw_set_src0(p, insn, brw_null_reg()); + brw_set_src1(p, insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + +/** + * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE + * instruction here. + * + * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop + * nesting, since it can always just point to the end of the block/current loop. + */ +static void +brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *do_inst = get_inner_do_insn(p); + struct brw_instruction *inst; + int br = (intel->gen == 5) ? 2 : 1; + + for (inst = while_inst - 1; inst != do_inst; inst--) { + /* If the jump count is != 0, that means that this instruction has already + * been patched because it's part of a loop inside of the one we're + * patching. 
+ */ + if (inst->header.opcode == BRW_OPCODE_BREAK && + inst->bits3.if_else.jump_count == 0) { + inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1); + } else if (inst->header.opcode == BRW_OPCODE_CONTINUE && + inst->bits3.if_else.jump_count == 0) { + inst->bits3.if_else.jump_count = br * (while_inst - inst); + } + } +} + +struct brw_instruction *brw_WHILE(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn, *do_insn; + unsigned br = 1; + + if (intel->gen >= 5) + br = 2; + + if (intel->gen >= 7) { + insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); + + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = br * (do_insn - insn); + + insn->header.execution_size = BRW_EXECUTE_8; + } else if (intel->gen == 6) { + insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); + + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + + insn->header.execution_size = BRW_EXECUTE_8; + } else { + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + do_insn = get_inner_do_insn(p); + + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); + do_insn = get_inner_do_insn(p); + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0)); + + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * 
(do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + + brw_patch_break_cont(p, insn); + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + p->loop_stack_depth--; + + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx]; + unsigned jmpi = 1; + + if (intel->gen >= 5) + jmpi = 2; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1); +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditionalmod = conditional; + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + +/* Issue 'wait' instruction for n1, host could program MMIO + to wake up thread. 
*/ +void brw_WAIT (struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); + struct brw_reg src = brw_notification_1_reg(); + + brw_set_dest(p, insn, src); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + insn->header.execution_size = 0; /* must */ + insn->header.predicate_control = 0; + insn->header.compression_control = 0; +} + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/** Extended math function, float[8]. + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ) +{ + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + if (intel->gen == 6) + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + + /* Source modifiers are ignored for extended math instructions on Gen6. */ + if (intel->gen == 6) { + assert(!src.negate); + assert(!src.abs); + } + + if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || + function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || + function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src.type != BRW_REGISTER_TYPE_F); + } else { + assert(src.type == BRW_REGISTER_TYPE_F); + } + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + /* Example code doesn't set predicate_control for send + * instructions. 
+ */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_math_message(p, + insn, + function, + src.type == BRW_REGISTER_TYPE_D, + precision, + data_type); + } +} + +/** Extended math function, float[8]. + */ +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + unsigned function, + struct brw_reg src0, + struct brw_reg src1) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + + assert(intel->gen >= 6); + (void) intel; + + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src0.file == BRW_GENERAL_REGISTER_FILE); + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + if (intel->gen == 6) { + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + } + + if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || + function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || + function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src0.type != BRW_REGISTER_TYPE_F); + assert(src1.type != BRW_REGISTER_TYPE_F); + } else { + assert(src0.type == BRW_REGISTER_TYPE_F); + assert(src1.type == BRW_REGISTER_TYPE_F); + } + + /* Source modifiers are ignored for extended math instructions on Gen6. */ + if (intel->gen == 6) { + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + } + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); +} + + +/** + * Write a block of OWORDs (half a GRF each) to the scratch buffer, + * using a constant offset per channel. + * + * The offset must be aligned to oword size (16 bytes). 
Used for + * register spilling. + */ +void brw_oword_block_write_scratch(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + unsigned offset) +{ + struct intel_context *intel = &p->brw->intel; + uint32_t msg_control, msg_type; + int mlen; + + if (intel->gen >= 6) + offset /= 16; + + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + mlen = 2; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + mlen = 3; + } + + /* Set up the message header. This is g0, with g0.2 filled with + * the offset. We don't want to leave our offset around in g0 or + * it'll screw up texture samples, so set it up inside the message + * reg. + */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + mrf.nr, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_reg dest; + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + int send_commit_msg; + struct brw_reg src_header = retype(brw_vec8_grf(0, 0), + BRW_REGISTER_TYPE_UW); + + if (insn->header.compression_control != BRW_COMPRESSION_NONE) { + insn->header.compression_control = BRW_COMPRESSION_NONE; + src_header = vec16(src_header); + } + assert(insn->header.predicate_control == BRW_PREDICATE_NONE); + insn->header.destreg__conditionalmod = mrf.nr; + + /* Until gen6, writes followed by reads from the same location + * are not guaranteed to be ordered unless write_commit is set. + * If set, then a no-op write is issued to the destination + * register to set a dependency, and a read from the destination + * can be used to ensure the ordering. 
+ * + * For gen6, only writes between different threads need ordering + * protection. Our use of DP writes is all about register + * spilling within a thread. + */ + if (intel->gen >= 6) { + dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + send_commit_msg = 0; + } else { + dest = src_header; + send_commit_msg = 1; + } + + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + + brw_set_dp_write_message(p, + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + msg_type, + mlen, + true, /* header_present */ + 0, /* not a render target */ + send_commit_msg, /* response_length */ + 0, /* eot */ + send_commit_msg); + } +} + + +/** + * Read a block of owords (half a GRF each) from the scratch buffer + * using a constant index per channel. + * + * Offset must be aligned to oword size (16 bytes). Used for register + * spilling. 
+ */ +void +brw_oword_block_read_scratch(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + unsigned offset) +{ + struct intel_context *intel = &p->brw->intel; + uint32_t msg_control; + int rlen; + + if (intel->gen >= 6) + offset /= 16; + + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); + dest = retype(dest, BRW_REGISTER_TYPE_UW); + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + rlen = 1; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + rlen = 2; + } + + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + mrf.nr, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(insn->header.predicate_control == 0); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = mrf.nr; + + brw_set_dest(p, insn, dest); /* UW? */ + if (intel->gen >= 6) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + 255, /* binding table index (255=stateless) */ + msg_control, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 1, /* msg_length */ + true, /* header_present */ + rlen); + } +} + +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. 
+ */ +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t offset, + uint32_t bind_table_index) +{ + struct intel_context *intel = &p->brw->intel; + + /* On newer hardware, offset is in units of owords. */ + if (intel->gen >= 6) + offset /= 16; + + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + mrf.nr, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(offset)); + + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = mrf.nr; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 1, /* msg_length */ + true, /* header_present */ + 1); /* response_length (1 reg, 2 owords!) 
*/ + + brw_pop_insn_state(p); +} + + +void brw_fb_WRITE(struct brw_compile *p, + int dispatch_width, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned msg_control, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + bool eot, + bool header_present) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + unsigned msg_type; + struct brw_reg dest; + + if (dispatch_width == 16) + dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); + + if (intel->gen >= 6) { + insn = next_insn(p, BRW_OPCODE_SENDC); + } else { + insn = next_insn(p, BRW_OPCODE_SEND); + } + /* The execution mask is ignored for render target writes. */ + insn->header.predicate_control = 0; + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (intel->gen >= 6) { + /* headerless version, just submit color payload */ + src0 = brw_message_reg(msg_reg_nr); + + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } else { + insn->header.destreg__conditionalmod = msg_reg_nr; + + msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + } + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, + insn, + binding_table_index, + msg_control, + msg_type, + msg_length, + header_present, + eot, /* last render target write */ + response_length, + eot, + 0 /* send_commit_msg */); +} + + +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. 
+ */ +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + unsigned header_present, + unsigned simd_mode, + unsigned return_format) +{ + struct intel_context *intel = &p->brw->intel; + bool need_stall = 0; + + if (writemask == 0) { + /*printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. 
+ */ + if (writemask != BRW_WRITEMASK_XYZW) { + unsigned dst_offset = 0; + unsigned i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { + need_stall = 1; + /* printf("need stall %x %x\n", newmask , writemask); */ + } + else { + bool dispatch_16 = false; + + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + guess_execution_size(p, p->current, dest); + if (p->current->header.execution_size == BRW_EXECUTE_16) + dispatch_16 = true; + + newmask = ~newmask & BRW_WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + + /* For 16-wide dispatch, masked channels are skipped in the + * response. For 8-wide, masked channels still take up slots, + * and are just not written to. 
+ */ + if (dispatch_16) + response_length = len * 2; + } + } + + { + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_sampler_message(p, insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + header_present, + simd_mode, + return_format); + } + + if (need_stall) { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD), + retype(reg, BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. 
+ */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + bool used, + unsigned msg_length, + unsigned response_length, + bool eot, + bool writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + if (intel->gen == 7) { + /* Enable Channel Masks in the URB_WRITE_HWORD message header */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), + BRW_REGISTER_TYPE_UD), + retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), + brw_imm_ud(0xff00)); + brw_pop_insn_state(p); + } + + insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < BRW_MAX_MRF); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_urb_message(p, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + +static int +next_ip(struct brw_compile *p, int ip) +{ + struct brw_instruction *insn = (void *)p->store + ip; + + if (insn->header.cmpt_control) + return ip + 8; + else + return ip + 16; +} + +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + void *store = p->store; + + for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) { + struct brw_instruction *insn = store + ip; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + case BRW_OPCODE_HALT: + return ip; + } + } + + return 0; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. 
+ */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int scale = 8; + void *store = p->store; + + /* Always start after the instruction (such as a WHILE) we're trying to fix + * up. + */ + for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) { + struct brw_instruction *insn = store + ip; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count + : insn->bits3.break_cont.jip; + if (ip + jip * scale <= start) + return ip; + } + } + assert(!"not reached"); + return start; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK, CONT, and HALT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int scale = 8; + void *store = p->store; + + if (intel->gen < 6) + return; + + for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) { + struct brw_instruction *insn = store + ip; + + if (insn->header.cmpt_control) { + /* Fixups for compacted BREAK/CONTINUE not supported yet. */ + assert(insn->header.opcode != BRW_OPCODE_BREAK && + insn->header.opcode != BRW_OPCODE_CONTINUE && + insn->header.opcode != BRW_OPCODE_HALT); + continue; + } + + int block_end_ip = brw_find_next_block_end(p, ip); + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + assert(block_end_ip != 0); + insn->bits3.break_cont.jip = (block_end_ip - ip) / scale; + /* Gen7 UIP points to WHILE; Gen6 points just after it */ + insn->bits3.break_cont.uip = + (brw_find_loop_end(p, ip) - ip + + (intel->gen == 6 ? 
16 : 0)) / scale; + break; + case BRW_OPCODE_CONTINUE: + assert(block_end_ip != 0); + insn->bits3.break_cont.jip = (block_end_ip - ip) / scale; + insn->bits3.break_cont.uip = + (brw_find_loop_end(p, ip) - ip) / scale; + + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + + case BRW_OPCODE_ENDIF: + if (block_end_ip == 0) + insn->bits3.break_cont.jip = 2; + else + insn->bits3.break_cont.jip = (block_end_ip - ip) / scale; + break; + + case BRW_OPCODE_HALT: + /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): + * + * "In case of the halt instruction not inside any conditional + * code block, the value of <JIP> and <UIP> should be the + * same. In case of the halt instruction inside conditional code + * block, the <UIP> should be the end of the program, and the + * <JIP> should be end of the most inner conditional code block." + * + * The uip will have already been set by whoever set up the + * instruction. + */ + if (block_end_ip == 0) { + insn->bits3.break_cont.jip = insn->bits3.break_cont.uip; + } else { + insn->bits3.break_cont.jip = (block_end_ip - ip) / scale; + } + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + bool allocate, + unsigned response_length, + bool eot) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p, + insn, + allocate, + response_length, + eot); +} + +/** + * Emit the SEND instruction necessary to generate stream output data on Gen6 + * (for transform feedback). 
+ * + * If send_commit_msg is true, this is the last piece of stream output data + * from this thread, so send the data as a committed write. According to the + * Sandy Bridge PRM (volume 2 part 1, section 4.5.1): + * + * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all + * writes are complete by sending the final write as a committed write." + */ +void +brw_svb_write(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + bool send_commit_msg) +{ + struct brw_instruction *insn; + + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); + brw_set_dp_write_message(p, insn, + binding_table_index, + 0, /* msg_control: ignored */ + GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, + 1, /* msg_length */ + true, /* header_present */ + 0, /* last_render_target: ignored */ + send_commit_msg, /* response_length */ + 0, /* end_of_thread */ + send_commit_msg); /* send_commit_msg */ +} + +/** + * This instruction is generated as a single-channel align1 instruction by + * both the VS and FS stages when using INTEL_DEBUG=shader_time. + * + * We can't use the typed atomic op in the FS because that has the execution + * mask ANDed with the pixel mask, but we just want to write the one dword for + * all the pixels. + * + * We don't use the SIMD4x2 atomic ops in the VS because want to just write + * one u32. So we use the same untyped atomic write message as the pixel + * shader. + * + * The untyped atomic operation requires a BUFFER surface type with RAW + * format, and is only accessible through the legacy DATA_CACHE dataport + * messages. 
+ */ +void brw_shader_time_add(struct brw_compile *p, + int base_mrf, + uint32_t surf_index) +{ + struct intel_context *intel = &p->brw->intel; + assert(intel->gen >= 7); + + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_mask_control(p, BRW_MASK_DISABLE); + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_pop_insn_state(p); + + /* We use brw_vec1_reg and unmasked because we want to increment the given + * offset only once. + */ + brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, 0)); + brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + base_mrf, 0)); + + bool header_present = false; + bool eot = false; + uint32_t mlen = 2; /* offset, value */ + uint32_t rlen = 0; + brw_set_message_descriptor(p, send, + GEN7_SFID_DATAPORT_DATA_CACHE, + mlen, rlen, header_present, eot); + + send->bits3.ud |= 6 << 14; /* untyped atomic op */ + send->bits3.ud |= 0 << 13; /* no return data */ + send->bits3.ud |= 1 << 12; /* SIMD8 mode */ + send->bits3.ud |= BRW_AOP_ADD << 8; + send->bits3.ud |= surf_index << 0; +} diff --git a/assembler/brw_eu_util.c b/assembler/brw_eu_util.c new file mode 100644 index 0000000..f9126ab --- /dev/null +++ b/assembler/brw_eu_util.c @@ -0,0 +1,125 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count) +{ + unsigned i; + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/assembler/brw_reg.h b/assembler/brw_reg.h new file mode 100644 index 0000000..f225915 --- /dev/null +++ b/assembler/brw_reg.h @@ -0,0 +1,808 @@ +/* + Copyright 
(C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +/** @file brw_reg.h + * + * This file defines struct brw_reg, which is our representation for EU + * registers. They're not a hardware specific format, just an abstraction + * that intends to capture the full flexibility of the hardware registers. + * + * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode + * the abstract brw_reg type into the actual hardware instruction encoding. 
+ */ + +#ifndef BRW_REG_H +#define BRW_REG_H + +#include <stdbool.h> +#include <assert.h> +#include "brw_defines.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** + * First GRF used for the MRF hack. + * + * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We + * haven't converted our compiler to be aware of this, so it asks for MRFs and + * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The + * register allocators have to be careful of this to avoid corrupting the "MRF"s + * with actual GRF allocations. + */ +#define GEN7_MRF_HACK_START 112 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + +static inline bool +brw_is_single_value_swizzle(int swiz) +{ + return (swiz == BRW_SWIZZLE_XXXX || + swiz == BRW_SWIZZLE_YYYY || + swiz == BRW_SWIZZLE_ZZZZ || + swiz == BRW_SWIZZLE_WWWW); +} + +#define BRW_WRITEMASK_X 0x1 +#define BRW_WRITEMASK_Y 0x2 +#define BRW_WRITEMASK_Z 0x4 +#define BRW_WRITEMASK_W 0x8 + +#define BRW_WRITEMASK_XY (BRW_WRITEMASK_X | BRW_WRITEMASK_Y) +#define BRW_WRITEMASK_XZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Z) +#define BRW_WRITEMASK_XW (BRW_WRITEMASK_X | BRW_WRITEMASK_W) +#define BRW_WRITEMASK_YW (BRW_WRITEMASK_Y | BRW_WRITEMASK_W) +#define BRW_WRITEMASK_ZW (BRW_WRITEMASK_Z | BRW_WRITEMASK_W) +#define BRW_WRITEMASK_XYZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | BRW_WRITEMASK_Z) +#define BRW_WRITEMASK_XYZW (BRW_WRITEMASK_X | BRW_WRITEMASK_Y 
| \ + BRW_WRITEMASK_Z | BRW_WRITEMASK_W) + +#define REG_SIZE (8*4) + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg { + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! */ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + + +static inline int +type_sz(unsigned type) +{ + switch(type) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. 
+ * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield + */ +static inline struct brw_reg +brw_reg(unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + struct brw_reg reg; + if (file == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_TIMESTAMP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? 
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static inline struct brw_reg +brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static inline struct brw_reg +brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static inline struct brw_reg +brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + BRW_WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static inline struct brw_reg +brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + BRW_WRITEMASK_XY); +} + +/** Construct float[1] register */ +static inline struct brw_reg +brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + BRW_WRITEMASK_X); +} + + +static inline struct brw_reg +retype(struct brw_reg reg, unsigned type) +{ + reg.type = type; + return reg; +} + +static inline struct brw_reg +sechalf(struct brw_reg reg) +{ + if (reg.vstride) + reg.nr++; + return reg; +} + +static inline struct brw_reg +suboffset(struct brw_reg reg, unsigned delta) +{ + reg.subnr += delta 
* type_sz(reg.type); + return reg; +} + + +static inline struct brw_reg +offset(struct brw_reg reg, unsigned delta) +{ + reg.nr += delta; + return reg; +} + + +static inline struct brw_reg +byte_offset(struct brw_reg reg, unsigned bytes) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static inline struct brw_reg +brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static inline struct brw_reg +brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static inline struct brw_reg +brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static inline struct brw_reg +brw_imm_reg(unsigned type) +{ + return brw_reg(BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static inline struct brw_reg +brw_imm_f(float f) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static inline struct brw_reg +brw_imm_d(int d) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static inline struct brw_reg +brw_imm_ud(unsigned ud) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static inline struct brw_reg +brw_imm_uw(uint16_t uw) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); 
+ imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static inline struct brw_reg +brw_imm_w(int16_t w) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static inline struct brw_reg +brw_imm_v(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static inline struct brw_reg +brw_imm_vf(unsigned v) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static inline struct brw_reg +brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); + return imm; +} + + +static inline struct brw_reg +brw_address(struct brw_reg reg) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static inline struct brw_reg +brw_vec1_grf(unsigned nr, unsigned subnr) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static inline struct brw_reg +brw_vec2_grf(unsigned nr, unsigned subnr) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static inline struct 
brw_reg +brw_vec4_grf(unsigned nr, unsigned subnr) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[8] general-purpose register */ +static inline struct brw_reg +brw_vec8_grf(unsigned nr, unsigned subnr) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static inline struct brw_reg +brw_uw8_grf(unsigned nr, unsigned subnr) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static inline struct brw_reg +brw_uw16_grf(unsigned nr, unsigned subnr) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +/** Construct null register (usually used for setting condition codes) */ +static inline struct brw_reg +brw_null_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); +} + +static inline struct brw_reg +brw_address_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static inline struct brw_reg +brw_ip_reg(void) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + BRW_WRITEMASK_XYZW); /* NOTE! 
*/ +} + +static inline struct brw_reg +brw_acc_reg(void) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0); +} + +static inline struct brw_reg +brw_notification_1_reg(void) +{ + + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NOTIFICATION_COUNT, + 1, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + BRW_WRITEMASK_X); +} + + +static inline struct brw_reg +brw_flag_reg(int reg, int subreg) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG + reg, subreg); +} + + +static inline struct brw_reg +brw_mask_reg(unsigned subnr) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr); +} + +static inline struct brw_reg +brw_message_reg(unsigned nr) +{ + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); +} + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static inline unsigned cvt(unsigned val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static inline struct brw_reg +stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + + +static inline struct brw_reg +vec16(struct brw_reg reg) +{ + return stride(reg, 16,16,1); +} + +static inline struct brw_reg +vec8(struct brw_reg reg) +{ + return stride(reg, 8,8,1); +} + +static inline struct brw_reg +vec4(struct brw_reg reg) +{ + return stride(reg, 4,4,1); +} + +static inline struct brw_reg +vec2(struct brw_reg reg) +{ + return stride(reg, 2,2,1); +} + +static inline struct brw_reg +vec1(struct brw_reg reg) +{ + return stride(reg, 0,1,0); +} + + +static inline struct brw_reg +get_element(struct 
brw_reg reg, unsigned elt) +{ + return vec1(suboffset(reg, elt)); +} + +static inline struct brw_reg +get_element_ud(struct brw_reg reg, unsigned elt) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + +static inline struct brw_reg +get_element_d(struct brw_reg reg, unsigned elt) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt)); +} + + +static inline struct brw_reg +brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static inline struct brw_reg +brw_swizzle1(struct brw_reg reg, unsigned x) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static inline struct brw_reg +brw_writemask(struct brw_reg reg, unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask &= mask; + return reg; +} + +static inline struct brw_reg +brw_set_writemask(struct brw_reg reg, unsigned mask) +{ + assert(reg.file != BRW_IMMEDIATE_VALUE); + reg.dw1.bits.writemask = mask; + return reg; +} + +static inline struct brw_reg +negate(struct brw_reg reg) +{ + reg.negate ^= 1; + return reg; +} + +static inline struct brw_reg +brw_abs(struct brw_reg reg) +{ + reg.abs = 1; + reg.negate = 0; + return reg; +} + +/************************************************************************/ + +static inline struct brw_reg +brw_vec4_indirect(unsigned subnr, int offset) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg +brw_vec1_indirect(unsigned subnr, int offset) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = 
BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static inline struct brw_reg +deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg +deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static inline struct brw_reg +deref_4b(struct brw_indirect ptr, int offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static inline struct brw_reg +deref_1uw(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static inline struct brw_reg +deref_1d(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static inline struct brw_reg +deref_1ud(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg +get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static inline struct brw_indirect +brw_indirect_offset(struct brw_indirect ptr, int offset) +{ + ptr.addr_offset += offset; + return ptr; +} + +static inline struct brw_indirect +brw_indirect(unsigned addr_subnr, int offset) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? */ +static inline bool +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +void brw_print_reg(struct brw_reg reg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/assembler/brw_structs.h b/assembler/brw_structs.h new file mode 100644 index 0000000..8c2d2b9 --- /dev/null +++ b/assembler/brw_structs.h @@ -0,0 +1,1493 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include <stdint.h> + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_urb_fence +{ + struct + { + unsigned length:8; + unsigned vs_realloc:1; + unsigned gs_realloc:1; + unsigned clp_realloc:1; + unsigned sf_realloc:1; + unsigned vfe_realloc:1; + unsigned cs_realloc:1; + unsigned pad:2; + unsigned opcode:16; + } header; + + struct + { + unsigned vs_fence:10; + unsigned gs_fence:10; + unsigned clp_fence:10; + unsigned pad:2; + } bits0; + + struct + { + unsigned sf_fence:10; + unsigned vf_fence:10; + unsigned cs_fence:11; + unsigned pad:1; + } bits1; +}; + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + unsigned pad0:1; + unsigned grf_reg_count:3; + unsigned pad1:2; + unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ +}; + +struct thread1 +{ + unsigned ext_halt_exception_enable:1; + unsigned sw_exception_enable:1; + unsigned mask_stack_exception_enable:1; + unsigned timeout_exception_enable:1; + unsigned illegal_op_exception_enable:1; + unsigned pad0:3; + unsigned depth_coef_urb_read_offset:6; /* WM only */ + unsigned pad1:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad3:5; + unsigned single_program_flow:1; +}; + +struct thread2 +{ + unsigned per_thread_scratch_space:4; + unsigned pad0:6; + unsigned scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + unsigned dispatch_grf_start_reg:4; + unsigned urb_entry_read_offset:6; + unsigned pad0:1; + unsigned urb_entry_read_length:6; + unsigned pad1:1; + unsigned 
const_urb_entry_read_offset:6; + unsigned pad2:1; + unsigned const_urb_entry_read_length:6; + unsigned pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct + { + unsigned pad0:7; + unsigned sw_exception_enable:1; + unsigned pad1:3; + unsigned mask_stack_exception_enable:1; + unsigned pad2:1; + unsigned illegal_op_exception_enable:1; + unsigned pad3:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad4:5; + unsigned single_program_flow:1; + } thread1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:9; + unsigned gs_output_stats:1; /* not always */ + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:5; /* may be less */ + unsigned pad3:2; + } thread4; + + struct + { + unsigned pad0:13; + unsigned clip_mode:3; + unsigned userclip_enable_flags:8; + unsigned userclip_must_clip:1; + unsigned negative_w_clip_test:1; + unsigned guard_band_enable:1; + unsigned viewport_z_clip_enable:1; + unsigned viewport_xy_clip_enable:1; + unsigned vertex_position_space:1; + unsigned api_mode:1; + unsigned pad2:1; + } clip5; + + struct + { + unsigned pad0:5; + unsigned clipper_viewport_state_ptr:27; + } clip6; + + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + +struct gen6_blend_state +{ + struct { + unsigned dest_blend_factor:5; + unsigned source_blend_factor:5; + unsigned pad3:1; + unsigned blend_func:3; + unsigned pad2:1; + unsigned ia_dest_blend_factor:5; + unsigned ia_source_blend_factor:5; + unsigned pad1:1; + unsigned ia_blend_func:3; + unsigned pad0:1; + unsigned ia_blend_enable:1; + unsigned blend_enable:1; + } blend0; + + struct { + unsigned post_blend_clamp_enable:1; + unsigned pre_blend_clamp_enable:1; + unsigned clamp_range:2; + unsigned pad0:4; + unsigned x_dither_offset:2; + unsigned 
y_dither_offset:2; + unsigned dither_enable:1; + unsigned alpha_test_func:3; + unsigned alpha_test_enable:1; + unsigned pad1:1; + unsigned logic_op_func:4; + unsigned logic_op_enable:1; + unsigned pad2:1; + unsigned write_disable_b:1; + unsigned write_disable_g:1; + unsigned write_disable_r:1; + unsigned write_disable_a:1; + unsigned pad3:1; + unsigned alpha_to_coverage_dither:1; + unsigned alpha_to_one:1; + unsigned alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + unsigned alpha_test_format:1; + unsigned pad0:14; + unsigned round_disable:1; + unsigned bf_stencil_ref:8; + unsigned stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + unsigned ui:8; + unsigned pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + unsigned pad0:3; + unsigned bf_stencil_pass_depth_pass_op:3; + unsigned bf_stencil_pass_depth_fail_op:3; + unsigned bf_stencil_fail_op:3; + unsigned bf_stencil_func:3; + unsigned bf_stencil_enable:1; + unsigned pad1:2; + unsigned stencil_write_enable:1; + unsigned stencil_pass_depth_pass_op:3; + unsigned stencil_pass_depth_fail_op:3; + unsigned stencil_fail_op:3; + unsigned stencil_func:3; + unsigned stencil_enable:1; + } ds0; + + struct { + unsigned bf_stencil_write_mask:8; + unsigned bf_stencil_test_mask:8; + unsigned stencil_write_mask:8; + unsigned stencil_test_mask:8; + } ds1; + + struct { + unsigned pad0:26; + unsigned depth_write_enable:1; + unsigned depth_test_func:3; + unsigned pad1:1; + unsigned depth_test_enable:1; + } ds2; +}; + +struct brw_cc_unit_state +{ + struct + { + unsigned pad0:3; + unsigned bf_stencil_pass_depth_pass_op:3; + unsigned bf_stencil_pass_depth_fail_op:3; + unsigned bf_stencil_fail_op:3; + unsigned bf_stencil_func:3; + unsigned bf_stencil_enable:1; + unsigned pad1:2; + unsigned stencil_write_enable:1; + unsigned stencil_pass_depth_pass_op:3; + unsigned 
stencil_pass_depth_fail_op:3; + unsigned stencil_fail_op:3; + unsigned stencil_func:3; + unsigned stencil_enable:1; + } cc0; + + + struct + { + unsigned bf_stencil_ref:8; + unsigned stencil_write_mask:8; + unsigned stencil_test_mask:8; + unsigned stencil_ref:8; + } cc1; + + + struct + { + unsigned logicop_enable:1; + unsigned pad0:10; + unsigned depth_write_enable:1; + unsigned depth_test_function:3; + unsigned depth_test:1; + unsigned bf_stencil_write_mask:8; + unsigned bf_stencil_test_mask:8; + } cc2; + + + struct + { + unsigned pad0:8; + unsigned alpha_test_func:3; + unsigned alpha_test:1; + unsigned blend_enable:1; + unsigned ia_blend_enable:1; + unsigned pad1:1; + unsigned alpha_test_format:1; + unsigned pad2:16; + } cc3; + + struct + { + unsigned pad0:5; + unsigned cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ + } cc4; + + struct + { + unsigned pad0:2; + unsigned ia_dest_blend_factor:5; + unsigned ia_src_blend_factor:5; + unsigned ia_blend_function:3; + unsigned statistics_enable:1; + unsigned logicop_func:4; + unsigned pad1:11; + unsigned dither_enable:1; + } cc5; + + struct + { + unsigned clamp_post_alpha_blend:1; + unsigned clamp_pre_alpha_blend:1; + unsigned clamp_range:2; + unsigned pad0:11; + unsigned y_dither_offset:2; + unsigned x_dither_offset:2; + unsigned dest_blend_factor:5; + unsigned src_blend_factor:5; + unsigned blend_function:3; + } cc6; + + struct { + union { + float f; + uint8_t ub[4]; + } alpha_ref; + } cc7; +}; + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:6; + unsigned pad3:1; + } thread4; + + struct + { + unsigned front_winding:1; + unsigned viewport_transform:1; + unsigned pad0:3; + unsigned sf_viewport_state_offset:27; /* Offset from 
GENERAL_STATE_BASE */ + } sf5; + + struct + { + unsigned pad0:9; + unsigned dest_org_vbias:4; + unsigned dest_org_hbias:4; + unsigned scissor:1; + unsigned disable_2x2_trifilter:1; + unsigned disable_zero_pix_trifilter:1; + unsigned point_rast_rule:2; + unsigned line_endcap_aa_region_width:2; + unsigned line_width:4; + unsigned fast_scissor_disable:1; + unsigned cull_mode:2; + unsigned aa_enable:1; + } sf6; + + struct + { + unsigned point_size:11; + unsigned use_point_size_state:1; + unsigned subpixel_precision:1; + unsigned sprite_point:1; + unsigned pad0:10; + unsigned aa_line_distance_mode:1; + unsigned trifan_pv:2; + unsigned linestrip_pv:2; + unsigned tristrip_pv:2; + unsigned line_last_pixel_enable:1; + } sf7; + +}; + +struct gen6_scissor_rect +{ + unsigned xmin:16; + unsigned ymin:16; + unsigned xmax:16; + unsigned ymax:16; +}; + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:8; + unsigned rendering_enable:1; /* for Ironlake */ + unsigned pad4:1; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:5; + unsigned pad3:2; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } gs5; + + + struct + { + unsigned max_vp_index:4; + unsigned pad0:12; + unsigned svbi_post_inc_value:10; + unsigned pad1:1; + unsigned svbi_post_inc_enable:1; + unsigned svbi_payload:1; + unsigned discard_adjaceny:1; + unsigned reorder_enable:1; + unsigned pad2:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:6; + unsigned 
pad3:1; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } vs5; + + struct + { + unsigned vs_enable:1; + unsigned vert_cache_disable:1; + unsigned pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned stats_enable:1; + unsigned depth_buffer_clear:1; + unsigned sampler_count:3; + unsigned sampler_state_pointer:27; + } wm4; + + struct + { + unsigned enable_8_pix:1; + unsigned enable_16_pix:1; + unsigned enable_32_pix:1; + unsigned enable_con_32_pix:1; + unsigned enable_con_64_pix:1; + unsigned pad0:1; + + /* These next four bits are for Ironlake+ */ + unsigned fast_span_coverage_enable:1; + unsigned depth_buffer_clear:1; + unsigned depth_buffer_resolve_enable:1; + unsigned hierarchical_depth_buffer_resolve_enable:1; + + unsigned legacy_global_depth_bias:1; + unsigned line_stipple:1; + unsigned depth_offset:1; + unsigned polygon_stipple:1; + unsigned line_aa_region_width:2; + unsigned line_endcap_aa_region_width:2; + unsigned early_depth_test:1; + unsigned thread_dispatch_enable:1; + unsigned program_uses_depth:1; + unsigned program_computes_depth:1; + unsigned program_uses_killpixel:1; + unsigned legacy_line_rast: 1; + unsigned transposed_urb_read_enable:1; + unsigned max_threads:7; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; + + /* for Ironlake only */ + struct { + unsigned pad0:1; + unsigned grf_reg_count_1:3; + unsigned pad1:2; + unsigned kernel_start_pointer_1:26; + } wm8; + + struct { + unsigned pad0:1; + unsigned grf_reg_count_2:3; + unsigned pad1:2; + unsigned kernel_start_pointer_2:26; + } wm9; + + struct { + unsigned pad0:1; + unsigned grf_reg_count_3:3; + unsigned pad1:2; + unsigned kernel_start_pointer_3:26; + } wm10; +}; + +struct brw_sampler_default_color { + float color[4]; +}; + +struct gen5_sampler_default_color { + 
uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + +struct brw_sampler_state +{ + + struct + { + unsigned shadow_function:3; + unsigned lod_bias:11; + unsigned min_filter:3; + unsigned mag_filter:3; + unsigned mip_filter:2; + unsigned base_level:5; + unsigned min_mag_neq:1; + unsigned lod_preclamp:1; + unsigned default_color_mode:1; + unsigned pad0:1; + unsigned disable:1; + } ss0; + + struct + { + unsigned r_wrap_mode:3; + unsigned t_wrap_mode:3; + unsigned s_wrap_mode:3; + unsigned cube_control_mode:1; + unsigned pad:2; + unsigned max_lod:10; + unsigned min_lod:10; + } ss1; + + + struct + { + unsigned pad:5; + unsigned default_color_pointer:27; + } ss2; + + struct + { + unsigned non_normalized_coord:1; + unsigned pad:12; + unsigned address_round:6; + unsigned max_aniso:3; + unsigned chroma_key_mode:1; + unsigned chroma_key_index:2; + unsigned chroma_key_enable:1; + unsigned monochrome_filter_width:3; + unsigned monochrome_filter_height:3; + } ss3; +}; + +struct gen7_sampler_state +{ + struct + { + unsigned aniso_algorithm:1; + unsigned lod_bias:13; + unsigned min_filter:3; + unsigned mag_filter:3; + unsigned mip_filter:2; + unsigned base_level:5; + unsigned pad1:1; + unsigned lod_preclamp:1; + unsigned default_color_mode:1; + unsigned pad0:1; + unsigned disable:1; + } ss0; + + struct + { + unsigned cube_control_mode:1; + unsigned shadow_function:3; + unsigned pad:4; + unsigned max_lod:12; + unsigned min_lod:12; + } ss1; + + struct + { + unsigned pad:5; + unsigned default_color_pointer:27; + } ss2; + + struct + { + unsigned r_wrap_mode:3; + unsigned t_wrap_mode:3; + unsigned s_wrap_mode:3; + unsigned pad:1; + unsigned non_normalized_coord:1; + unsigned trilinear_quality:2; + unsigned address_round:6; + unsigned max_aniso:3; + unsigned chroma_key_mode:1; + unsigned chroma_key_index:2; + unsigned chroma_key_enable:1; + unsigned pad0:6; + } ss3; +}; + +struct brw_clipper_viewport +{ + float xmin; + float xmax; + 
float ymin; + float ymax; +}; + +struct brw_cc_viewport +{ + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport +{ + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + /* scissor coordinates are inclusive */ + struct { + int16_t xmin; + int16_t ymin; + int16_t xmax; + int16_t ymax; + } scissor; +}; + +struct gen6_sf_viewport { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; +}; + +struct gen7_sf_clip_viewport { + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + unsigned pad0[2]; + + struct { + float xmin; + float xmax; + float ymin; + float ymax; + } guardband; + + float pad1[4]; +}; + +struct brw_vertex_element_state +{ + struct + { + unsigned src_offset:11; + unsigned pad:5; + unsigned src_format:9; + unsigned pad0:1; + unsigned valid:1; + unsigned vertex_buffer_index:5; + } ve0; + + struct + { + unsigned dst_offset:8; + unsigned pad:8; + unsigned vfcomponent3:4; + unsigned vfcomponent2:4; + unsigned vfcomponent1:4; + unsigned vfcomponent0:4; + } ve1; +}; + +struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; /* gen6: quarter control */ + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + /** + * Conditional Modifier for most instructions. On Gen6+, this is also + * used for the SEND instruction's Message Target/SFID. 
+ */ + unsigned destreg__conditionalmod:4; + unsigned acc_wr_control:1; + unsigned cmpt_control:1; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; /* 0x00000c00 */ + unsigned src1_reg_type:3; /* 0x00007000 */ + unsigned pad:1; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da16; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia16; + + struct { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + + int jump_count:16; + } branch_gen6; + + struct { + unsigned dest_reg_file:1; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad0:1; + unsigned src0_abs:1; + 
unsigned src0_negate:1; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src2_abs:1; + unsigned src2_negate:1; + unsigned src_reg_type:2; + unsigned dest_reg_type:2; + unsigned pad1:1; + unsigned nib_ctrl:1; + unsigned pad2:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:3; + unsigned dest_reg_nr:8; + } da3src; + + uint32_t ud; + } bits1; + + + union { + struct + { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } da1; + + struct + { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad:5; + } ia1; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } da16; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_subreg_nr:1; + unsigned flag_reg_nr:1; + unsigned pad1:5; + } ia16; + + /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. + * + * Does not apply to Gen6+. 
The SFID/message target moved to bits + * 27:24 of the header (destreg__conditionalmod); EOT is in bits3. + */ + struct + { + unsigned pad:26; + unsigned end_of_thread:1; + unsigned pad1:1; + unsigned sfid:4; + } send_gen5; /* for Ironlake only */ + + struct { + unsigned src0_rep_ctrl:1; + unsigned src0_swizzle:8; + unsigned src0_subreg_nr:3; + unsigned src0_reg_nr:8; + unsigned pad0:1; + unsigned src1_rep_ctrl:1; + unsigned src1_swizzle:8; + unsigned src1_subreg_nr_low:2; + } da3src; + + uint32_t ud; + } bits2; + + union + { + struct + { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad1:7; + } ia1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned src1_address_mode:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + /* This is also used for gen7 IF/ELSE instructions */ + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled 
after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + + int JIP; /* used by Gen6 CALL instructions; Gen7 JMPI */ + + /** + * \defgroup SEND instructions / Message Descriptors + * + * @{ + */ + + /** + * Generic Message Descriptor for Gen4 SEND instructions. The structs + * below expand function_control to something specific for their + * message. Due to struct packing issues, they duplicate these bits. + * + * See the G45 PRM, Volume 4, Table 14-15. + */ + struct { + unsigned function_control:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + /** + * Generic Message Descriptor for Gen5-7 SEND instructions. + * + * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most + * of the information on the SEND instruction is missing from the public + * Ironlake PRM.) + * + * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. + * According to the SEND instruction description: + * "The MSb of the message description, the EOT field, always comes from + * bit 127 of the instruction word"...which is bit 31 of this field. 
+ */ + struct { + unsigned function_control:19; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } generic_gen5; + + struct { + unsigned opcode:1; + unsigned requester_type:1; + unsigned pad:2; + unsigned resource_select:1; + unsigned pad1:11; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad2:3; + unsigned end_of_thread:1; + } thread_spawner; + + struct { + unsigned opcode:1; + unsigned requester_type:1; + unsigned pad0:2; + unsigned resource_select:1; + unsigned pad1:14; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad2:2; + unsigned end_of_thread:1; + } thread_spawner_gen5; + + /** G45 PRM, Volume 4, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned snapshot:1; + unsigned pad0:10; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } math_gen5; + + /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned response_length:4; + unsigned msg_length:4; + 
unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:4; + unsigned simd_mode:2; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen5; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned msg_type:5; + unsigned simd_mode:2; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } sampler_gen7; + + struct brw_urb_immediate urb; + + struct { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen5; + + struct { + unsigned opcode:3; + unsigned offset:11; + unsigned swizzle_control:1; + unsigned complete:1; + unsigned per_slot_offset:1; + unsigned pad0:2; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } urb_gen7; + + struct { + unsigned binding_table_index:8; + unsigned search_path_index:3; + unsigned lut_subindex:2; + unsigned message_type:2; + unsigned pad0:4; + unsigned header_present:1; + } vme_gen6; + + struct { + unsigned binding_table_index:8; + unsigned pad0:5; + unsigned message_type:2; + unsigned pad1:4; + unsigned header_present:1; + } cre_gen75; + + /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned 
pad1:3; + unsigned end_of_thread:1; + } dp_read; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2 */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned msg_type:3; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read_g4x; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_read_gen5; + + /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned last_render_target:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } dp_write_gen5; + + /** + * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. + * + * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. + **/ + struct { + unsigned binding_table_index:8; + unsigned msg_control:5; + unsigned msg_type:3; + unsigned pad0:3; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp_sampler_const_cache; + + /** + * Message for the Sandybridge Render Cache Data Port. 
+ * + * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, + * Section 3.9.2.1.1: Message Descriptor. + * + * "Slot Group Select" and "Last Render Target" are part of the + * 5-bit message control for Render Target Write messages. See + * Section 3.9.9.2.1 of the same volume. + */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:5; + unsigned msg_type:4; + unsigned send_commit_msg:1; + unsigned pad0:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad1:2; + unsigned end_of_thread:1; + } gen6_dp; + + /** + * Message for any of the Gen7 Data Port caches. + * + * Most fields are defined in BSpec volume 5c.2 Data Port / Messages / + * Data Port Messages / Message Descriptor. Once again, "Slot Group + * Select" and "Last Render Target" are part of the 6-bit message + * control for Render Target Writes. + */ + struct { + unsigned binding_table_index:8; + unsigned msg_control:6; + unsigned msg_type:4; + unsigned category:1; + unsigned header_present:1; + unsigned response_length:5; + unsigned msg_length:4; + unsigned pad2:2; + unsigned end_of_thread:1; + } gen7_dp; + /** @} */ + + struct { + unsigned src1_subreg_nr_high:1; + unsigned src1_reg_nr:8; + unsigned pad0:1; + unsigned src2_rep_ctrl:1; + unsigned src2_swizzle:8; + unsigned src2_subreg_nr:3; + unsigned src2_reg_nr:8; + unsigned pad1:2; + } da3src; + + int d; + unsigned ud; + float f; + } bits3; +}; + +struct brw_compact_instruction { + struct { + unsigned opcode:7; /* 0- 6 */ + unsigned debug_control:1; /* 7- 7 */ + unsigned control_index:5; /* 8-12 */ + unsigned data_type_index:5; /* 13-17 */ + unsigned sub_reg_index:5; /* 18-22 */ + unsigned acc_wr_control:1; /* 23-23 */ + unsigned conditionalmod:4; /* 24-27 */ + unsigned flag_subreg_nr:1; /* 28-28 */ + unsigned cmpt_ctrl:1; /* 29-29 */ + unsigned src0_index:2; /* 30-31 */ + } dw0; + + struct { + unsigned src0_index:3; /* 32-34 */ + unsigned src1_index:5; /* 35-39 */ + unsigned 
dst_reg_nr:8; /* 40-47 */ + unsigned src0_reg_nr:8; /* 48-55 */ + unsigned src1_reg_nr:8; /* 56-63 */ + } dw1; +}; + +#endif diff --git a/assembler/disasm-main.c b/assembler/disasm-main.c new file mode 100644 index 0000000..5bc75af --- /dev/null +++ b/assembler/disasm-main.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> + +#include "gen4asm.h" +#include "brw_eu.h" + +static const struct option longopts[] = { + { NULL, 0, NULL, 0 } +}; + +static struct brw_program * +read_program (FILE *input) +{ + uint32_t inst[4]; + struct brw_program *program; + struct brw_program_instruction *entry, **prev; + int c; + int n = 0; + + program = malloc (sizeof (struct brw_program)); + program->first = NULL; + prev = &program->first; + while ((c = getc (input)) != EOF) { + if (c == '0') { + if (fscanf (input, "x%x", &inst[n]) == 1) { + ++n; + if (n == 4) { + entry = malloc (sizeof (struct brw_program_instruction)); + memcpy (&entry->insn, inst, 4 * sizeof (uint32_t)); + entry->next = NULL; + *prev = entry; + prev = &entry->next; + n = 0; + } + } + } + } + return program; +} + +static struct brw_program * +read_program_binary (FILE *input) +{ + uint32_t temp; + uint8_t inst[16]; + struct brw_program *program; + struct brw_program_instruction *entry, **prev; + int c; + int n = 0; + + program = malloc (sizeof (struct brw_program)); + program->first = NULL; + prev = &program->first; + while ((c = getc (input)) != EOF) { + if (c == '0') { + if (fscanf (input, "x%2x", &temp) == 1) { + inst[n++] = (uint8_t)temp; + if (n == 16) { + entry = malloc (sizeof (struct brw_program_instruction)); + memcpy (&entry->insn, inst, 16 * sizeof (uint8_t)); + entry->next = NULL; + *prev = entry; + prev = &entry->next; + n = 0; + } + } + } + } + return program; +} + +static void usage(void) +{ + fprintf(stderr, "usage: intel-gen4disasm [options] inputfile\n"); + fprintf(stderr, "\t-b, --binary C style binary output\n"); + fprintf(stderr, "\t-o, --output {outputfile} Specify output file\n"); + fprintf(stderr, "\t-g, --gen <4|5|6|7> Specify GPU generation\n"); +} + +int main(int argc, char **argv) +{ + struct brw_program *program; + FILE *input = stdin; + FILE *output = stdout; + char *input_filename = NULL; + char 
*output_file = NULL; + int byte_array_input = 0; + int o; + int gen = 4; + struct brw_program_instruction *inst; + + while ((o = getopt_long(argc, argv, "o:bg:", longopts, NULL)) != -1) { + switch (o) { + case 'o': + if (strcmp(optarg, "-") != 0) + output_file = optarg; + break; + case 'b': + byte_array_input = 1; + break; + case 'g': + gen = strtol(optarg, NULL, 10); + + if (gen < 4 || gen > 7) { + usage(); + exit(1); + } + + break; + default: + usage(); + exit(1); + } + } + argc -= optind; + argv += optind; + if (argc != 1) { + usage(); + exit(1); + } + + if (strcmp(argv[0], "-") != 0) { + input_filename = argv[0]; + input = fopen(input_filename, "r"); + if (input == NULL) { + perror("Couldn't open input file"); + exit(1); + } + } + if (byte_array_input) + program = read_program_binary (input); + else + program = read_program (input); + if (!program) + exit (1); + if (output_file) { + output = fopen (output_file, "w"); + if (output == NULL) { + perror("Couldn't open output file"); + exit(1); + } + } + + for (inst = program->first; inst; inst = inst->next) + brw_disasm (output, &inst->insn.gen, gen); + exit (0); +} diff --git a/assembler/doc/Makefile.am b/assembler/doc/Makefile.am new file mode 100644 index 0000000..257fc38 --- /dev/null +++ b/assembler/doc/Makefile.am @@ -0,0 +1,3 @@ +EXTRA_DIST = \ + examples/packed_yuv_sf.g4a \ + examples/packed_yuv_wm.g4a diff --git a/assembler/doc/examples/packed_yuv_sf.g4a b/assembler/doc/examples/packed_yuv_sf.g4a new file mode 100644 index 0000000..8c1398f --- /dev/null +++ b/assembler/doc/examples/packed_yuv_sf.g4a @@ -0,0 +1,17 @@ +send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 }; +mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 }; +mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 }; +mov (8) m1<1>F g7<0,1,0>F { align1 }; +mov (8) m2<1>F g7.4<0,1,0>F { align1 }; +mov (8) 
m3<1>F g3<8,8,1>F { align1 }; +send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT }; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/assembler/doc/examples/packed_yuv_wm.g4a b/assembler/doc/examples/packed_yuv_wm.g4a new file mode 100644 index 0000000..d312d17 --- /dev/null +++ b/assembler/doc/examples/packed_yuv_wm.g4a @@ -0,0 +1,161 @@ +/* The initial payload of the thread is always g0. + * WM_URB (incoming URB entries) is g3 + * X0_R is g4 + * X1_R is g5 + * Y0_R is g6 + * Y1_R is g7 + */ + + /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each + * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each + * subspan are given in GRF register 1.2 through 1.5 (which, with the word + * addressing below, are 1.4 through 1.11). + * + * The result is WM_X*_R and WM_Y*_R being: + * + * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1} + * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1} + * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1} + * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1} + */ + + /* Set up ss0.x coordinates*/ +mov (1) g4<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 }; +mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 }; + /* Set up ss0.y coordinates */ +mov (1) g6<1>F g1.10<0,1,0>UW { align1 }; +mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 }; +add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 }; +add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 }; + /* set up ss1.x coordinates */ +mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 }; +mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 }; + /* set up ss1.y coordinates */ +mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 }; +mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 }; +add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { 
align1 }; +add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 }; + /* Set up ss2.x coordinates */ +mov (1) g5<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 }; +mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 }; + /* Set up ss2.y coordinates */ +mov (1) g7<1>F g1.18<0,1,0>UW { align1 }; +mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 }; +add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 }; +add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 }; + /* Set up ss3.x coordinates */ +mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 }; +mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 }; + /* Set up ss3.y coordinates */ +mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 }; +mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 }; +add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 }; +add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 }; + + /* Now, map these screen space coordinates into texture coordinates. */ + /* subtract screen-space X origin of vertex 0. */ +add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 }; + /* scale by texture X increment */ +mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 }; +mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 }; + /* add in texture X offset */ +add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 }; + /* subtract screen-space Y origin of vertex 0. 
*/ +add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 }; + /* scale by texture Y increment */ +mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 }; +mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 }; + /* add in texture Y offset */ +add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 }; + /* sampler */ +mov (8) m1<1>F g4<8,8,1>F { align1 }; +mov (8) m2<1>F g5<8,8,1>F { align1 }; +mov (8) m3<1>F g6<8,8,1>F { align1 }; +mov (8) m4<1>F g7<8,8,1>F { align1 }; + + /* + * g0 holds the PS thread payload, which (oddly) contains + * precisely what the sampler wants to see in m0 + */ +send (16) 0 g12<1>UW g0<8,8,1>UW sampler (1,0,F) mlen 5 rlen 8 { align1 }; +mov (8) g19<1>UW g19<8,8,1>UW { align1 }; + + /* color space conversion function: + * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) + * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) + * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) + * + * Y is g14, g15. + * Cr is g12, g13. + * Cb is g16, g17. + * + * R is g2, g6. + * G is g3, g7. + * B is g4, g8. 
+ */ + /* Y = Y - 16/255 */ +add (8) g14<1>F g14<8,8,1>F -0.0627451F { align1 }; + /* Cr = Cr - 128/255 */ +add (8) g12<1>F g12<8,8,1>F -0.501961F { align1 }; + /* Cb = Cb - 128 / 255 */ +add (8) g16<1>F g16<8,8,1>F -0.501961F { align1 }; + /* Y = Y * 1.164 */ +mul (8) g14<1>F g14<8,8,1>F 1.164F { align1 }; + /* acc = 1.596 * Cr */ +mul (8) null g12<8,8,1>F 1.596F { align1 }; + /* R = acc + Y */ +mac.sat (8) m2<1>F g14<8,8,1>F 1F { align1 }; + /* acc = Cr * -0.813 */ +mul (8) null g12<8,8,1>F -0.813F { align1 }; + /* acc += Cb * -0.392 */ +mac (8) null g16<8,8,1>F -0.392F { align1 }; + /* G = acc + Y */ +mac.sat (8) m3<1>F g14<8,8,1>F 1F { align1 }; + /* acc = Cb * 2.017 */ +mul (8) null g16<8,8,1>F 2.017F { align1 }; + /* B = acc + Y */ +mac.sat (8) m4<1>F g14<8,8,1>F 1F { align1 }; + /* and do it again */ +add (8) g15<1>F g15<8,8,1>F -0.0627451F { align1 }; +add (8) g13<1>F g13<8,8,1>F -0.501961F { align1 }; +add (8) g17<1>F g17<8,8,1>F -0.501961F { align1 }; +mul (8) g15<1>F g15<8,8,1>F 1.164F { align1 }; +mul (8) null g13<8,8,1>F 1.596F { align1 }; +mac.sat (8) m6<1>F g15<8,8,1>F 1F { align1 }; +mul (8) null g13<8,8,1>F -0.813F { align1 }; +mac (8) null g17<8,8,1>F -0.392F { align1 }; +mac.sat (8) m7<1>F g15<8,8,1>F 1F { align1 }; +mul (8) null g17<8,8,1>F 2.017F { align1 }; +mac.sat (8) m8<1>F g15<8,8,1>F 1F { align1 }; + + /* Pass through control information: + */ +mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; + /* Send framebuffer write message: XXX: acc0? 
*/ +send (16) 0 null g0<8,8,1>UW write ( + 0, /* binding table index 0 */ + 8, /* pixel scoreboard clear */ + 4, /* render target write */ + 0 /* no write commit message */ + ) mlen 10 rlen 0 { align1 EOT }; + /* padding */ +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/assembler/gen4asm.h b/assembler/gen4asm.h new file mode 100644 index 0000000..dca7f0f --- /dev/null +++ b/assembler/gen4asm.h @@ -0,0 +1,227 @@ +/* -*- c-basic-offset: 8 -*- */ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#ifndef __GEN4ASM_H__ +#define __GEN4ASM_H__ + +#include <inttypes.h> +#include <stdbool.h> +#include <assert.h> + +#include "brw_reg.h" +#include "brw_defines.h" +#include "brw_structs.h" + +extern long int gen_level; +extern int advanced_flag; +extern int errors; + +#define WARN_ALWAYS (1 << 0) +#define WARN_ALL (1 << 31) +extern unsigned int warning_flags; + +extern char *input_filename; + +extern struct brw_context genasm_context; +extern struct brw_compile genasm_compile; + +/* Predicate for Gen X and above */ +#define IS_GENp(x) (gen_level >= (x)*10) + +/* Predicate for Gen X exactly */ +#define IS_GENx(x) (gen_level >= (x)*10 && gen_level < ((x)+1)*10) + +/* Predicate to match Haswell processors */ +#define IS_HASWELL(x) (gen_level == 75) + +void yyerror (char *msg); + +#define STRUCT_SIZE_ASSERT(TYPE, SIZE) \ +typedef struct { \ + char compile_time_assert_ ## TYPE ## _size[ \ + (sizeof (struct TYPE) == (SIZE)) ? 1 : -1]; \ + } _ ## TYPE ## SizeCheck + +/* ensure nobody changes the size of struct brw_instruction */ +STRUCT_SIZE_ASSERT(brw_instruction, 16); + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +struct condition { + int cond; + int flag_reg_nr; + int flag_subreg_nr; +}; + +struct predicate { + unsigned pred_control:4; + unsigned pred_inverse:1; + unsigned flag_reg_nr:1; + unsigned flag_subreg_nr:1; +}; + +struct options { + unsigned access_mode:1; + unsigned compression_control:2; /* gen6: quater control */ + unsigned thread_control:2; + unsigned dependency_control:2; + unsigned mask_control:1; + unsigned debug_control:1; + unsigned acc_wr_control:1; + + unsigned end_of_thread:1; +}; + +struct region { + int vert_stride, width, horiz_stride; + int is_default; +}; +struct regtype { + int type; + int is_default; +}; + +/** + * This structure is the internal representation of source operands in the + * parser. 
+ */ +struct src_operand { + struct brw_reg reg; + int default_region; + uint32_t imm32; /* set if src_operand is expressing a branch offset */ + char *reloc_target; /* bspec: branching instructions JIP and UIP are source operands */ +} src_operand; + +typedef struct { + enum { + imm32_d, imm32_f + } r; + union { + uint32_t d; + float f; + int32_t signed_d; + } u; +} imm32_t; + +enum assembler_instruction_type { + GEN4ASM_INSTRUCTION_GEN, + GEN4ASM_INSTRUCTION_GEN_RELOCATABLE, + GEN4ASM_INSTRUCTION_LABEL, +}; + +struct label_instruction { + char *name; +}; + +struct relocation { + char *first_reloc_target, *second_reloc_target; // JIP and UIP respectively + int first_reloc_offset, second_reloc_offset; // in number of instructions +}; + +/** + * This structure is just the list container for instructions accumulated by + * the parser and labels. + */ +struct brw_program_instruction { + enum assembler_instruction_type type; + unsigned inst_offset; + union { + struct brw_instruction gen; + struct label_instruction label; + } insn; + struct relocation reloc; + struct brw_program_instruction *next; +}; + +static inline bool is_label(struct brw_program_instruction *instruction) +{ + return instruction->type == GEN4ASM_INSTRUCTION_LABEL; +} + +static inline char *label_name(struct brw_program_instruction *i) +{ + assert(is_label(i)); + return i->insn.label.name; +} + +static inline bool is_relocatable(struct brw_program_instruction *intruction) +{ + return intruction->type == GEN4ASM_INSTRUCTION_GEN_RELOCATABLE; +} + +/** + * This structure is a list of instructions. It is the final output of the + * parser. 
+ */ +struct brw_program { + struct brw_program_instruction *first; + struct brw_program_instruction *last; +}; + +extern struct brw_program compiled_program; + +#define TYPE_B_INDEX 0 +#define TYPE_UB_INDEX 1 +#define TYPE_W_INDEX 2 +#define TYPE_UW_INDEX 3 +#define TYPE_D_INDEX 4 +#define TYPE_UD_INDEX 5 +#define TYPE_F_INDEX 6 + +#define TOTAL_TYPES 7 + +struct program_defaults { + int execute_size; + int execute_type[TOTAL_TYPES]; + int register_type; + int register_type_regfile; + struct region source_region; + struct region source_region_type[TOTAL_TYPES]; + struct region dest_region; + struct region dest_region_type[TOTAL_TYPES]; +}; +extern struct program_defaults program_defaults; + +struct declared_register { + char *name; + struct brw_reg reg; + int element_size; + struct region src_region; + int dst_region; +}; +struct declared_register *find_register(char *name); +void insert_register(struct declared_register *reg); + +int yyparse(void); +int yylex(void); +int yylex_destroy(void); + +char * +lex_text(void); + +#endif /* __GEN4ASM_H__ */ diff --git a/assembler/gram.y b/assembler/gram.y new file mode 100644 index 0000000..50d71d1 --- /dev/null +++ b/assembler/gram.y @@ -0,0 +1,3035 @@ +%{ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> +#include <assert.h> +#include "gen4asm.h" +#include "brw_eu.h" + +#define DEFAULT_EXECSIZE (ffs(program_defaults.execute_size) - 1) +#define DEFAULT_DSTREGION -1 + +#define SWIZZLE(reg) (reg.dw1.bits.swizzle) + +#define GEN(i) (&(i)->insn.gen) + +#define YYLTYPE YYLTYPE +typedef struct YYLTYPE +{ + int first_line; + int first_column; + int last_line; + int last_column; +} YYLTYPE; + +extern int need_export; +static struct src_operand src_null_reg = +{ + .reg.file = BRW_ARCHITECTURE_REGISTER_FILE, + .reg.nr = BRW_ARF_NULL, + .reg.type = BRW_REGISTER_TYPE_UD, +}; +static struct brw_reg dst_null_reg = +{ + .file = BRW_ARCHITECTURE_REGISTER_FILE, + .nr = BRW_ARF_NULL, +}; +static struct brw_reg ip_dst = +{ + .file = BRW_ARCHITECTURE_REGISTER_FILE, + .nr = BRW_ARF_IP, + .type = BRW_REGISTER_TYPE_UD, + .address_mode = BRW_ADDRESS_DIRECT, + .hstride = 1, + .dw1.bits.writemask = BRW_WRITEMASK_XYZW, +}; +static struct src_operand ip_src = +{ + .reg.file = BRW_ARCHITECTURE_REGISTER_FILE, + .reg.nr = BRW_ARF_IP, + .reg.type = BRW_REGISTER_TYPE_UD, + .reg.address_mode = BRW_ADDRESS_DIRECT, + .reg.dw1.bits.swizzle = BRW_SWIZZLE_NOOP, +}; + +static int get_type_size(unsigned type); +static void set_instruction_opcode(struct brw_program_instruction *instr, + unsigned opcode); +static int set_instruction_dest(struct 
brw_program_instruction *instr, + struct brw_reg *dest); +static int set_instruction_src0(struct brw_program_instruction *instr, + struct src_operand *src, + YYLTYPE *location); +static int set_instruction_src1(struct brw_program_instruction *instr, + struct src_operand *src, + YYLTYPE *location); +static int set_instruction_dest_three_src(struct brw_program_instruction *instr, + struct brw_reg *dest); +static int set_instruction_src0_three_src(struct brw_program_instruction *instr, + struct src_operand *src); +static int set_instruction_src1_three_src(struct brw_program_instruction *instr, + struct src_operand *src); +static int set_instruction_src2_three_src(struct brw_program_instruction *instr, + struct src_operand *src); +static void set_instruction_saturate(struct brw_program_instruction *instr, + int saturate); +static void set_instruction_options(struct brw_program_instruction *instr, + struct options options); +static void set_instruction_predicate(struct brw_program_instruction *instr, + struct predicate *p); +static void set_instruction_pred_cond(struct brw_program_instruction *instr, + struct predicate *p, + struct condition *c, + YYLTYPE *location); +static void set_direct_dst_operand(struct brw_reg *dst, struct brw_reg *reg, + int type); +static void set_direct_src_operand(struct src_operand *src, struct brw_reg *reg, + int type); + +enum message_level { + WARN, + ERROR, +}; + +static void message(enum message_level level, YYLTYPE *location, + const char *fmt, ...) +{ + static const char *level_str[] = { "warning", "error" }; + va_list args; + + if (location) + fprintf(stderr, "%s:%d:%d: %s: ", input_filename, location->first_line, + location->first_column, level_str[level]); + else + fprintf(stderr, "%s:%s: ", input_filename, level_str[level]); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); +} + +#define warn(flag, l, fmt, ...) 
\ + do { \ + if (warning_flags & WARN_ ## flag) \ + message(WARN, l, fmt, ## __VA_ARGS__); \ + } while(0) + +#define error(l, fmt, ...) \ + do { \ + message(ERROR, l, fmt, ## __VA_ARGS__); \ + } while(0) + +/* like strcmp, but handles NULL pointers */ +static bool strcmp0(const char *s1, const char* s2) +{ + if (!s1) + return -(s1 != s2); + if (!s2) + return s1 != s2; + return strcmp (s1, s2); +} + +static bool region_equal(struct region *r1, struct region *r2) +{ + return memcmp(r1, r2, sizeof(struct region)) == 0; +} + +static bool reg_equal(struct brw_reg *r1, struct brw_reg *r2) +{ + return memcmp(r1, r2, sizeof(struct brw_reg)) == 0; +} + +static bool declared_register_equal(struct declared_register *r1, + struct declared_register *r2) +{ + if (strcmp0(r1->name, r2->name) != 0) + return false; + + if (!reg_equal(&r1->reg, &r2->reg)) + return false; + + if (!region_equal(&r1->src_region, &r2->src_region)) + return false; + + if (r1->element_size != r2->element_size || + r1->dst_region != r2->dst_region) + return false; + + return true; +} + +static void brw_program_init(struct brw_program *p) +{ + memset(p, 0, sizeof(struct brw_program)); +} + +static void brw_program_append_entry(struct brw_program *p, + struct brw_program_instruction *entry) +{ + entry->next = NULL; + if (p->last) + p->last->next = entry; + else + p->first = entry; + p->last = entry; +} + +static void +brw_program_add_instruction(struct brw_program *p, + struct brw_program_instruction *instruction) +{ + struct brw_program_instruction *list_entry; + + list_entry = calloc(sizeof(struct brw_program_instruction), 1); + list_entry->type = GEN4ASM_INSTRUCTION_GEN; + list_entry->insn.gen = instruction->insn.gen; + brw_program_append_entry(p, list_entry); +} + +static void +brw_program_add_relocatable(struct brw_program *p, + struct brw_program_instruction *instruction) +{ + struct brw_program_instruction *list_entry; + + list_entry = calloc(sizeof(struct brw_program_instruction), 1); + 
list_entry->type = GEN4ASM_INSTRUCTION_GEN_RELOCATABLE; + list_entry->insn.gen = instruction->insn.gen; + list_entry->reloc = instruction->reloc; + brw_program_append_entry(p, list_entry); +} + +static void brw_program_add_label(struct brw_program *p, const char *label) +{ + struct brw_program_instruction *list_entry; + + list_entry = calloc(sizeof(struct brw_program_instruction), 1); + list_entry->type = GEN4ASM_INSTRUCTION_LABEL; + list_entry->insn.label.name = strdup(label); + brw_program_append_entry(p, list_entry); +} + +static int resolve_dst_region(struct declared_register *reference, int region) +{ + int resolved = region; + + if (resolved == DEFAULT_DSTREGION) { + if (reference) + resolved = reference->dst_region; + else + resolved = 1; + } + + assert(resolved == 1 || resolved == 2 || resolved == 3); + return resolved; +} + +static bool validate_dst_reg(struct brw_instruction *insn, struct brw_reg *reg) +{ + + if (reg->address_mode == BRW_ADDRESS_DIRECT && + insn->header.access_mode == BRW_ALIGN_1 && + reg->dw1.bits.writemask != 0 && + reg->dw1.bits.writemask != BRW_WRITEMASK_XYZW) + { + fprintf(stderr, "error: write mask set in align1 instruction\n"); + return false; + } + + return true; +} + +static bool validate_src_reg(struct brw_instruction *insn, + struct brw_reg reg, + YYLTYPE *location) +{ + int hstride_for_reg[] = {0, 1, 2, 4}; + int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; + int width_for_reg[] = {1, 2, 4, 8, 16}; + int execsize_for_reg[] = {1, 2, 4, 8, 16, 32}; + int width, hstride, vstride, execsize; + + if (reg.file == BRW_IMMEDIATE_VALUE) + return true; + + if (insn->header.access_mode == BRW_ALIGN_1 && + SWIZZLE(reg) && SWIZZLE(reg) != BRW_SWIZZLE_NOOP) + { + error(location, "swizzle bits set in align1 instruction\n"); + return false; + } + + assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); + hstride = hstride_for_reg[reg.hstride]; + + if (reg.vstride == 0xf) { + vstride = -1; + } else { + 
assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg)); + vstride = vstride_for_reg[reg.vstride]; + } + + assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg)); + width = width_for_reg[reg.width]; + + assert(insn->header.execution_size >= 0 && + insn->header.execution_size < ARRAY_SIZE(execsize_for_reg)); + execsize = execsize_for_reg[insn->header.execution_size]; + + /* Register Region Restrictions */ + + /* B. If ExecSize = Width and HorzStride ≠ 0, VertStride must be set to + * Width * HorzStride. (vstride == -1 is the 0xf encoding set above and + * is exempt from this check.) */ + if (execsize == width && hstride != 0) { + if (vstride != -1 && vstride != width * hstride) + warn(ALL, location, "execution size == width and hstride != 0 but " + "vstride is not width * hstride\n"); + } + + /* D. If Width = 1, HorzStride must be 0 regardless of the values of + * ExecSize and VertStride. + * + * FIXME: In "advanced mode" hstride is set to 1, this is probably a bug + * to fix, but it changes the generated opcodes and thus needs validation. + */ + if (width == 1 && hstride != 0) + warn(ALL, location, "region width is 1 but horizontal stride is %d " + " (should be 0)\n", hstride); + + /* E. If ExecSize = Width = 1, both VertStride and HorzStride must be 0. + * This defines a scalar. */ + if (execsize == 1 && width == 1) { + if (hstride != 0) + warn(ALL, location, "execution size and region width are 1 but " + "horizontal stride is %d (should be 0)\n", hstride); + if (vstride != 0) + warn(ALL, location, "execution size and region width are 1 but " + "vertical stride is %d (should be 0)\n", vstride); + } + + return true; +} + +static int get_subreg_address(unsigned regfile, unsigned type, unsigned subreg, unsigned address_mode) +{ + int unit_size = 1; + + assert(address_mode == BRW_ADDRESS_DIRECT); + assert(regfile != BRW_IMMEDIATE_VALUE); + + if (advanced_flag) + unit_size = get_type_size(type); + + return subreg * unit_size; +} + +/* only used in indirect address mode. 
+ * input: sub-register number of an address register + * output: the value of AddrSubRegNum in the instruction binary code + * + * input output(advanced_flag==0) output(advanced_flag==1) + * a0.0 0 0 + * a0.1 invalid input 1 + * a0.2 1 2 + * a0.3 invalid input 3 + * a0.4 2 4 + * a0.5 invalid input 5 + * a0.6 3 6 + * a0.7 invalid input 7 + * a0.8 4 invalid input + * a0.10 5 invalid input + * a0.12 6 invalid input + * a0.14 7 invalid input + */ +static int get_indirect_subreg_address(unsigned subreg) +{ + return advanced_flag == 0 ? subreg / 2 : subreg; +} + +static void resolve_subnr(struct brw_reg *reg) +{ + if (reg->file == BRW_IMMEDIATE_VALUE) + return; + + if (reg->address_mode == BRW_ADDRESS_DIRECT) + reg->subnr = get_subreg_address(reg->file, reg->type, reg->subnr, + reg->address_mode); + else + reg->subnr = get_indirect_subreg_address(reg->subnr); +} + + +%} +%locations + +%start ROOT + +%union { + char *string; + int integer; + double number; + struct brw_program_instruction instruction; + struct brw_program program; + struct region region; + struct regtype regtype; + struct brw_reg reg; + struct condition condition; + struct predicate predicate; + struct options options; + struct declared_register symbol_reg; + imm32_t imm32; + + struct src_operand src_operand; +} + +%token COLON +%token SEMICOLON +%token LPAREN RPAREN +%token LANGLE RANGLE +%token LCURLY RCURLY +%token LSQUARE RSQUARE +%token COMMA EQ +%token ABS DOT +%token PLUS MINUS MULTIPLY DIVIDE + +%token <integer> TYPE_UD TYPE_D TYPE_UW TYPE_W TYPE_UB TYPE_B +%token <integer> TYPE_VF TYPE_HF TYPE_V TYPE_F + +%token ALIGN1 ALIGN16 SECHALF COMPR SWITCH ATOMIC NODDCHK NODDCLR +%token MASK_DISABLE BREAKPOINT ACCWRCTRL EOT + +%token SEQ ANY2H ALL2H ANY4H ALL4H ANY8H ALL8H ANY16H ALL16H ANYV ALLV +%token <integer> ZERO EQUAL NOT_ZERO NOT_EQUAL GREATER GREATER_EQUAL LESS LESS_EQUAL +%token <integer> ROUND_INCREMENT OVERFLOW UNORDERED +%token <integer> GENREG MSGREG ADDRESSREG ACCREG FLAGREG +%token 
<integer> MASKREG AMASK IMASK LMASK CMASK +%token <integer> MASKSTACKREG LMS IMS MASKSTACKDEPTHREG IMSD LMSD +%token <integer> NOTIFYREG STATEREG CONTROLREG IPREG +%token GENREGFILE MSGREGFILE + +%token <integer> MOV FRC RNDU RNDD RNDE RNDZ NOT LZD +%token <integer> MUL MAC MACH LINE SAD2 SADA2 DP4 DPH DP3 DP2 +%token <integer> AVG ADD SEL AND OR XOR SHR SHL ASR CMP CMPN PLN +%token <integer> ADDC BFI1 BFREV CBIT F16TO32 F32TO16 FBH FBL +%token <integer> SEND NOP JMPI IF IFF WHILE ELSE BREAK CONT HALT MSAVE +%token <integer> PUSH MREST POP WAIT DO ENDIF ILLEGAL +%token <integer> MATH_INST +%token <integer> MAD LRP BFE BFI2 SUBB +%token <integer> CALL RET +%token <integer> BRD BRC + +%token NULL_TOKEN MATH SAMPLER GATEWAY READ WRITE URB THREAD_SPAWNER VME DATA_PORT CRE + +%token MSGLEN RETURNLEN +%token <integer> ALLOCATE USED COMPLETE TRANSPOSE INTERLEAVE +%token SATURATE + +%token <integer> INTEGER +%token <string> STRING +%token <number> NUMBER + +%token <integer> INV LOG EXP SQRT RSQ POW SIN COS SINCOS INTDIV INTMOD +%token <integer> INTDIVMOD +%token SIGNED SCALAR + +%token <integer> X Y Z W + +%token <integer> KERNEL_PRAGMA END_KERNEL_PRAGMA CODE_PRAGMA END_CODE_PRAGMA +%token <integer> REG_COUNT_PAYLOAD_PRAGMA REG_COUNT_TOTAL_PRAGMA DECLARE_PRAGMA +%token <integer> BASE ELEMENTSIZE SRCREGION DSTREGION TYPE + +%token <integer> DEFAULT_EXEC_SIZE_PRAGMA DEFAULT_REG_TYPE_PRAGMA +%nonassoc SUBREGNUM +%nonassoc SNDOPR +%left PLUS MINUS +%left MULTIPLY DIVIDE +%right UMINUS +%nonassoc DOT +%nonassoc STR_SYMBOL_REG +%nonassoc EMPTEXECSIZE +%nonassoc LPAREN + +%type <integer> exp sndopr +%type <integer> simple_int +%type <instruction> instruction unaryinstruction binaryinstruction +%type <instruction> binaryaccinstruction trinaryinstruction sendinstruction +%type <instruction> syncinstruction +%type <instruction> msgtarget +%type <instruction> mathinstruction +%type <instruction> nopinstruction +%type <instruction> relocatableinstruction breakinstruction +%type 
<instruction> ifelseinstruction loopinstruction haltinstruction +%type <instruction> multibranchinstruction subroutineinstruction jumpinstruction +%type <string> label +%type <program> instrseq +%type <integer> instoption +%type <integer> unaryop binaryop binaryaccop breakop +%type <integer> trinaryop +%type <condition> conditionalmodifier +%type <predicate> predicate +%type <options> instoptions instoption_list +%type <integer> condition saturate negate abs chansel +%type <integer> writemask_x writemask_y writemask_z writemask_w +%type <integer> srcimmtype execsize dstregion immaddroffset +%type <integer> subregnum sampler_datatype +%type <integer> urb_swizzle urb_allocate urb_used urb_complete +%type <integer> math_function math_signed math_scalar +%type <integer> predctrl predstate +%type <region> region region_wh indirectregion declare_srcregion; +%type <regtype> regtype +%type <reg> directgenreg directmsgreg addrreg accreg flagreg maskreg +%type <reg> maskstackreg notifyreg +/* %type <reg> maskstackdepthreg */ +%type <reg> statereg controlreg ipreg nullreg +%type <reg> dstoperandex_typed srcarchoperandex_typed +%type <reg> sendleadreg +%type <reg> indirectgenreg indirectmsgreg addrparam +%type <integer> mask_subreg maskstack_subreg +%type <integer> declare_elementsize declare_dstregion declare_type +/* %type <intger> maskstackdepth_subreg */ +%type <symbol_reg> symbol_reg symbol_reg_p; +%type <imm32> imm32 +%type <reg> dst dstoperand dstoperandex dstreg post_dst writemask +%type <reg> declare_base +%type <src_operand> directsrcoperand srcarchoperandex directsrcaccoperand +%type <src_operand> indirectsrcoperand +%type <src_operand> src srcimm imm32reg payload srcacc srcaccimm swizzle +%type <src_operand> relativelocation relativelocation2 + +%code { + +#undef error +#define error(l, fmt, ...) 
\ + do { \ + message(ERROR, l, fmt, ## __VA_ARGS__); \ + YYERROR; \ + } while(0) + +static void add_option(struct options *options, int option) +{ + switch (option) { + case ALIGN1: + options->access_mode = BRW_ALIGN_1; + break; + case ALIGN16: + options->access_mode = BRW_ALIGN_16; + break; + case SECHALF: + options->compression_control |= BRW_COMPRESSION_2NDHALF; + break; + case COMPR: + if (!IS_GENp(6)) + options->compression_control |= BRW_COMPRESSION_COMPRESSED; + break; + case SWITCH: + options->thread_control |= BRW_THREAD_SWITCH; + break; + case ATOMIC: + options->thread_control |= BRW_THREAD_ATOMIC; + break; + case NODDCHK: + options->dependency_control |= BRW_DEPENDENCY_NOTCHECKED; + break; + case NODDCLR: + options->dependency_control |= BRW_DEPENDENCY_NOTCLEARED; + break; + case MASK_DISABLE: + options->mask_control = BRW_MASK_DISABLE; + break; + case BREAKPOINT: + options->debug_control = BRW_DEBUG_BREAKPOINT; + break; + case ACCWRCTRL: + options->acc_wr_control = BRW_ACCUMULATOR_WRITE_ENABLE; + break; + case EOT: + options->end_of_thread = 1; + break; + } +} + +} + +%% +simple_int: INTEGER { $$ = $1; } + | MINUS INTEGER { $$ = -$2;} +; + +exp: INTEGER { $$ = $1; } + | exp PLUS exp { $$ = $1 + $3; } + | exp MINUS exp { $$ = $1 - $3; } + | exp MULTIPLY exp { $$ = $1 * $3; } + | exp DIVIDE exp { if ($3) $$ = $1 / $3; else YYERROR;} + | MINUS exp %prec UMINUS { $$ = -$2;} + | LPAREN exp RPAREN { $$ = $2; } + ; + +ROOT: instrseq + { + compiled_program = $1; + } +; + + +label: STRING COLON +; + +declare_base: BASE EQ dstreg + { + $$ = $3; + } +; +declare_elementsize: ELEMENTSIZE EQ exp + { + $$ = $3; + } +; +declare_srcregion: /* empty */ + { + /* XXX is this default correct?*/ + memset (&$$, '\0', sizeof ($$)); + $$.vert_stride = ffs(0); + $$.width = BRW_WIDTH_1; + $$.horiz_stride = ffs(0); + } + | SRCREGION EQ region + { + $$ = $3; + } +; +declare_dstregion: /* empty */ + { + $$ = 1; + } + | DSTREGION EQ dstregion + { + $$ = $3; + } +; +declare_type: TYPE 
EQ regtype + { + $$ = $3.type; + } +; +declare_pragma: DECLARE_PRAGMA STRING declare_base declare_elementsize declare_srcregion declare_dstregion declare_type + { + /* Register a named register declaration; a repeated declaration is + * accepted only when it matches the existing one field for field. */ + struct declared_register reg, *found, *new_reg; + + reg.name = $2; + reg.reg = $3; + reg.element_size = $4; + reg.src_region = $5; + reg.dst_region = $6; + reg.reg.type = $7; + + found = find_register($2); + if (found) { + if (!declared_register_equal(&reg, found)) + error(&@1, "%s already defined and definitions " + "don't agree\n", $2); + free($2); // $2 has been malloc'ed by strdup + } else { + new_reg = malloc(sizeof(struct declared_register)); + *new_reg = reg; + insert_register(new_reg); + } + } +; + +reg_count_total_pragma: REG_COUNT_TOTAL_PRAGMA exp +; +reg_count_payload_pragma: REG_COUNT_PAYLOAD_PRAGMA exp +; + +default_exec_size_pragma: DEFAULT_EXEC_SIZE_PRAGMA exp + { + program_defaults.execute_size = $2; + } +; +default_reg_type_pragma: DEFAULT_REG_TYPE_PRAGMA regtype + { + program_defaults.register_type = $2.type; + } +; +pragma: reg_count_total_pragma + |reg_count_payload_pragma + |default_exec_size_pragma + |default_reg_type_pragma + |declare_pragma +; + +instrseq: instrseq pragma + { + $$ = $1; + } + | instrseq instruction SEMICOLON + { + brw_program_add_instruction(&$1, &$2); + $$ = $1; + } + | instruction SEMICOLON + { + brw_program_init(&$$); + brw_program_add_instruction(&$$, &$1); + } + | instrseq relocatableinstruction SEMICOLON + { + brw_program_add_relocatable(&$1, &$2); + $$ = $1; + } + | relocatableinstruction SEMICOLON + { + brw_program_init(&$$); + brw_program_add_relocatable(&$$, &$1); + } + | instrseq SEMICOLON + { + $$ = $1; + } + | instrseq label + { + brw_program_add_label(&$1, $2); + $$ = $1; + } + | label + { + brw_program_init(&$$); + brw_program_add_label(&$$, $1); + } + | pragma + { + $$.first = NULL; + $$.last = NULL; + } + | instrseq error SEMICOLON { + $$ = $1; + } +; + +/* 1.4.1: Instruction groups */ +// binaryinstruction: Source operands cannot be accumulators 
+// binaryaccinstruction: Source operands can be accumulators +instruction: unaryinstruction + | binaryinstruction + | binaryaccinstruction + | trinaryinstruction + | sendinstruction + | syncinstruction + | mathinstruction + | nopinstruction +; + +/* relocatableinstruction are instructions that needs a relocation pass */ +relocatableinstruction: ifelseinstruction + | loopinstruction + | haltinstruction + | multibranchinstruction + | subroutineinstruction + | jumpinstruction + | breakinstruction +; + +ifelseinstruction: ENDIF + { + // for Gen4 + if(IS_GENp(6)) // For gen6+. + error(&@1, "should be 'ENDIF execsize relativelocation'\n"); + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $1); + GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH; + GEN(&$$)->bits1.da1.dest_horiz_stride = 1; + GEN(&$$)->bits1.da1.src1_reg_file = BRW_ARCHITECTURE_REGISTER_FILE; + GEN(&$$)->bits1.da1.src1_reg_type = BRW_REGISTER_TYPE_UD; + } + | ENDIF execsize relativelocation instoptions + { + // for Gen6+ + /* Gen6, Gen7 bspec: predication is prohibited */ + if(!IS_GENp(6)) // for gen6- + error(&@1, "ENDIF Syntax error: should be 'ENDIF'\n"); + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $1); + GEN(&$$)->header.execution_size = $2; + $$.reloc.first_reloc_target = $3.reloc_target; + $$.reloc.first_reloc_offset = $3.imm32; + } + | ELSE execsize relativelocation instoptions + { + if(!IS_GENp(6)) { + // for Gen4, Gen5. gen_level < 60 + /* Set the istack pop count, which must always be 1. 
*/ + $3.imm32 |= (1 << 16); + + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $1); + GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH; + ip_dst.width = $2; + set_instruction_dest(&$$, &ip_dst); + set_instruction_src0(&$$, &ip_src, NULL); + set_instruction_src1(&$$, &$3, NULL); + $$.reloc.first_reloc_target = $3.reloc_target; + $$.reloc.first_reloc_offset = $3.imm32; + } else if(IS_GENp(6)) { + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $1); + GEN(&$$)->header.execution_size = $2; + $$.reloc.first_reloc_target = $3.reloc_target; + $$.reloc.first_reloc_offset = $3.imm32; + } else { + error(&@1, "'ELSE' instruction is not implemented.\n"); + } + } + | predicate IF execsize relativelocation + { + /* The branch instructions require that the IP register + * be the destination and first source operand, while the + * offset is the second source operand. The offset is added + * to the pre-incremented IP. + */ + if(IS_GENp(7)) /* Error in Gen7+. */ + error(&@2, "IF should be 'IF execsize JIP UIP'\n"); + + memset(&$$, 0, sizeof($$)); + set_instruction_predicate(&$$, &$1); + set_instruction_opcode(&$$, $2); + if(!IS_GENp(6)) { + GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH; + ip_dst.width = $3; + set_instruction_dest(&$$, &ip_dst); + set_instruction_src0(&$$, &ip_src, NULL); + set_instruction_src1(&$$, &$4, NULL); + } + $$.reloc.first_reloc_target = $4.reloc_target; + $$.reloc.first_reloc_offset = $4.imm32; + } + | predicate IF execsize relativelocation relativelocation + { + /* for Gen7+ */ + if(!IS_GENp(7)) + error(&@2, "IF should be 'IF execsize relativelocation'\n"); + + memset(&$$, 0, sizeof($$)); + set_instruction_predicate(&$$, &$1); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.execution_size = $3; + $$.reloc.first_reloc_target = $4.reloc_target; + $$.reloc.first_reloc_offset = $4.imm32; + $$.reloc.second_reloc_target = $5.reloc_target; + $$.reloc.second_reloc_offset = $5.imm32; + } +; + +loopinstruction: predicate WHILE 
execsize relativelocation instoptions
+        {
+            if(!IS_GENp(6)) {
+                /* The branch instructions require that the IP register
+                 * be the destination and first source operand, while the
+                 * offset is the second source operand. The offset is added
+                 * to the pre-incremented IP.
+                 */
+                /* Clear the instruction before any field is set: the
+                 * destination used to be written ahead of the memset and
+                 * was therefore wiped.  Order now matches the ELSE and IF
+                 * rules.
+                 */
+                memset(&$$, 0, sizeof($$));
+                set_instruction_predicate(&$$, &$1);
+                set_instruction_opcode(&$$, $2);
+                GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH;
+                ip_dst.width = $3;
+                set_instruction_dest(&$$, &ip_dst);
+                set_instruction_src0(&$$, &ip_src, NULL);
+                set_instruction_src1(&$$, &$4, NULL);
+                $$.reloc.first_reloc_target = $4.reloc_target;
+                $$.reloc.first_reloc_offset = $4.imm32;
+            } else if (IS_GENp(6)) {
+                /* Gen6 spec:
+                     dest must have the same element size as src0.
+                     dest horizontal stride must be 1. */
+                memset(&$$, 0, sizeof($$));
+                set_instruction_predicate(&$$, &$1);
+                set_instruction_opcode(&$$, $2);
+                GEN(&$$)->header.execution_size = $3;
+                $$.reloc.first_reloc_target = $4.reloc_target;
+                $$.reloc.first_reloc_offset = $4.imm32;
+            } else {
+                /* NOTE(review): looks unreachable if IS_GENp() is a pure
+                 * predicate -- confirm before removing. */
+                error(&@2, "'WHILE' instruction is not implemented!\n");
+            }
+        }
+        | DO
+        {
+            // deprecated
+            memset(&$$, 0, sizeof($$));
+            set_instruction_opcode(&$$, $1);
+        };
+
+/* haltinstruction: HALT carries two relocation targets; destination and
+ * src0 are the null register, with the destination width taken from the
+ * execsize operand.
+ */
+haltinstruction: predicate HALT execsize relativelocation relativelocation instoptions
+        {
+            // for Gen6, Gen7
+            /* Gen6, Gen7 bspec: dst and src0 must be the null reg. */
+            memset(&$$, 0, sizeof($$));
+            set_instruction_predicate(&$$, &$1);
+            set_instruction_opcode(&$$, $2);
+            $$.reloc.first_reloc_target = $4.reloc_target;
+            $$.reloc.first_reloc_offset = $4.imm32;
+            $$.reloc.second_reloc_target = $5.reloc_target;
+            $$.reloc.second_reloc_offset = $5.imm32;
+            dst_null_reg.width = $3;
+            set_instruction_dest(&$$, &dst_null_reg);
+            set_instruction_src0(&$$, &src_null_reg, NULL);
+        };
+
+multibranchinstruction:
+        predicate BRD execsize relativelocation instoptions
+        {
+            /* Gen7 bspec: dest must be null.
use Switch option */ + memset(&$$, 0, sizeof($$)); + set_instruction_predicate(&$$, &$1); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH; + $$.reloc.first_reloc_target = $4.reloc_target; + $$.reloc.first_reloc_offset = $4.imm32; + dst_null_reg.width = $3; + set_instruction_dest(&$$, &dst_null_reg); + } + | predicate BRC execsize relativelocation relativelocation instoptions + { + /* Gen7 bspec: dest must be null. src0 must be null. use Switch option */ + memset(&$$, 0, sizeof($$)); + set_instruction_predicate(&$$, &$1); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH; + $$.reloc.first_reloc_target = $4.reloc_target; + $$.reloc.first_reloc_offset = $4.imm32; + $$.reloc.second_reloc_target = $5.reloc_target; + $$.reloc.second_reloc_offset = $5.imm32; + dst_null_reg.width = $3; + set_instruction_dest(&$$, &dst_null_reg); + set_instruction_src0(&$$, &src_null_reg, NULL); + } +; + +subroutineinstruction: + predicate CALL execsize dst relativelocation instoptions + { + /* + Gen6 bspec: + source, dest type should be DWORD. + dest must be QWord aligned. + source0 region control must be <2,2,1>. + execution size must be 2. + QtrCtrl is prohibited. + JIP is an immediate operand, must be of type W. + Gen7 bspec: + source, dest type should be DWORD. + dest must be QWord aligned. + source0 region control must be <2,2,1>. + execution size must be 2. + */ + memset(&$$, 0, sizeof($$)); + set_instruction_predicate(&$$, &$1); + set_instruction_opcode(&$$, $2); + + $4.type = BRW_REGISTER_TYPE_D; /* dest type should be DWORD */ + $4.width = BRW_WIDTH_2; /* execution size must be 2. */ + set_instruction_dest(&$$, &$4); + + struct src_operand src0; + memset(&src0, 0, sizeof(src0)); + src0.reg.type = BRW_REGISTER_TYPE_D; /* source type should be DWORD */ + /* source0 region control must be <2,2,1>. 
*/ + src0.reg.hstride = 1; /*encoded 1*/ + src0.reg.width = BRW_WIDTH_2; + src0.reg.vstride = 2; /*encoded 2*/ + set_instruction_src0(&$$, &src0, NULL); + + $$.reloc.first_reloc_target = $5.reloc_target; + $$.reloc.first_reloc_offset = $5.imm32; + } + | predicate RET execsize dstoperandex src instoptions + { + /* + Gen6, 7: + source cannot be accumulator. + dest must be null. + src0 region control must be <2,2,1> (not specified clearly. should be same as CALL) + */ + memset(&$$, 0, sizeof($$)); + set_instruction_predicate(&$$, &$1); + set_instruction_opcode(&$$, $2); + dst_null_reg.width = BRW_WIDTH_2; /* execution size of RET should be 2 */ + set_instruction_dest(&$$, &dst_null_reg); + $5.reg.type = BRW_REGISTER_TYPE_D; + $5.reg.hstride = 1; /*encoded 1*/ + $5.reg.width = BRW_WIDTH_2; + $5.reg.vstride = 2; /*encoded 2*/ + set_instruction_src0(&$$, &$5, NULL); + } +; + +unaryinstruction: + predicate unaryop conditionalmodifier saturate execsize + dst srcaccimm instoptions + { + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + set_instruction_saturate(&$$, $4); + $6.width = $5; + set_instruction_options(&$$, $8); + set_instruction_pred_cond(&$$, &$1, &$3, &@3); + if (set_instruction_dest(&$$, &$6) != 0) + YYERROR; + if (set_instruction_src0(&$$, &$7, &@7) != 0) + YYERROR; + + if (!IS_GENp(6) && + get_type_size(GEN(&$$)->bits1.da1.dest_reg_type) * (1 << $6.width) == 64) + GEN(&$$)->header.compression_control = BRW_COMPRESSION_COMPRESSED; + } +; + +unaryop: MOV | FRC | RNDU | RNDD | RNDE | RNDZ | NOT | LZD | BFREV | CBIT + | F16TO32 | F32TO16 | FBH | FBL +; + +// Source operands cannot be accumulators +binaryinstruction: + predicate binaryop conditionalmodifier saturate execsize + dst src srcimm instoptions + { + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + set_instruction_saturate(&$$, $4); + set_instruction_options(&$$, $9); + set_instruction_pred_cond(&$$, &$1, &$3, &@3); + $6.width = $5; + if (set_instruction_dest(&$$, &$6) != 
0) + YYERROR; + if (set_instruction_src0(&$$, &$7, &@7) != 0) + YYERROR; + if (set_instruction_src1(&$$, &$8, &@8) != 0) + YYERROR; + + if (!IS_GENp(6) && + get_type_size(GEN(&$$)->bits1.da1.dest_reg_type) * (1 << $6.width) == 64) + GEN(&$$)->header.compression_control = BRW_COMPRESSION_COMPRESSED; + } +; + +/* bspec: BFI1 should not access accumulator. */ +binaryop: MUL | MAC | MACH | LINE | SAD2 | SADA2 | DP4 | DPH | DP3 | DP2 | PLN | BFI1 +; + +// Source operands can be accumulators +binaryaccinstruction: + predicate binaryaccop conditionalmodifier saturate execsize + dst srcacc srcimm instoptions + { + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + set_instruction_saturate(&$$, $4); + $6.width = $5; + set_instruction_options(&$$, $9); + set_instruction_pred_cond(&$$, &$1, &$3, &@3); + if (set_instruction_dest(&$$, &$6) != 0) + YYERROR; + if (set_instruction_src0(&$$, &$7, &@7) != 0) + YYERROR; + if (set_instruction_src1(&$$, &$8, &@8) != 0) + YYERROR; + + if (!IS_GENp(6) && + get_type_size(GEN(&$$)->bits1.da1.dest_reg_type) * (1 << $6.width) == 64) + GEN(&$$)->header.compression_control = BRW_COMPRESSION_COMPRESSED; + } +; + +/* TODO: bspec says ADDC/SUBB/CMP/CMPN/SHL/BFI1 cannot use accumulator as dest. 
*/ +binaryaccop: AVG | ADD | SEL | AND | OR | XOR | SHR | SHL | ASR | CMP | CMPN | ADDC | SUBB +; + +trinaryop: MAD | LRP | BFE | BFI2 +; + +trinaryinstruction: + predicate trinaryop conditionalmodifier saturate execsize + dst src src src instoptions +{ + memset(&$$, 0, sizeof($$)); + + set_instruction_pred_cond(&$$, &$1, &$3, &@3); + + set_instruction_opcode(&$$, $2); + set_instruction_saturate(&$$, $4); + + $6.width = $5; + if (set_instruction_dest_three_src(&$$, &$6)) + YYERROR; + if (set_instruction_src0_three_src(&$$, &$7)) + YYERROR; + if (set_instruction_src1_three_src(&$$, &$8)) + YYERROR; + if (set_instruction_src2_three_src(&$$, &$9)) + YYERROR; + set_instruction_options(&$$, $10); +} +; + +sendinstruction: predicate SEND execsize exp post_dst payload msgtarget + MSGLEN exp RETURNLEN exp instoptions + { + /* Send instructions are messy. The first argument is the + * post destination -- the grf register that the response + * starts from. The second argument is the current + * destination, which is the start of the message arguments + * to the shared function, and where src0 payload is loaded + * to if not null. The payload is typically based on the + * grf 0 thread payload of your current thread, and is + * implicitly loaded if non-null. 
+ */ + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + $5.width = $3; + GEN(&$$)->header.destreg__conditionalmod = $4; /* msg reg index */ + set_instruction_predicate(&$$, &$1); + if (set_instruction_dest(&$$, &$5) != 0) + YYERROR; + + if (IS_GENp(6)) { + struct src_operand src0; + + memset(&src0, 0, sizeof(src0)); + src0.reg.address_mode = BRW_ADDRESS_DIRECT; + + if (IS_GENp(7)) + src0.reg.file = BRW_GENERAL_REGISTER_FILE; + else + src0.reg.file = BRW_MESSAGE_REGISTER_FILE; + + src0.reg.type = BRW_REGISTER_TYPE_D; + src0.reg.nr = $4; + src0.reg.subnr = 0; + set_instruction_src0(&$$, &src0, NULL); + } else { + if (set_instruction_src0(&$$, &$6, &@6) != 0) + YYERROR; + } + + GEN(&$$)->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE; + GEN(&$$)->bits1.da1.src1_reg_type = BRW_REGISTER_TYPE_D; + + if (IS_GENp(5)) { + if (IS_GENp(6)) { + GEN(&$$)->header.destreg__conditionalmod = GEN(&$7)->bits2.send_gen5.sfid; + } else { + GEN(&$$)->header.destreg__conditionalmod = $4; /* msg reg index */ + GEN(&$$)->bits2.send_gen5.sfid = GEN(&$7)->bits2.send_gen5.sfid; + GEN(&$$)->bits2.send_gen5.end_of_thread = $12.end_of_thread; + } + + GEN(&$$)->bits3.generic_gen5 = GEN(&$7)->bits3.generic_gen5; + GEN(&$$)->bits3.generic_gen5.msg_length = $9; + GEN(&$$)->bits3.generic_gen5.response_length = $11; + GEN(&$$)->bits3.generic_gen5.end_of_thread = $12.end_of_thread; + } else { + GEN(&$$)->header.destreg__conditionalmod = $4; /* msg reg index */ + GEN(&$$)->bits3.generic = GEN(&$7)->bits3.generic; + GEN(&$$)->bits3.generic.msg_length = $9; + GEN(&$$)->bits3.generic.response_length = $11; + GEN(&$$)->bits3.generic.end_of_thread = $12.end_of_thread; + } + } + | predicate SEND execsize dst sendleadreg payload directsrcoperand instoptions + { + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */ + + set_instruction_predicate(&$$, &$1); + + $4.width = $3; + if (set_instruction_dest(&$$, &$4) != 
0) + YYERROR; + if (set_instruction_src0(&$$, &$6, &@6) != 0) + YYERROR; + /* XXX is this correct? */ + if (set_instruction_src1(&$$, &$7, &@7) != 0) + YYERROR; + + } + | predicate SEND execsize dst sendleadreg payload imm32reg instoptions + { + if ($7.reg.type != BRW_REGISTER_TYPE_UD && + $7.reg.type != BRW_REGISTER_TYPE_D && + $7.reg.type != BRW_REGISTER_TYPE_V) { + error (&@7, "non-int D/UD/V representation: %d," + "type=%d\n", $7.reg.dw1.ud, $7.reg.type); + } + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */ + + set_instruction_predicate(&$$, &$1); + $4.width = $3; + if (set_instruction_dest(&$$, &$4) != 0) + YYERROR; + if (set_instruction_src0(&$$, &$6, &@6) != 0) + YYERROR; + if (set_instruction_src1(&$$, &$7, &@7) != 0) + YYERROR; + } + | predicate SEND execsize dst sendleadreg sndopr imm32reg instoptions + { + struct src_operand src0; + + if (!IS_GENp(6)) + error(&@2, "the syntax of send instruction\n"); + + if ($7.reg.type != BRW_REGISTER_TYPE_UD && + $7.reg.type != BRW_REGISTER_TYPE_D && + $7.reg.type != BRW_REGISTER_TYPE_V) { + error(&@7,"non-int D/UD/V representation: %d," + "type=%d\n", $7.reg.dw1.ud, $7.reg.type); + } + + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.destreg__conditionalmod = ($6 & EX_DESC_SFID_MASK); /* SFID */ + set_instruction_predicate(&$$, &$1); + + $4.width = $3; + if (set_instruction_dest(&$$, &$4) != 0) + YYERROR; + + memset(&src0, 0, sizeof(src0)); + src0.reg.address_mode = BRW_ADDRESS_DIRECT; + + if (IS_GENp(7)) { + src0.reg.file = BRW_GENERAL_REGISTER_FILE; + src0.reg.type = BRW_REGISTER_TYPE_UB; + } else { + src0.reg.file = BRW_MESSAGE_REGISTER_FILE; + src0.reg.type = BRW_REGISTER_TYPE_D; + } + + src0.reg.nr = $5.nr; + src0.reg.subnr = 0; + set_instruction_src0(&$$, &src0, NULL); + set_instruction_src1(&$$, &$7, NULL); + + GEN(&$$)->bits3.generic_gen5.end_of_thread = !!($6 & EX_DESC_EOT_MASK); + } + | 
predicate SEND execsize dst sendleadreg sndopr directsrcoperand instoptions + { + struct src_operand src0; + + if (!IS_GENp(6)) + error(&@2, "the syntax of send instruction\n"); + + if ($7.reg.file != BRW_ARCHITECTURE_REGISTER_FILE || + ($7.reg.nr & 0xF0) != BRW_ARF_ADDRESS || + ($7.reg.nr & 0x0F) != 0 || + $7.reg.subnr != 0) { + error (&@7, "scalar register must be a0.0<0;1,0>:ud\n"); + } + + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.destreg__conditionalmod = ($6 & EX_DESC_SFID_MASK); /* SFID */ + set_instruction_predicate(&$$, &$1); + + $4.width = $3; + if (set_instruction_dest(&$$, &$4) != 0) + YYERROR; + + memset(&src0, 0, sizeof(src0)); + src0.reg.address_mode = BRW_ADDRESS_DIRECT; + + if (IS_GENp(7)) { + src0.reg.file = BRW_GENERAL_REGISTER_FILE; + src0.reg.type = BRW_REGISTER_TYPE_UB; + } else { + src0.reg.file = BRW_MESSAGE_REGISTER_FILE; + src0.reg.type = BRW_REGISTER_TYPE_D; + } + + src0.reg.nr = $5.nr; + src0.reg.subnr = 0; + set_instruction_src0(&$$, &src0, NULL); + + set_instruction_src1(&$$, &$7, &@7); + GEN(&$$)->bits3.generic_gen5.end_of_thread = !!($6 & EX_DESC_EOT_MASK); + } + | predicate SEND execsize dst sendleadreg payload sndopr imm32reg instoptions + { + if ($8.reg.type != BRW_REGISTER_TYPE_UD && + $8.reg.type != BRW_REGISTER_TYPE_D && + $8.reg.type != BRW_REGISTER_TYPE_V) { + error(&@8, "non-int D/UD/V representation: %d," + "type=%d\n", $8.reg.dw1.ud, $8.reg.type); + } + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */ + + set_instruction_predicate(&$$, &$1); + $4.width = $3; + if (set_instruction_dest(&$$, &$4) != 0) + YYERROR; + if (set_instruction_src0(&$$, &$6, &@6) != 0) + YYERROR; + if (set_instruction_src1(&$$, &$8, &@8) != 0) + YYERROR; + + if (IS_GENx(5)) { + GEN(&$$)->bits2.send_gen5.sfid = ($7 & EX_DESC_SFID_MASK); + GEN(&$$)->bits3.generic_gen5.end_of_thread = !!($7 & EX_DESC_EOT_MASK); + } + } + | 
predicate SEND execsize dst sendleadreg payload exp directsrcoperand instoptions + { + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */ + + set_instruction_predicate(&$$, &$1); + + $4.width = $3; + if (set_instruction_dest(&$$, &$4) != 0) + YYERROR; + if (set_instruction_src0(&$$, &$6, &@6) != 0) + YYERROR; + /* XXX is this correct? */ + if (set_instruction_src1(&$$, &$8, &@8) != 0) + YYERROR; + if (IS_GENx(5)) { + GEN(&$$)->bits2.send_gen5.sfid = $7; + } + } + +; + +sndopr: exp %prec SNDOPR + { + $$ = $1; + } +; + +jumpinstruction: predicate JMPI execsize relativelocation2 + { + /* The jump instruction requires that the IP register + * be the destination and first source operand, while the + * offset is the second source operand. The next instruction + * is the post-incremented IP plus the offset. + */ + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + if(advanced_flag) + GEN(&$$)->header.mask_control = BRW_MASK_DISABLE; + set_instruction_predicate(&$$, &$1); + ip_dst.width = BRW_WIDTH_1; + set_instruction_dest(&$$, &ip_dst); + set_instruction_src0(&$$, &ip_src, NULL); + set_instruction_src1(&$$, &$4, NULL); + $$.reloc.first_reloc_target = $4.reloc_target; + $$.reloc.first_reloc_offset = $4.imm32; + } +; + +mathinstruction: predicate MATH_INST execsize dst src srcimm math_function instoptions + { + memset(&$$, 0, sizeof($$)); + set_instruction_opcode(&$$, $2); + GEN(&$$)->header.destreg__conditionalmod = $7; + set_instruction_options(&$$, $8); + set_instruction_predicate(&$$, &$1); + $4.width = $3; + if (set_instruction_dest(&$$, &$4) != 0) + YYERROR; + if (set_instruction_src0(&$$, &$5, &@5) != 0) + YYERROR; + if (set_instruction_src1(&$$, &$6, &@6) != 0) + YYERROR; + } +; + +breakinstruction: predicate breakop execsize relativelocation relativelocation instoptions + { + // for Gen6, Gen7 + memset(&$$, 0, sizeof($$)); + set_instruction_predicate(&$$, &$1); + 
set_instruction_opcode(&$$, $2);
+            GEN(&$$)->header.execution_size = $3;
+            $$.reloc.first_reloc_target = $4.reloc_target;
+            $$.reloc.first_reloc_offset = $4.imm32;
+            $$.reloc.second_reloc_target = $5.reloc_target;
+            $$.reloc.second_reloc_offset = $5.imm32;
+        }
+;
+
+breakop: BREAK | CONT
+;
+
+/*
+maskpushop: MSAVE | PUSH
+;
+ */
+
+/* syncinstruction: WAIT uses the notification register as both destination
+ * and src0 (direct operands of type D, destination width 1); src1 is the
+ * null register.
+ * NOTE(review): the parsed predicate is not applied to the instruction
+ * here -- confirm that is intentional.
+ */
+syncinstruction: predicate WAIT notifyreg
+        {
+            struct brw_reg notify_dst;
+            struct src_operand notify_src;
+
+            memset(&$$, 0, sizeof($$));
+            set_instruction_opcode(&$$, $2);
+            set_direct_dst_operand(&notify_dst, &$3, BRW_REGISTER_TYPE_D);
+            notify_dst.width = BRW_WIDTH_1;
+            set_instruction_dest(&$$, &notify_dst);
+            set_direct_src_operand(&notify_src, &$3, BRW_REGISTER_TYPE_D);
+            set_instruction_src0(&$$, &notify_src, NULL);
+            set_instruction_src1(&$$, &src_null_reg, NULL);
+        }
+
+;
+
+nopinstruction: NOP
+        {
+            memset(&$$, 0, sizeof($$));
+            set_instruction_opcode(&$$, $1);
+        };
+
+/* XXX! */
+payload: directsrcoperand
+;
+
+post_dst: dst
+;
+
+msgtarget: NULL_TOKEN
+        {
+            if (IS_GENp(5)) {
+                GEN(&$$)->bits2.send_gen5.sfid= BRW_SFID_NULL;
+                GEN(&$$)->bits3.generic_gen5.header_present = 0; /* ??? */
+            } else {
+                GEN(&$$)->bits3.generic.msg_target = BRW_SFID_NULL;
+            }
+        }
+        | SAMPLER LPAREN INTEGER COMMA INTEGER COMMA
+          sampler_datatype RPAREN
+        {
+            if (IS_GENp(7)) {
+                GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_SAMPLER;
+                GEN(&$$)->bits3.generic_gen5.header_present = 1; /* ??? */
+                GEN(&$$)->bits3.sampler_gen7.binding_table_index = $3;
+                GEN(&$$)->bits3.sampler_gen7.sampler = $5;
+                GEN(&$$)->bits3.sampler_gen7.simd_mode = 2; /* SIMD16, maybe we should add a new parameter */
+            } else if (IS_GENp(5)) {
+                GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_SAMPLER;
+                GEN(&$$)->bits3.generic_gen5.header_present = 1; /* ???
*/ + GEN(&$$)->bits3.sampler_gen5.binding_table_index = $3; + GEN(&$$)->bits3.sampler_gen5.sampler = $5; + GEN(&$$)->bits3.sampler_gen5.simd_mode = 2; /* SIMD16, maybe we should add a new parameter */ + } else { + GEN(&$$)->bits3.generic.msg_target = BRW_SFID_SAMPLER; + GEN(&$$)->bits3.sampler.binding_table_index = $3; + GEN(&$$)->bits3.sampler.sampler = $5; + switch ($7) { + case TYPE_F: + GEN(&$$)->bits3.sampler.return_format = + BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + break; + case TYPE_UD: + GEN(&$$)->bits3.sampler.return_format = + BRW_SAMPLER_RETURN_FORMAT_UINT32; + break; + case TYPE_D: + GEN(&$$)->bits3.sampler.return_format = + BRW_SAMPLER_RETURN_FORMAT_SINT32; + break; + } + } + } + | MATH math_function saturate math_signed math_scalar + { + if (IS_GENp(6)) { + error (&@1, "Gen6+ doesn't have math function\n"); + } else if (IS_GENx(5)) { + GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_MATH; + GEN(&$$)->bits3.generic_gen5.header_present = 0; + GEN(&$$)->bits3.math_gen5.function = $2; + set_instruction_saturate(&$$, $3); + GEN(&$$)->bits3.math_gen5.int_type = $4; + GEN(&$$)->bits3.math_gen5.precision = BRW_MATH_PRECISION_FULL; + GEN(&$$)->bits3.math_gen5.data_type = $5; + } else { + GEN(&$$)->bits3.generic.msg_target = BRW_SFID_MATH; + GEN(&$$)->bits3.math.function = $2; + set_instruction_saturate(&$$, $3); + GEN(&$$)->bits3.math.int_type = $4; + GEN(&$$)->bits3.math.precision = BRW_MATH_PRECISION_FULL; + GEN(&$$)->bits3.math.data_type = $5; + } + } + | GATEWAY + { + if (IS_GENp(5)) { + GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_MESSAGE_GATEWAY; + GEN(&$$)->bits3.generic_gen5.header_present = 0; /* ??? 
*/ + } else { + GEN(&$$)->bits3.generic.msg_target = BRW_SFID_MESSAGE_GATEWAY; + } + } + | READ LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA + INTEGER RPAREN + { + if (IS_GENx(7)) { + GEN(&$$)->bits2.send_gen5.sfid = + GEN6_SFID_DATAPORT_SAMPLER_CACHE; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + GEN(&$$)->bits3.gen7_dp.binding_table_index = $3; + GEN(&$$)->bits3.gen7_dp.msg_control = $7; + GEN(&$$)->bits3.gen7_dp.msg_type = $9; + } else if (IS_GENx(6)) { + GEN(&$$)->bits2.send_gen5.sfid = + GEN6_SFID_DATAPORT_SAMPLER_CACHE; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + GEN(&$$)->bits3.gen6_dp_sampler_const_cache.binding_table_index = $3; + GEN(&$$)->bits3.gen6_dp_sampler_const_cache.msg_control = $7; + GEN(&$$)->bits3.gen6_dp_sampler_const_cache.msg_type = $9; + } else if (IS_GENx(5)) { + GEN(&$$)->bits2.send_gen5.sfid = + BRW_SFID_DATAPORT_READ; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + GEN(&$$)->bits3.dp_read_gen5.binding_table_index = $3; + GEN(&$$)->bits3.dp_read_gen5.target_cache = $5; + GEN(&$$)->bits3.dp_read_gen5.msg_control = $7; + GEN(&$$)->bits3.dp_read_gen5.msg_type = $9; + } else { + GEN(&$$)->bits3.generic.msg_target = + BRW_SFID_DATAPORT_READ; + GEN(&$$)->bits3.dp_read.binding_table_index = $3; + GEN(&$$)->bits3.dp_read.target_cache = $5; + GEN(&$$)->bits3.dp_read.msg_control = $7; + GEN(&$$)->bits3.dp_read.msg_type = $9; + } + } + | WRITE LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA + INTEGER RPAREN + { + if (IS_GENx(7)) { + GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + GEN(&$$)->bits3.gen7_dp.binding_table_index = $3; + GEN(&$$)->bits3.gen7_dp.msg_control = $5; + GEN(&$$)->bits3.gen7_dp.msg_type = $7; + } else if (IS_GENx(6)) { + GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + /* Sandybridge supports headerlesss message for render target write. + * Currently the GFX assembler doesn't support it. 
so the program must provide + * message header + */ + GEN(&$$)->bits3.generic_gen5.header_present = 1; + GEN(&$$)->bits3.gen6_dp.binding_table_index = $3; + GEN(&$$)->bits3.gen6_dp.msg_control = $5; + GEN(&$$)->bits3.gen6_dp.msg_type = $7; + GEN(&$$)->bits3.gen6_dp.send_commit_msg = $9; + } else if (IS_GENx(5)) { + GEN(&$$)->bits2.send_gen5.sfid = + BRW_SFID_DATAPORT_WRITE; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + GEN(&$$)->bits3.dp_write_gen5.binding_table_index = $3; + GEN(&$$)->bits3.dp_write_gen5.last_render_target = ($5 & 0x8) >> 3; + GEN(&$$)->bits3.dp_write_gen5.msg_control = $5 & 0x7; + GEN(&$$)->bits3.dp_write_gen5.msg_type = $7; + GEN(&$$)->bits3.dp_write_gen5.send_commit_msg = $9; + } else { + GEN(&$$)->bits3.generic.msg_target = + BRW_SFID_DATAPORT_WRITE; + GEN(&$$)->bits3.dp_write.binding_table_index = $3; + /* The msg control field of brw_struct.h is split into + * msg control and last_render_target, even though + * last_render_target isn't common to all write messages. 
+ */ + GEN(&$$)->bits3.dp_write.last_render_target = ($5 & 0x8) >> 3; + GEN(&$$)->bits3.dp_write.msg_control = $5 & 0x7; + GEN(&$$)->bits3.dp_write.msg_type = $7; + GEN(&$$)->bits3.dp_write.send_commit_msg = $9; + } + } + | WRITE LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA + INTEGER COMMA INTEGER RPAREN + { + if (IS_GENx(7)) { + GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + GEN(&$$)->bits3.generic_gen5.header_present = ($11 != 0); + GEN(&$$)->bits3.gen7_dp.binding_table_index = $3; + GEN(&$$)->bits3.gen7_dp.msg_control = $5; + GEN(&$$)->bits3.gen7_dp.msg_type = $7; + } else if (IS_GENx(6)) { + GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; + GEN(&$$)->bits3.generic_gen5.header_present = ($11 != 0); + GEN(&$$)->bits3.gen6_dp.binding_table_index = $3; + GEN(&$$)->bits3.gen6_dp.msg_control = $5; + GEN(&$$)->bits3.gen6_dp.msg_type = $7; + GEN(&$$)->bits3.gen6_dp.send_commit_msg = $9; + } else if (IS_GENx(5)) { + GEN(&$$)->bits2.send_gen5.sfid = + BRW_SFID_DATAPORT_WRITE; + GEN(&$$)->bits3.generic_gen5.header_present = ($11 != 0); + GEN(&$$)->bits3.dp_write_gen5.binding_table_index = $3; + GEN(&$$)->bits3.dp_write_gen5.last_render_target = ($5 & 0x8) >> 3; + GEN(&$$)->bits3.dp_write_gen5.msg_control = $5 & 0x7; + GEN(&$$)->bits3.dp_write_gen5.msg_type = $7; + GEN(&$$)->bits3.dp_write_gen5.send_commit_msg = $9; + } else { + GEN(&$$)->bits3.generic.msg_target = + BRW_SFID_DATAPORT_WRITE; + GEN(&$$)->bits3.dp_write.binding_table_index = $3; + /* The msg control field of brw_struct.h is split into + * msg control and last_render_target, even though + * last_render_target isn't common to all write messages. 
+ */ + GEN(&$$)->bits3.dp_write.last_render_target = ($5 & 0x8) >> 3; + GEN(&$$)->bits3.dp_write.msg_control = $5 & 0x7; + GEN(&$$)->bits3.dp_write.msg_type = $7; + GEN(&$$)->bits3.dp_write.send_commit_msg = $9; + } + } + | URB INTEGER urb_swizzle urb_allocate urb_used urb_complete + { + GEN(&$$)->bits3.generic.msg_target = BRW_SFID_URB; + if (IS_GENp(5)) { + GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_URB; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + set_instruction_opcode(&$$, BRW_URB_OPCODE_WRITE); + GEN(&$$)->bits3.urb_gen5.offset = $2; + GEN(&$$)->bits3.urb_gen5.swizzle_control = $3; + GEN(&$$)->bits3.urb_gen5.pad = 0; + GEN(&$$)->bits3.urb_gen5.allocate = $4; + GEN(&$$)->bits3.urb_gen5.used = $5; + GEN(&$$)->bits3.urb_gen5.complete = $6; + } else { + GEN(&$$)->bits3.generic.msg_target = BRW_SFID_URB; + set_instruction_opcode(&$$, BRW_URB_OPCODE_WRITE); + GEN(&$$)->bits3.urb.offset = $2; + GEN(&$$)->bits3.urb.swizzle_control = $3; + GEN(&$$)->bits3.urb.pad = 0; + GEN(&$$)->bits3.urb.allocate = $4; + GEN(&$$)->bits3.urb.used = $5; + GEN(&$$)->bits3.urb.complete = $6; + } + } + | THREAD_SPAWNER LPAREN INTEGER COMMA INTEGER COMMA + INTEGER RPAREN + { + GEN(&$$)->bits3.generic.msg_target = + BRW_SFID_THREAD_SPAWNER; + if (IS_GENp(5)) { + GEN(&$$)->bits2.send_gen5.sfid = + BRW_SFID_THREAD_SPAWNER; + GEN(&$$)->bits3.generic_gen5.header_present = 0; + GEN(&$$)->bits3.thread_spawner_gen5.opcode = $3; + GEN(&$$)->bits3.thread_spawner_gen5.requester_type = $5; + GEN(&$$)->bits3.thread_spawner_gen5.resource_select = $7; + } else { + GEN(&$$)->bits3.generic.msg_target = + BRW_SFID_THREAD_SPAWNER; + GEN(&$$)->bits3.thread_spawner.opcode = $3; + GEN(&$$)->bits3.thread_spawner.requester_type = $5; + GEN(&$$)->bits3.thread_spawner.resource_select = $7; + } + } + | VME LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA INTEGER RPAREN + { + GEN(&$$)->bits3.generic.msg_target = GEN6_SFID_VME; + + if (IS_GENp(6)) { + GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_VME; + 
GEN(&$$)->bits3.vme_gen6.binding_table_index = $3; + GEN(&$$)->bits3.vme_gen6.search_path_index = $5; + GEN(&$$)->bits3.vme_gen6.lut_subindex = $7; + GEN(&$$)->bits3.vme_gen6.message_type = $9; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + } else { + error (&@1, "Gen6- doesn't have vme function\n"); + } + } + | CRE LPAREN INTEGER COMMA INTEGER RPAREN + { + if (gen_level < 75) + error (&@1, "Below Gen7.5 doesn't have CRE function\n"); + + GEN(&$$)->bits3.generic.msg_target = HSW_SFID_CRE; + + GEN(&$$)->bits2.send_gen5.sfid = HSW_SFID_CRE; + GEN(&$$)->bits3.cre_gen75.binding_table_index = $3; + GEN(&$$)->bits3.cre_gen75.message_type = $5; + GEN(&$$)->bits3.generic_gen5.header_present = 1; + } + + | DATA_PORT LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA + INTEGER COMMA INTEGER COMMA INTEGER RPAREN + { + GEN(&$$)->bits2.send_gen5.sfid = $3; + GEN(&$$)->bits3.generic_gen5.header_present = ($13 != 0); + + if (IS_GENp(7)) { + if ($3 != GEN6_SFID_DATAPORT_SAMPLER_CACHE && + $3 != GEN6_SFID_DATAPORT_RENDER_CACHE && + $3 != GEN6_SFID_DATAPORT_CONSTANT_CACHE && + $3 != GEN7_SFID_DATAPORT_DATA_CACHE) { + error (&@3, "error: wrong cache type\n"); + } + + GEN(&$$)->bits3.gen7_dp.category = $11; + GEN(&$$)->bits3.gen7_dp.binding_table_index = $9; + GEN(&$$)->bits3.gen7_dp.msg_control = $7; + GEN(&$$)->bits3.gen7_dp.msg_type = $5; + } else if (IS_GENx(6)) { + if ($3 != GEN6_SFID_DATAPORT_SAMPLER_CACHE && + $3 != GEN6_SFID_DATAPORT_RENDER_CACHE && + $3 != GEN6_SFID_DATAPORT_CONSTANT_CACHE) { + error (&@3, "error: wrong cache type\n"); + } + + GEN(&$$)->bits3.gen6_dp.send_commit_msg = $11; + GEN(&$$)->bits3.gen6_dp.binding_table_index = $9; + GEN(&$$)->bits3.gen6_dp.msg_control = $7; + GEN(&$$)->bits3.gen6_dp.msg_type = $5; + } else if (!IS_GENp(5)) { + error (&@1, "Gen6- doesn't support data port for sampler/render/constant/data cache\n"); + } + } +; + +urb_allocate: ALLOCATE { $$ = 1; } + | /* empty */ { $$ = 0; } +; + +urb_used: USED { $$ = 1; } + | /* empty */ { $$ = 
0; } +; + +urb_complete: COMPLETE { $$ = 1; } + | /* empty */ { $$ = 0; } +; + +urb_swizzle: TRANSPOSE { $$ = BRW_URB_SWIZZLE_TRANSPOSE; } + | INTERLEAVE { $$ = BRW_URB_SWIZZLE_INTERLEAVE; } + | /* empty */ { $$ = BRW_URB_SWIZZLE_NONE; } +; + +sampler_datatype: + TYPE_F + | TYPE_UD + | TYPE_D +; + +math_function: INV | LOG | EXP | SQRT | POW | SIN | COS | SINCOS | INTDIV + | INTMOD | INTDIVMOD +; + +math_signed: /* empty */ { $$ = 0; } + | SIGNED { $$ = 1; } +; + +math_scalar: /* empty */ { $$ = 0; } + | SCALAR { $$ = 1; } +; + +/* 1.4.2: Destination register */ + +dst: dstoperand | dstoperandex +; + +dstoperand: symbol_reg dstregion + { + $$ = $1.reg; + $$.hstride = resolve_dst_region(&$1, $2); + } + | dstreg dstregion writemask regtype + { + /* Returns an instruction with just the destination register + * filled in. + */ + $$ = $1; + $$.hstride = resolve_dst_region(NULL, $2); + $$.dw1.bits.writemask = $3.dw1.bits.writemask; + $$.type = $4.type; + } +; + +/* The dstoperandex returns an instruction with just the destination register + * filled in. 
+ */ +dstoperandex: dstoperandex_typed dstregion regtype + { + $$ = $1; + $$.hstride = resolve_dst_region(NULL, $2); + $$.type = $3.type; + } + | maskstackreg + { + /* maskstackreg, controlreg and ipreg take no region or type suffix in this rule: hstride is fixed to 1 and the type is hard-coded per register. */ $$ = $1; + $$.hstride = 1; + $$.type = BRW_REGISTER_TYPE_UW; + } + | controlreg + { + $$ = $1; + $$.hstride = 1; + $$.type = BRW_REGISTER_TYPE_UD; + } + | ipreg + { + $$ = $1; + $$.hstride = 1; + $$.type = BRW_REGISTER_TYPE_UD; + } + | nullreg dstregion regtype + { + $$ = $1; + $$.hstride = resolve_dst_region(NULL, $2); + $$.type = $3.type; + } +; + +dstoperandex_typed: accreg | flagreg | addrreg | maskreg +; + +symbol_reg: STRING %prec STR_SYMBOL_REG + { + /* Look up the register declared under this name, copy its declaration into $$ and release the name string. NOTE(review): the memcpy relies on error() not falling through when the lookup fails - confirm against the error macro. */ struct declared_register *dcl_reg = find_register($1); + + if (dcl_reg == NULL) + error(&@1, "can't find register %s\n", $1); + + memcpy(&$$, dcl_reg, sizeof(*dcl_reg)); + free($1); // $1 has been malloc'ed by strdup + } + | symbol_reg_p + { + $$=$1; + } +; + +symbol_reg_p: STRING LPAREN exp RPAREN + { + struct declared_register *dcl_reg = find_register($1); + + if (dcl_reg == NULL) + error(&@1, "can't find register %s\n", $1); + + memcpy(&$$, dcl_reg, sizeof(*dcl_reg)); + $$.reg.nr += $3; + free($1); + } + | STRING LPAREN exp COMMA exp RPAREN + { + struct declared_register *dcl_reg = find_register($1); + + if (dcl_reg == NULL) + error(&@1, "can't find register %s\n", $1); + + memcpy(&$$, dcl_reg, sizeof(*dcl_reg)); + $$.reg.nr += $3; + if(advanced_flag) { + int size = get_type_size(dcl_reg->reg.type); + $$.reg.nr += ($$.reg.subnr + $5) / (32 / size); + $$.reg.subnr = ($$.reg.subnr + $5) % (32 / size); + } else { + $$.reg.nr += ($$.reg.subnr + $5) / 32; + $$.reg.subnr = ($$.reg.subnr + $5) % 32; + } + free($1); + } +; +/* Returns a partially complete destination register consisting of the + * direct or indirect register addressing fields, but not stride or writemask. 
+ */ +dstreg: directgenreg + { + $$ = $1; + $$.address_mode = BRW_ADDRESS_DIRECT; + } + | directmsgreg + { + $$ = $1; + $$.address_mode = BRW_ADDRESS_DIRECT; + } + | indirectgenreg + { + $$ = $1; + $$.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + } + | indirectmsgreg + { + $$ = $1; + $$.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + } +; + +/* 1.4.3: Source register */ +srcaccimm: srcacc | imm32reg +; + +srcacc: directsrcaccoperand | indirectsrcoperand +; + +srcimm: directsrcoperand | indirectsrcoperand| imm32reg +; + +imm32reg: imm32 srcimmtype + { + /* Encodes the parsed literal ($1) as the 32-bit payload d according to the requested immediate type ($2), then returns an immediate-file operand carrying d in dw1.ud. */ union { + int i; + float f; + } intfloat; + uint32_t d; + + switch ($2) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_V: + case BRW_REGISTER_TYPE_VF: + switch ($1.r) { + case imm32_d: + d = $1.u.d; + break; + default: + error (&@2, "non-int D/UD/V/VF representation: %d,type=%d\n", $1.r, $2); + } + break; + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + switch ($1.r) { + case imm32_d: + d = $1.u.d; + break; + default: + error (&@2, "non-int W/UW representation\n"); + } + /* Keep the low 16 bits and replicate them into the high half of the dword. */ d &= 0xffff; + d |= d << 16; + break; + case BRW_REGISTER_TYPE_F: + switch ($1.r) { + case imm32_f: + intfloat.f = $1.u.f; + break; + case imm32_d: + intfloat.f = (float) $1.u.d; + break; + default: + error (&@2, "non-float F representation\n"); + } + /* Read the float back through the union's int member to obtain its bit pattern. */ d = intfloat.i; + break; +#if 0 + case BRW_REGISTER_TYPE_VF: + fprintf (stderr, "Immediate type VF not supported yet\n"); + YYERROR; +#endif + default: + error(&@2, "unknown immediate type %d\n", $2); + } + memset (&$$, '\0', sizeof ($$)); + $$.reg.file = BRW_IMMEDIATE_VALUE; + $$.reg.type = $2; + $$.reg.dw1.ud = d; + } +; + +directsrcaccoperand: directsrcoperand + | accreg region regtype + { + set_direct_src_operand(&$$, &$1, $3.type); + $$.reg.vstride = $2.vert_stride; + $$.reg.width = $2.width; + $$.reg.hstride = $2.horiz_stride; + $$.default_region = $2.is_default; + } +; + +/* Returns a source operand in the src0 fields of an 
instruction. */ +srcarchoperandex: srcarchoperandex_typed region regtype + { + memset (&$$, '\0', sizeof ($$)); + $$.reg.file = $1.file; + $$.reg.type = $3.type; + $$.reg.subnr = $1.subnr; + $$.reg.nr = $1.nr; + $$.reg.vstride = $2.vert_stride; + $$.reg.width = $2.width; + $$.reg.hstride = $2.horiz_stride; + $$.default_region = $2.is_default; + $$.reg.negate = 0; + $$.reg.abs = 0; + } + | maskstackreg + { + /* The registers below carry no region suffix; set_direct_src_operand() is called with a fixed type for each. */ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UB); + } + | controlreg + { + set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD); + } +/* | statereg + { + set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD); + }*/ + | notifyreg + { + set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD); + } + | ipreg + { + set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD); + } + | nullreg region regtype + { + /* Null source: UD when no type suffix was written, otherwise the written type. The parsed region ($2) is not used and the operand is always flagged as having a default region. */ if ($3.is_default) { + set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD); + } else { + set_direct_src_operand(&$$, &$1, $3.type); + } + $$.default_region = 1; + } +; + +srcarchoperandex_typed: flagreg | addrreg | maskreg +; + +sendleadreg: symbol_reg + { + /* Only file, nr and subnr are taken from the declared register; every other field is zeroed. */ memset (&$$, '\0', sizeof ($$)); + $$.file = $1.reg.file; + $$.nr = $1.reg.nr; + $$.subnr = $1.reg.subnr; + } + | directgenreg | directmsgreg +; + +src: directsrcoperand | indirectsrcoperand +; + +directsrcoperand: negate abs symbol_reg region regtype + { + /* Declared-register source: an explicit type or region suffix overrides the declaration; when a suffix is omitted the declared type / src_region is used instead. */ memset (&$$, '\0', sizeof ($$)); + $$.reg.address_mode = BRW_ADDRESS_DIRECT; + $$.reg.file = $3.reg.file; + $$.reg.nr = $3.reg.nr; + $$.reg.subnr = $3.reg.subnr; + if ($5.is_default) { + $$.reg.type = $3.reg.type; + } else { + $$.reg.type = $5.type; + } + if ($4.is_default) { + $$.reg.vstride = $3.src_region.vert_stride; + $$.reg.width = $3.src_region.width; + $$.reg.hstride = $3.src_region.horiz_stride; + } else { + $$.reg.vstride = $4.vert_stride; + $$.reg.width = $4.width; + $$.reg.hstride = $4.horiz_stride; + } + $$.reg.negate = $1; + $$.reg.abs = $2; + } + | statereg region regtype + { + if($2.is_default ==1 && $3.is_default == 1) + { + 
set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD); + } + else{ + memset (&$$, '\0', sizeof ($$)); + $$.reg.address_mode = BRW_ADDRESS_DIRECT; + $$.reg.file = $1.file; + $$.reg.nr = $1.nr; + $$.reg.subnr = $1.subnr; + $$.reg.vstride = $2.vert_stride; + $$.reg.width = $2.width; + $$.reg.hstride = $2.horiz_stride; + $$.reg.type = $3.type; + } + } + | negate abs directgenreg region regtype swizzle + { + /* Plain general-register source: every field comes from the parsed pieces; default_region records whether the region suffix was omitted. */ memset (&$$, '\0', sizeof ($$)); + $$.reg.address_mode = BRW_ADDRESS_DIRECT; + $$.reg.file = $3.file; + $$.reg.nr = $3.nr; + $$.reg.subnr = $3.subnr; + $$.reg.type = $5.type; + $$.reg.vstride = $4.vert_stride; + $$.reg.width = $4.width; + $$.reg.hstride = $4.horiz_stride; + $$.default_region = $4.is_default; + $$.reg.negate = $1; + $$.reg.abs = $2; + $$.reg.dw1.bits.swizzle = $6.reg.dw1.bits.swizzle; + } + | srcarchoperandex +; + +indirectsrcoperand: + negate abs indirectgenreg indirectregion regtype swizzle + { + /* Register-indirect source: the address subregister and immediate offset come from the indirect register; no register number is set and default_region stays 0 from the memset. */ memset (&$$, '\0', sizeof ($$)); + $$.reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + $$.reg.file = $3.file; + $$.reg.subnr = $3.subnr; + $$.reg.dw1.bits.indirect_offset = $3.dw1.bits.indirect_offset; + $$.reg.type = $5.type; + $$.reg.vstride = $4.vert_stride; + $$.reg.width = $4.width; + $$.reg.hstride = $4.horiz_stride; + $$.reg.negate = $1; + $$.reg.abs = $2; + $$.reg.dw1.bits.swizzle = $6.reg.dw1.bits.swizzle; + } +; + +/* 1.4.4: Address Registers */ +/* Returns a partially-completed struct brw_reg consisting of the address + * register fields for register-indirect access. 
+ */ +addrparam: addrreg COMMA immaddroffset + { + /* The immediate offset must lie in -512..511; only the address subregister and the offset are carried forward. */ if ($3 < -512 || $3 > 511) + error(&@3, "Address immediate offset %d out of range\n", $3); + memset (&$$, '\0', sizeof ($$)); + $$.subnr = $1.subnr; + $$.dw1.bits.indirect_offset = $3; + } + | addrreg + { + memset (&$$, '\0', sizeof ($$)); + $$.subnr = $1.subnr; + $$.dw1.bits.indirect_offset = 0; + } +; + +/* The immaddroffset provides an immediate offset value added to the addresses + * from the address register in register-indirect register access. + */ +immaddroffset: /* empty */ { $$ = 0; } + | exp +; + + +/* 1.4.5: Register files and register numbers */ +subregnum: DOT exp + { + $$ = $2; + } + | %prec SUBREGNUM + { + /* Default to subreg 0 if unspecified. */ + $$ = 0; + } +; + /* Direct forms take the register number from the token value and the subregister from subregnum; indirect forms take the address subregister and immediate offset from addrparam and leave nr at 0. */ +directgenreg: GENREG subregnum + { + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_GENERAL_REGISTER_FILE; + $$.nr = $1; + $$.subnr = $2; + } +; + +indirectgenreg: GENREGFILE LSQUARE addrparam RSQUARE + { + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_GENERAL_REGISTER_FILE; + $$.subnr = $3.subnr; + $$.dw1.bits.indirect_offset = $3.dw1.bits.indirect_offset; + } +; + +directmsgreg: MSGREG subregnum + { + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_MESSAGE_REGISTER_FILE; + $$.nr = $1; + $$.subnr = $2; + } +; + +indirectmsgreg: MSGREGFILE LSQUARE addrparam RSQUARE + { + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_MESSAGE_REGISTER_FILE; + $$.subnr = $3.subnr; + $$.dw1.bits.indirect_offset = $3.dw1.bits.indirect_offset; + } +; + +addrreg: ADDRESSREG subregnum + { + /* Only address register 0 is accepted; the token value is OR-ed into the ARF register number. */ if ($1 != 0) + error(&@2, "address register number %d out of range", $1); + + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_ADDRESS | $1; + $$.subnr = $2; + } +; + +accreg: ACCREG subregnum + { + /* Accumulator numbers 0 and 1 are accepted. */ if ($1 > 1) + error(&@1, "accumulator register number %d out of range", $1); + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_ACCUMULATOR | $1; + $$.subnr = $2; + } +; + 
+flagreg: FLAGREG subregnum + { + /* Flag register number: only 0 is accepted unless IS_GENp(7) holds, in which case 0 and 1 are accepted. */ if ((!IS_GENp(7) && $1 > 0) || + (IS_GENp(7) && $1 > 1)) { + error(&@2, "flag register number %d out of range\n", $1); + } + + /* Subregister must be 0 or 1; report the offending subregister ($2), not the register number. */ if ($2 > 1) + error(&@2, "flag subregister number %d out of range\n", $2); + + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_FLAG | $1; + $$.subnr = $2; + } +; + +maskreg: MASKREG subregnum + { + /* Only mask register 0 exists; the named forms (mask_subreg) select the subregister directly. */ if ($1 > 0) + error(&@1, "mask register number %d out of range", $1); + + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_MASK; + $$.subnr = $2; + } + | mask_subreg + { + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_MASK; + $$.subnr = $1; + } +; + +mask_subreg: AMASK | IMASK | LMASK | CMASK +; + +maskstackreg: MASKSTACKREG subregnum + { + /* Same shape as maskreg, for the mask stack register. */ if ($1 > 0) + error(&@1, "mask stack register number %d out of range", $1); + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_MASK_STACK; + $$.subnr = $2; + } + | maskstack_subreg + { + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_MASK_STACK; + $$.subnr = $1; + } +; + +maskstack_subreg: IMS | LMS +; + +/* +maskstackdepthreg: MASKSTACKDEPTHREG subregnum + { + if ($1 > 0) + error(&@1, "mask stack register number %d out of range", $1); + memset (&$$, '\0', sizeof ($$)); + $$.reg_file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.reg_nr = BRW_ARF_MASK_STACK_DEPTH; + $$.subreg_nr = $2; + } + | maskstackdepth_subreg + { + memset (&$$, '\0', sizeof ($$)); + $$.reg_file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.reg_nr = BRW_ARF_MASK_STACK_DEPTH; + $$.subreg_nr = $1; + } +; + +maskstackdepth_subreg: IMSD | LMSD +; + */ + +notifyreg: NOTIFYREG regtype + { + int num_notifyreg = (IS_GENp(6)) ? 
3 : 2; + /* NOTE(review): this accepts $1 == num_notifyreg; if num_notifyreg is a register count the test probably wants >= - confirm against the hardware documentation before changing. */ + if ($1 > num_notifyreg) + error(&@1, "notification register number %d out of range", + $1); + + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + + /* In the IS_GENp(6) branch the number selects a subregister of the notification register; otherwise it is OR-ed into the register number. */ if (IS_GENp(6)) { + $$.nr = BRW_ARF_NOTIFICATION_COUNT; + $$.subnr = $1; + } else { + $$.nr = BRW_ARF_NOTIFICATION_COUNT | $1; + $$.subnr = 0; + } + } +/* + | NOTIFYREG regtype + { + if ($1 > 1) { + fprintf(stderr, + "notification register number %d out of range", + $1); + YYERROR; + } + memset (&$$, '\0', sizeof ($$)); + $$.reg_file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.reg_nr = BRW_ARF_NOTIFICATION_COUNT; + $$.subreg_nr = 0; + } +*/ +; + +statereg: STATEREG subregnum + { + /* Only state register 0 with subregister 0 or 1 is accepted; each message reports the value that failed its check. */ if ($1 > 0) + error(&@1, "state register number %d out of range", $1); + + if ($2 > 1) + error(&@2, "state subregister number %d out of range", $2); + + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_STATE | $1; + $$.subnr = $2; + } +; + +controlreg: CONTROLREG subregnum + { + /* Only control register 0 with subregister 0..2 is accepted; each message reports the value that failed its check. */ if ($1 > 0) + error(&@1, "control register number %d out of range", $1); + + if ($2 > 2) + error(&@2, "control subregister number %d out of range", $2); + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_CONTROL | $1; + $$.subnr = $2; + } +; + +ipreg: IPREG regtype + { + /* The parsed regtype is not used here. */ memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_IP; + $$.subnr = 0; + } +; + +nullreg: NULL_TOKEN + { + memset (&$$, '\0', sizeof ($$)); + $$.file = BRW_ARCHITECTURE_REGISTER_FILE; + $$.nr = BRW_ARF_NULL; + $$.subnr = 0; + } +; + +/* 1.4.6: Relative locations */ +relativelocation: + simple_int + { + /* A numeric offset must fit in a signed 16-bit range; only its low 16 bits are stored in imm32. */ if (($1 > 32767) || ($1 < -32768)) + error(&@1, "error: relative offset %d out of range \n", $1); + + memset (&$$, '\0', sizeof ($$)); + $$.reg.file = BRW_IMMEDIATE_VALUE; + $$.reg.type = BRW_REGISTER_TYPE_D; + $$.imm32 = $1 & 0x0000ffff; + } + | STRING + { + memset (&$$, '\0', sizeof ($$)); + $$.reg.file = BRW_IMMEDIATE_VALUE; + 
$$.reg.type = BRW_REGISTER_TYPE_D; + $$.reloc_target = $1; + } +; + /* relativelocation2 accepts a label name (kept in reloc_target), a numeric expression (stored unmasked in imm32), or a register operand in direct, declared or register-indirect form. */ +relativelocation2: + STRING + { + memset (&$$, '\0', sizeof ($$)); + $$.reg.file = BRW_IMMEDIATE_VALUE; + $$.reg.type = BRW_REGISTER_TYPE_D; + $$.reloc_target = $1; + } + | exp + { + memset (&$$, '\0', sizeof ($$)); + $$.reg.file = BRW_IMMEDIATE_VALUE; + $$.reg.type = BRW_REGISTER_TYPE_D; + $$.imm32 = $1; + } + | directgenreg region regtype + { + set_direct_src_operand(&$$, &$1, $3.type); + $$.reg.vstride = $2.vert_stride; + $$.reg.width = $2.width; + $$.reg.hstride = $2.horiz_stride; + $$.default_region = $2.is_default; + } + | symbol_reg_p + { + /* Declared register with an offset: type and region both come from the declaration. */ memset (&$$, '\0', sizeof ($$)); + $$.reg.address_mode = BRW_ADDRESS_DIRECT; + $$.reg.file = $1.reg.file; + $$.reg.nr = $1.reg.nr; + $$.reg.subnr = $1.reg.subnr; + $$.reg.type = $1.reg.type; + $$.reg.vstride = $1.src_region.vert_stride; + $$.reg.width = $1.src_region.width; + $$.reg.hstride = $1.src_region.horiz_stride; + } + | indirectgenreg indirectregion regtype + { + memset (&$$, '\0', sizeof ($$)); + $$.reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + $$.reg.file = $1.file; + $$.reg.subnr = $1.subnr; + $$.reg.dw1.bits.indirect_offset = $1.dw1.bits.indirect_offset; + $$.reg.type = $3.type; + $$.reg.vstride = $2.vert_stride; + $$.reg.width = $2.width; + $$.reg.hstride = $2.horiz_stride; + } +; + +/* 1.4.7: Regions */ +dstregion: /* empty */ + { + $$ = DEFAULT_DSTREGION; + } + |LANGLE exp RANGLE + { + /* Returns a value for a horiz_stride field of an + * instruction. 
+ */ + if ($2 != 1 && $2 != 2 && $2 != 4) + error(&@2, "Invalid horiz size %d\n", $2); + /* ffs() returns the 1-based index of the lowest set bit, so strides 1, 2, 4 become 1, 2, 3. */ + $$ = ffs($2); + } +; + /* Source regions: strides are stored as ffs(stride), which is 0 for a stride of 0, and width as ffs(width) - 1. is_default is set only when the region suffix is omitted; the other alternatives leave it at 0 from the memset. */ +region: /* empty */ + { + /* XXX is this default value correct?*/ + memset (&$$, '\0', sizeof ($$)); + $$.vert_stride = ffs(0); + $$.width = BRW_WIDTH_1; + $$.horiz_stride = ffs(0); + $$.is_default = 1; + } + |LANGLE exp RANGLE + { + /* XXX is this default value correct for accreg?*/ + memset (&$$, '\0', sizeof ($$)); + $$.vert_stride = ffs($2); + $$.width = BRW_WIDTH_1; + $$.horiz_stride = ffs(0); + } + |LANGLE exp COMMA exp COMMA exp RANGLE + { + memset (&$$, '\0', sizeof ($$)); + $$.vert_stride = ffs($2); + $$.width = ffs($4) - 1; + $$.horiz_stride = ffs($6); + } + | LANGLE exp SEMICOLON exp COMMA exp RANGLE + { + memset (&$$, '\0', sizeof ($$)); + $$.vert_stride = ffs($2); + $$.width = ffs($4) - 1; + $$.horiz_stride = ffs($6); + } + +; +/* region_wh is used in specifying indirect operands where rather than having + * a vertical stride, you use subsequent address registers to get a new base + * offset for the next row. + */ +region_wh: LANGLE exp COMMA exp RANGLE + { + memset (&$$, '\0', sizeof ($$)); + $$.vert_stride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL; + $$.width = ffs($2) - 1; + $$.horiz_stride = ffs($4); + } +; + +indirectregion: region | region_wh +; + +/* 1.4.8: Types */ + +/* regtype returns an integer register type suitable for inserting into an + * instruction. 
+ */ +regtype: /* empty */ + /* No suffix: use the program-wide default type and flag the value as defaulted so callers can override it. */ { $$.type = program_defaults.register_type;$$.is_default = 1;} + | TYPE_F { $$.type = BRW_REGISTER_TYPE_F;$$.is_default = 0; } + | TYPE_UD { $$.type = BRW_REGISTER_TYPE_UD;$$.is_default = 0; } + | TYPE_D { $$.type = BRW_REGISTER_TYPE_D;$$.is_default = 0; } + | TYPE_UW { $$.type = BRW_REGISTER_TYPE_UW;$$.is_default = 0; } + | TYPE_W { $$.type = BRW_REGISTER_TYPE_W;$$.is_default = 0; } + | TYPE_UB { $$.type = BRW_REGISTER_TYPE_UB;$$.is_default = 0; } + | TYPE_B { $$.type = BRW_REGISTER_TYPE_B;$$.is_default = 0; } +; + /* Type suffix of an immediate: a bare type code with no is_default flag; an omitted suffix means D. */ +srcimmtype: /* empty */ + { + /* XXX change to default when pragma parse is done */ + $$ = BRW_REGISTER_TYPE_D; + } + |TYPE_F { $$ = BRW_REGISTER_TYPE_F; } + | TYPE_UD { $$ = BRW_REGISTER_TYPE_UD; } + | TYPE_D { $$ = BRW_REGISTER_TYPE_D; } + | TYPE_UW { $$ = BRW_REGISTER_TYPE_UW; } + | TYPE_W { $$ = BRW_REGISTER_TYPE_W; } + | TYPE_V { $$ = BRW_REGISTER_TYPE_V; } + | TYPE_VF { $$ = BRW_REGISTER_TYPE_VF; } +; + +/* 1.4.10: Swizzle control */ +/* Returns the swizzle control for an align16 instruction's source operand + * in the src0 fields. + */ +swizzle: /* empty */ + { + $$.reg.dw1.bits.swizzle = BRW_SWIZZLE_NOOP; + } + | DOT chansel + { + /* A single channel is replicated into all four positions. */ $$.reg.dw1.bits.swizzle = BRW_SWIZZLE4($2, $2, $2, $2); + } + | DOT chansel chansel chansel chansel + { + $$.reg.dw1.bits.swizzle = BRW_SWIZZLE4($2, $3, $4, $5); + } +; + +chansel: X | Y | Z | W +; + +/* 1.4.9: Write mask */ +/* Returns a partially completed struct brw_reg, with just the writemask bits + * filled out. 
+ */ +writemask: /* empty */ + { + $$.dw1.bits.writemask = BRW_WRITEMASK_XYZW; + } + | DOT writemask_x writemask_y writemask_z writemask_w + { + /* Each channel rule contributes its bit, or 0 when the letter is absent; the letters can only appear in x, y, z, w order. */ $$.dw1.bits.writemask = $2 | $3 | $4 | $5; + } +; + +writemask_x: /* empty */ { $$ = 0; } + | X { $$ = 1 << BRW_CHANNEL_X; } +; + +writemask_y: /* empty */ { $$ = 0; } + | Y { $$ = 1 << BRW_CHANNEL_Y; } +; + +writemask_z: /* empty */ { $$ = 0; } + | Z { $$ = 1 << BRW_CHANNEL_Z; } +; + +writemask_w: /* empty */ { $$ = 0; } + | W { $$ = 1 << BRW_CHANNEL_W; } +; + +/* 1.4.11: Immediate values */ +imm32: exp { $$.r = imm32_d; $$.u.d = $1; } + | NUMBER { $$.r = imm32_f; $$.u.f = $1; } +; + +/* 1.4.12: Predication and modifiers */ +predicate: /* empty */ + { + $$.pred_control = BRW_PREDICATE_NONE; + $$.flag_reg_nr = 0; + $$.flag_subreg_nr = 0; + $$.pred_inverse = 0; + } + | LPAREN predstate flagreg predctrl RPAREN + { + /* ( [+|-] flagreg [predctrl] ): the sign gives pred_inverse, the flag register gives reg/subreg numbers. */ $$.pred_control = $4; + $$.flag_reg_nr = $3.nr; + $$.flag_subreg_nr = $3.subnr; + $$.pred_inverse = $2; + } +; + +predstate: /* empty */ { $$ = 0; } + | PLUS { $$ = 0; } + | MINUS { $$ = 1; } +; + +predctrl: /* empty */ { $$ = BRW_PREDICATE_NORMAL; } + | DOT X { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_X; } + | DOT Y { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_Y; } + | DOT Z { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_Z; } + | DOT W { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_W; } + | ANYV { $$ = BRW_PREDICATE_ALIGN1_ANYV; } + | ALLV { $$ = BRW_PREDICATE_ALIGN1_ALLV; } + | ANY2H { $$ = BRW_PREDICATE_ALIGN1_ANY2H; } + | ALL2H { $$ = BRW_PREDICATE_ALIGN1_ALL2H; } + | ANY4H { $$ = BRW_PREDICATE_ALIGN1_ANY4H; } + | ALL4H { $$ = BRW_PREDICATE_ALIGN1_ALL4H; } + | ANY8H { $$ = BRW_PREDICATE_ALIGN1_ANY8H; } + | ALL8H { $$ = BRW_PREDICATE_ALIGN1_ALL8H; } + | ANY16H { $$ = BRW_PREDICATE_ALIGN1_ANY16H; } + | ALL16H { $$ = BRW_PREDICATE_ALIGN1_ALL16H; } +; + /* Source modifiers: 1 when the modifier is written, 0 otherwise. */ +negate: /* empty */ { $$ = 0; } + | MINUS { $$ = 1; } +; + +abs: /* empty */ { $$ = 0; } + | ABS { $$ = 1; } +; + +execsize: /* empty */ %prec EMPTEXECSIZE + { + $$ = 
ffs(program_defaults.execute_size) - 1; + } + |LPAREN exp RPAREN + { + /* Returns a value for the execution_size field of an + * instruction. + */ + if ($2 != 1 && $2 != 2 && $2 != 4 && $2 != 8 && $2 != 16 && + $2 != 32) + error(&@2, "Invalid execution size %d\n", $2); + /* Stored as log2 of the size: ffs(n) - 1 for the power-of-two values accepted above. */ + $$ = ffs($2) - 1; + } +; + +saturate: /* empty */ { $$ = BRW_INSTRUCTION_NORMAL; } + | SATURATE { $$ = BRW_INSTRUCTION_SATURATE; } +; +conditionalmodifier: condition + { + /* No flag register named: flag_subreg_nr is set to -1, the value set_instruction_pred_cond() tests for before overriding the flag selection. */ $$.cond = $1; + $$.flag_reg_nr = 0; + $$.flag_subreg_nr = -1; + } + | condition DOT flagreg + { + $$.cond = $1; + $$.flag_reg_nr = ($3.nr & 0xF); + $$.flag_subreg_nr = $3.subnr; + } + +condition: /* empty */ { $$ = BRW_CONDITIONAL_NONE; } + | ZERO + | EQUAL + | NOT_ZERO + | NOT_EQUAL + | GREATER + | GREATER_EQUAL + | LESS + | LESS_EQUAL + | ROUND_INCREMENT + | OVERFLOW + | UNORDERED +; + +/* 1.4.13: Instruction options */ +instoptions: /* empty */ + { memset(&$$, 0, sizeof($$)); } + | LCURLY instoption_list RCURLY + { $$ = $2; } +; + /* Options inside the braces may be separated by commas or just whitespace; each one is folded into the accumulated value with add_option(). */ +instoption_list:instoption_list COMMA instoption + { + $$ = $1; + add_option(&$$, $3); + } + | instoption_list instoption + { + $$ = $1; + add_option(&$$, $2); + } + | /* empty, header defaults to zeroes. 
*/ + { + memset(&$$, 0, sizeof($$)); + } +; + /* Each option rule returns its own token code, which add_option() receives. */ +instoption: ALIGN1 { $$ = ALIGN1; } + | ALIGN16 { $$ = ALIGN16; } + | SECHALF { $$ = SECHALF; } + | COMPR { $$ = COMPR; } + | SWITCH { $$ = SWITCH; } + | ATOMIC { $$ = ATOMIC; } + | NODDCHK { $$ = NODDCHK; } + | NODDCLR { $$ = NODDCLR; } + | MASK_DISABLE { $$ = MASK_DISABLE; } + | BREAKPOINT { $$ = BREAKPOINT; } + | ACCWRCTRL { $$ = ACCWRCTRL; } + | EOT { $$ = EOT; } +; + +%% +extern int yylineno; + /* Parser error callback: prints the input file name, current line, message and the text reported by lex_text() to stderr, then increments the global error counter. */ +void yyerror (char *msg) +{ + fprintf(stderr, "%s: %d: %s at \"%s\"\n", + input_filename, yylineno, msg, lex_text()); + ++errors; +} + /* Returns the element size for a register type: 4 for F/UD/D, 2 for UW/W, 1 for UB/B (presumably bytes - callers compare multiples of it against 32). Any other type trips the assert and, with asserts compiled out, yields 1. */ +static int get_type_size(unsigned type) +{ + int size = 1; + + switch (type) { + case BRW_REGISTER_TYPE_F: + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + size = 4; + break; + + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + size = 2; + break; + + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + size = 1; + break; + + default: + assert(0); + size = 1; + break; + } + + return size; +} + /* Replaces a defaulted source region (src->default_region set) with one derived from the register kind and the instruction header; operands with an explicit region are left untouched. */ +static void reset_instruction_src_region(struct brw_instruction *instr, + struct src_operand *src) +{ + if (!src->default_region) + return; + + if (src->reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + ((src->reg.nr & 0xF0) == BRW_ARF_ADDRESS)) { + src->reg.vstride = ffs(0); + src->reg.width = BRW_WIDTH_1; + src->reg.hstride = ffs(0); + } else if (src->reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + ((src->reg.nr & 0xF0) == BRW_ARF_ACCUMULATOR)) { + /* Accumulator: width is 16 when compressed, else 8, capped at the execution size (1 << execution_size). */ int horiz_stride = 1, width, vert_stride; + if (instr->header.compression_control == BRW_COMPRESSION_COMPRESSED) { + width = 16; + } else { + width = 8; + } + + if (width > (1 << instr->header.execution_size)) + width = (1 << instr->header.execution_size); + + vert_stride = horiz_stride * width; + src->reg.vstride = ffs(vert_stride); + src->reg.width = ffs(width) - 1; + src->reg.hstride = ffs(horiz_stride); + } else if ((src->reg.file == BRW_ARCHITECTURE_REGISTER_FILE) && + (src->reg.nr == BRW_ARF_NULL) && + (instr->header.opcode == 
BRW_OPCODE_SEND)) { + /* Null source of a send: fixed region with vstride 8, width 8, hstride 1. */ src->reg.vstride = ffs(8); + src->reg.width = BRW_WIDTH_8; + src->reg.hstride = ffs(1); + } else { + + int horiz_stride = 1, width, vert_stride; + + if (instr->header.execution_size == 0) { /* scalar */ + horiz_stride = 0; + width = 1; + vert_stride = 0; + } else { + /* The opcodes listed here always get the scalar region; every other opcode gets a region spanning the execution size, falling back to scalar when it would reach past 32 (see the get_type_size() test below). */ if ((instr->header.opcode == BRW_OPCODE_MUL) || + (instr->header.opcode == BRW_OPCODE_MAC) || + (instr->header.opcode == BRW_OPCODE_CMP) || + (instr->header.opcode == BRW_OPCODE_ASR) || + (instr->header.opcode == BRW_OPCODE_ADD) || + (instr->header.opcode == BRW_OPCODE_SHL)) { + horiz_stride = 0; + width = 1; + vert_stride = 0; + } else { + width = (1 << instr->header.execution_size) / horiz_stride; + vert_stride = horiz_stride * width; + + if (get_type_size(src->reg.type) * (width + src->reg.subnr) > 32) { + horiz_stride = 0; + width = 1; + vert_stride = 0; + } + } + } + + src->reg.vstride = ffs(vert_stride); + src->reg.width = ffs(width) - 1; + src->reg.hstride = ffs(horiz_stride); + } +} + /* Stores the opcode in the instruction header. */ +static void set_instruction_opcode(struct brw_program_instruction *instr, + unsigned opcode) +{ + GEN(instr)->header.opcode = opcode; +} + +/** + * Fills in the destination register information in instr from the bits in dst. + * Returns 0 on success and 1 when validate_dst_reg() rejects the register. + */ +static int set_instruction_dest(struct brw_program_instruction *instr, + struct brw_reg *dest) +{ + if (!validate_dst_reg(GEN(instr), dest)) + return 1; + + /* the assembler supports expressing subnr in bytes or in number of + * elements. */ + resolve_subnr(dest); + + brw_set_dest(&genasm_compile, GEN(instr), *dest); + + return 0; +} + +/* Sets the first source operand for the instruction. Returns 0 on success. */ +static int set_instruction_src0(struct brw_program_instruction *instr, + struct src_operand *src, + YYLTYPE *location) +{ + + /* Default regions are only rewritten when advanced_flag is set. */ if (advanced_flag) + reset_instruction_src_region(GEN(instr), src); + + if (!validate_src_reg(GEN(instr), src->reg, location)) + return 1; + + /* the assembler supports expressing subnr in bytes or in number of + * elements. 
*/ + resolve_subnr(&src->reg); + + brw_set_src0(&genasm_compile, GEN(instr), src->reg); + + return 0; +} + +/* Sets the second source operand for the instruction. Returns 0 on success. + * Returns 1 when validate_src_reg() rejects the operand. + */ +static int set_instruction_src1(struct brw_program_instruction *instr, + struct src_operand *src, + YYLTYPE *location) +{ + if (advanced_flag) + reset_instruction_src_region(GEN(instr), src); + + if (!validate_src_reg(GEN(instr), src->reg, location)) + return 1; + + /* the assembler supports expressing subnr in bytes or in number of + * elements. */ + resolve_subnr(&src->reg); + + brw_set_src1(&genasm_compile, GEN(instr), src->reg); + + return 0; +} + /* Three-source variants: same flow as the two-source setters but without the validate_*_reg() step, so they always return 0. */ +static int set_instruction_dest_three_src(struct brw_program_instruction *instr, + struct brw_reg *dest) +{ + resolve_subnr(dest); + brw_set_3src_dest(&genasm_compile, GEN(instr), *dest); + return 0; +} + +static int set_instruction_src0_three_src(struct brw_program_instruction *instr, + struct src_operand *src) +{ + if (advanced_flag) + reset_instruction_src_region(GEN(instr), src); + + resolve_subnr(&src->reg); + + // TODO: src0 modifier, src0 rep_ctrl + brw_set_3src_src0(&genasm_compile, GEN(instr), src->reg); + return 0; +} + +static int set_instruction_src1_three_src(struct brw_program_instruction *instr, + struct src_operand *src) +{ + if (advanced_flag) + reset_instruction_src_region(GEN(instr), src); + + resolve_subnr(&src->reg); + + // TODO: src1 modifier, src1 rep_ctrl + brw_set_3src_src1(&genasm_compile, GEN(instr), src->reg); + return 0; +} + +static int set_instruction_src2_three_src(struct brw_program_instruction *instr, + struct src_operand *src) +{ + if (advanced_flag) + reset_instruction_src_region(GEN(instr), src); + + resolve_subnr(&src->reg); + + // TODO: src2 modifier, src2 rep_ctrl + brw_set_3src_src2(&genasm_compile, GEN(instr), src->reg); + return 0; +} + +static void set_instruction_saturate(struct brw_program_instruction *instr, + int saturate) +{ + GEN(instr)->header.saturate = saturate; +} + +static 
void set_instruction_options(struct brw_program_instruction *instr, + struct options options) +{ + GEN(instr)->header.access_mode = options.access_mode; + GEN(instr)->header.compression_control = options.compression_control; + GEN(instr)->header.thread_control = options.thread_control; + GEN(instr)->header.dependency_control = options.dependency_control; + GEN(instr)->header.mask_control = options.mask_control; + GEN(instr)->header.debug_control = options.debug_control; + GEN(instr)->header.acc_wr_control = options.acc_wr_control; + GEN(instr)->bits3.generic.end_of_thread = options.end_of_thread; +} + +static void set_instruction_predicate(struct brw_program_instruction *instr, + struct predicate *p) +{ + GEN(instr)->header.predicate_control = p->pred_control; + GEN(instr)->header.predicate_inverse = p->pred_inverse; + GEN(instr)->bits2.da1.flag_reg_nr = p->flag_reg_nr; + GEN(instr)->bits2.da1.flag_subreg_nr = p->flag_subreg_nr; +} + +static void set_instruction_pred_cond(struct brw_program_instruction *instr, + struct predicate *p, + struct condition *c, + YYLTYPE *location) +{ + set_instruction_predicate(instr, p); + GEN(instr)->header.destreg__conditionalmod = c->cond; + + if (c->flag_subreg_nr == -1) + return; + + if (p->pred_control != BRW_PREDICATE_NONE && + (p->flag_reg_nr != c->flag_reg_nr || + p->flag_subreg_nr != c->flag_subreg_nr)) + { + warn(ALWAYS, location, "must use the same flag register if both " + "prediction and conditional modifier are enabled\n"); + } + + GEN(instr)->bits2.da1.flag_reg_nr = c->flag_reg_nr; + GEN(instr)->bits2.da1.flag_subreg_nr = c->flag_subreg_nr; +} + +static void set_direct_dst_operand(struct brw_reg *dst, struct brw_reg *reg, + int type) +{ + *dst = *reg; + dst->address_mode = BRW_ADDRESS_DIRECT; + dst->type = type; + dst->hstride = 1; + dst->dw1.bits.writemask = BRW_WRITEMASK_XYZW; +} + +static void set_direct_src_operand(struct src_operand *src, struct brw_reg *reg, + int type) +{ + memset(src, 0, sizeof(*src)); + 
src->reg.address_mode = BRW_ADDRESS_DIRECT; + src->reg.file = reg->file; + src->reg.type = type; + src->reg.subnr = reg->subnr; + src->reg.nr = reg->nr; + src->reg.vstride = 0; + src->reg.width = 0; + src->reg.hstride = 0; + src->reg.negate = 0; + src->reg.abs = 0; + SWIZZLE(src->reg) = BRW_SWIZZLE_NOOP; +} diff --git a/assembler/intel-gen4asm.pc.in b/assembler/intel-gen4asm.pc.in new file mode 100644 index 0000000..54febc4 --- /dev/null +++ b/assembler/intel-gen4asm.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: intel-gen4asm +Description: An assembler compiler for the Intel 965+ Chipset +Version: @VERSION@ +Libs: +Cflags: diff --git a/assembler/lex.l b/assembler/lex.l new file mode 100644 index 0000000..769d98b --- /dev/null +++ b/assembler/lex.l @@ -0,0 +1,440 @@ +%option yylineno +%{ +#include <string.h> +#include "gen4asm.h" +#include "gram.h" +#include "brw_defines.h" + +#include "string.h" +int saved_state = 0; +extern char *input_filename; + +/* Locations */ +int yycolumn = 1; + +#define YY_USER_ACTION \ + yylloc.first_line = yylloc.last_line = yylineno; \ + yylloc.first_column = yycolumn; \ + yylloc.last_column = yycolumn+yyleng-1; \ + yycolumn += yyleng; + +%} +%x BLOCK_COMMENT +%x CHANNEL +%x LINENUMBER +%x FILENAME + +%% +\/\/.*[\r\n] { yycolumn = 1; } /* eat up single-line comments */ +"\.kernel".*[\r\n] { yycolumn = 1; } +"\.end_kernel".*[\r\n] { yycolumn = 1; } +"\.code".*[\r\n] { yycolumn = 1; } +"\.end_code".*[\r\n] { yycolumn = 1; } + + /* eat up multi-line comments, non-nesting. */ +\/\* { + saved_state = YYSTATE; + BEGIN(BLOCK_COMMENT); +} +<BLOCK_COMMENT>\*\/ { + BEGIN(saved_state); +} +<BLOCK_COMMENT>. 
{ } +<BLOCK_COMMENT>[\r\n] { } +"#line"" "* { + yycolumn = 1; + saved_state = YYSTATE; + BEGIN(LINENUMBER); +} +<LINENUMBER>[0-9]+" "* { + yylineno = atoi (yytext) - 1; + BEGIN(FILENAME); +} +<FILENAME>\"[^\"]+\" { + /* yytext is the file name including both quotes: copy the yyleng-2 inner characters and terminate right after them (index yyleng-2, the last byte of the yyleng-1 byte buffer). */ + char *name = malloc (yyleng - 1); + memmove (name, yytext + 1, yyleng - 2); + name[yyleng-2] = '\0'; + input_filename = name; + BEGIN(saved_state); +} + +<CHANNEL>"x" { + yylval.integer = BRW_CHANNEL_X; + return X; +} +<CHANNEL>"y" { + yylval.integer = BRW_CHANNEL_Y; + return Y; +} +<CHANNEL>"z" { + yylval.integer = BRW_CHANNEL_Z; + return Z; +} +<CHANNEL>"w" { +yylval.integer = BRW_CHANNEL_W; + return W; +} +<CHANNEL>. { + yyless(0); + BEGIN(INITIAL); +} + + /* used for both null send and null register. */ +"null" { return NULL_TOKEN; } + + /* opcodes */ +"mov" { yylval.integer = BRW_OPCODE_MOV; return MOV; } +"frc" { yylval.integer = BRW_OPCODE_FRC; return FRC; } +"rndu" { yylval.integer = BRW_OPCODE_RNDU; return RNDU; } +"rndd" { yylval.integer = BRW_OPCODE_RNDD; return RNDD; } +"rnde" { yylval.integer = BRW_OPCODE_RNDE; return RNDE; } +"rndz" { yylval.integer = BRW_OPCODE_RNDZ; return RNDZ; } +"not" { yylval.integer = BRW_OPCODE_NOT; return NOT; } +"lzd" { yylval.integer = BRW_OPCODE_LZD; return LZD; } +"f16to32" { yylval.integer = BRW_OPCODE_F16TO32; return F16TO32; } +"f32to16" { yylval.integer = BRW_OPCODE_F32TO16; return F32TO16; } +"fbh" { yylval.integer = BRW_OPCODE_FBH; return FBH; } +"fbl" { yylval.integer = BRW_OPCODE_FBL; return FBL; } + +"mad" { yylval.integer = BRW_OPCODE_MAD; return MAD; } +"lrp" { yylval.integer = BRW_OPCODE_LRP; return LRP; } +"bfe" { yylval.integer = BRW_OPCODE_BFE; return BFE; } +"bfi1" { yylval.integer = BRW_OPCODE_BFI1; return BFI1; } +"bfi2" { yylval.integer = BRW_OPCODE_BFI2; return BFI2; } +"bfrev" { yylval.integer = BRW_OPCODE_BFREV; return BFREV; } +"mul" { yylval.integer = BRW_OPCODE_MUL; return MUL; } +"mac" { yylval.integer = BRW_OPCODE_MAC; return MAC; } +"mach" { yylval.integer = BRW_OPCODE_MACH; return 
MACH; } +"line" { yylval.integer = BRW_OPCODE_LINE; return LINE; } +"sad2" { yylval.integer = BRW_OPCODE_SAD2; return SAD2; } +"sada2" { yylval.integer = BRW_OPCODE_SADA2; return SADA2; } +"dp4" { yylval.integer = BRW_OPCODE_DP4; return DP4; } +"dph" { yylval.integer = BRW_OPCODE_DPH; return DPH; } +"dp3" { yylval.integer = BRW_OPCODE_DP3; return DP3; } +"dp2" { yylval.integer = BRW_OPCODE_DP2; return DP2; } + +"cbit" { yylval.integer = BRW_OPCODE_CBIT; return CBIT; } +"avg" { yylval.integer = BRW_OPCODE_AVG; return AVG; } +"add" { yylval.integer = BRW_OPCODE_ADD; return ADD; } +"addc" { yylval.integer = BRW_OPCODE_ADDC; return ADDC; } +"sel" { yylval.integer = BRW_OPCODE_SEL; return SEL; } +"and" { yylval.integer = BRW_OPCODE_AND; return AND; } +"or" { yylval.integer = BRW_OPCODE_OR; return OR; } +"xor" { yylval.integer = BRW_OPCODE_XOR; return XOR; } +"shr" { yylval.integer = BRW_OPCODE_SHR; return SHR; } +"shl" { yylval.integer = BRW_OPCODE_SHL; return SHL; } +"asr" { yylval.integer = BRW_OPCODE_ASR; return ASR; } +"cmp" { yylval.integer = BRW_OPCODE_CMP; return CMP; } +"cmpn" { yylval.integer = BRW_OPCODE_CMPN; return CMPN; } +"subb" { yylval.integer = BRW_OPCODE_SUBB; return SUBB; } + +"send" { yylval.integer = BRW_OPCODE_SEND; return SEND; } +"nop" { yylval.integer = BRW_OPCODE_NOP; return NOP; } +"jmpi" { yylval.integer = BRW_OPCODE_JMPI; return JMPI; } +"if" { yylval.integer = BRW_OPCODE_IF; return IF; } +"iff" { yylval.integer = BRW_OPCODE_IFF; return IFF; } +"while" { yylval.integer = BRW_OPCODE_WHILE; return WHILE; } +"else" { yylval.integer = BRW_OPCODE_ELSE; return ELSE; } +"break" { yylval.integer = BRW_OPCODE_BREAK; return BREAK; } +"cont" { yylval.integer = BRW_OPCODE_CONTINUE; return CONT; } +"halt" { yylval.integer = BRW_OPCODE_HALT; return HALT; } +"msave" { yylval.integer = BRW_OPCODE_MSAVE; return MSAVE; } +"push" { yylval.integer = BRW_OPCODE_PUSH; return PUSH; } +"mrest" { yylval.integer = BRW_OPCODE_MRESTORE; return MREST; } +"pop" { 
yylval.integer = BRW_OPCODE_POP; return POP; } +"wait" { yylval.integer = BRW_OPCODE_WAIT; return WAIT; } +"do" { yylval.integer = BRW_OPCODE_DO; return DO; } +"endif" { yylval.integer = BRW_OPCODE_ENDIF; return ENDIF; } +"call" { yylval.integer = BRW_OPCODE_CALL; return CALL; } +"ret" { yylval.integer = BRW_OPCODE_RET; return RET; } +"brd" { yylval.integer = BRW_OPCODE_BRD; return BRD; } +"brc" { yylval.integer = BRW_OPCODE_BRC; return BRC; } + +"pln" { yylval.integer = BRW_OPCODE_PLN; return PLN; } + + /* send argument tokens */ +"mlen" { return MSGLEN; } +"rlen" { return RETURNLEN; } +"math" { if (IS_GENp(6)) { yylval.integer = BRW_OPCODE_MATH; return MATH_INST; } else return MATH; } +"sampler" { return SAMPLER; } +"gateway" { return GATEWAY; } +"read" { return READ; } +"write" { return WRITE; } +"urb" { return URB; } +"thread_spawner" { return THREAD_SPAWNER; } +"vme" { return VME; } +"cre" { return CRE; } +"data_port" { return DATA_PORT; } + +"allocate" { return ALLOCATE; } +"used" { return USED; } +"complete" { return COMPLETE; } +"transpose" { return TRANSPOSE; } +"interleave" { return INTERLEAVE; } + +";" { return SEMICOLON; } +"(" { return LPAREN; } +")" { return RPAREN; } +"<" { return LANGLE; } +">" { return RANGLE; } +"{" { return LCURLY; } +"}" { return RCURLY; } +"[" { return LSQUARE; } +"]" { return RSQUARE; } +"," { return COMMA; } +"." { BEGIN(CHANNEL); return DOT; } +"+" { return PLUS; } +"-" { return MINUS; } +"*" { return MULTIPLY;} +"/" { return DIVIDE; } +":" { return COLON; } +"=" { return EQ; } +"(abs)" { return ABS; } + + /* Most register accesses are lexed as REGFILE[0-9]+, to prevent the register + * with subreg from being lexed as REGFILE NUMBER instead of + * REGISTER INTEGER DOT INTEGER like we want. The alternative was to use a + * start condition, which wasn't very clean-looking. 
+ * + * However, this means we need to lex the general and message register file + * characters as well, for register-indirect access which is formatted + * like g[a#.#] or m[a#.#]. + */ +"acc"[0-9]+ { + yylval.integer = atoi(yytext + 3); + return ACCREG; +} +"a"[0-9]+ { + yylval.integer = atoi(yytext + 1); + return ADDRESSREG; +} +"m"[0-9]+ { + yylval.integer = atoi(yytext + 1); + return MSGREG; +} +"m" { + return MSGREGFILE; +} +"mask"[0-9]+ { + yylval.integer = atoi(yytext + 4); + return MASKREG; +} +"ms"[0-9]+ { + yylval.integer = atoi(yytext + 2); + return MASKSTACKREG; +} +"msd"[0-9]+ { + yylval.integer = atoi(yytext + 3); + return MASKSTACKDEPTHREG; +} + +"n0."[0-9]+ { + yylval.integer = atoi(yytext + 3); + return NOTIFYREG; +} + +"n"[0-9]+ { + yylval.integer = atoi(yytext + 1); + return NOTIFYREG; +} + +"f"[0-9] { + yylval.integer = atoi(yytext + 1); + return FLAGREG; +} + +[gr][0-9]+ { + yylval.integer = atoi(yytext + 1); + return GENREG; +} +[gr] { + return GENREGFILE; +} +"cr"[0-9]+ { + yylval.integer = atoi(yytext + 2); + return CONTROLREG; +} +"sr"[0-9]+ { + yylval.integer = atoi(yytext + 2); + return STATEREG; +} +"ip" { + return IPREG; +} +"amask" { + yylval.integer = BRW_AMASK; + return AMASK; +} +"imask" { + yylval.integer = BRW_IMASK; + return IMASK; +} +"lmask" { + yylval.integer = BRW_LMASK; + return LMASK; +} +"cmask" { + yylval.integer = BRW_CMASK; + return CMASK; +} +"imsd" { + yylval.integer = 0; + return IMSD; +} +"lmsd" { + yylval.integer = 1; + return LMSD; +} +"ims" { + yylval.integer = 0; + return IMS; +} +"lms" { + yylval.integer = 16; + return LMS; +} + + /* + * Lexing of register types should probably require the ":" symbol specified + * in the BNF of the assembly, but our existing source didn't use that syntax. 
+ */ +"UD" { return TYPE_UD; } +":UD" { return TYPE_UD; } +"D" { return TYPE_D; } +":D" { return TYPE_D; } +"UW" { return TYPE_UW; } +":UW" { return TYPE_UW; } +"W" { return TYPE_W; } +":W" { return TYPE_W; } +"UB" { return TYPE_UB; } +":UB" { return TYPE_UB; } +"B" { return TYPE_B; } +":B" { return TYPE_B; } +"F" { return TYPE_F; } +":F" { return TYPE_F; } +"VF" {return TYPE_VF; } +":VF" {return TYPE_VF; } +"V" { return TYPE_V; } +":V" { return TYPE_V; } + +#".kernel" { return KERNEL_PRAGMA;} +#".end_kernel" { return END_KERNEL_PRAGMA;} +#".code" { return CODE_PRAGMA;} +#".end_code" { return END_CODE_PRAGMA;} +".reg_count_payload" { return REG_COUNT_PAYLOAD_PRAGMA; } +".reg_count_total" { return REG_COUNT_TOTAL_PRAGMA; } +".default_execution_size" { return DEFAULT_EXEC_SIZE_PRAGMA; } +".default_register_type" { return DEFAULT_REG_TYPE_PRAGMA; } +".declare" { return DECLARE_PRAGMA; } +"Base" { return BASE; } +"ElementSize" { return ELEMENTSIZE; } +"SrcRegion" { return SRCREGION; } +"DstRegion" { return DSTREGION; } +"Type" { return TYPE; } + + +".sat" { return SATURATE; } +"align1" { return ALIGN1; } +"align16" { return ALIGN16; } +"sechalf" { return SECHALF; } +"compr" { return COMPR; } +"switch" { return SWITCH; } +"atomic" { return ATOMIC; } +"noddchk" { return NODDCHK; } +"noddclr" { return NODDCLR; } +"mask_disable" { return MASK_DISABLE; } +"nomask" { return MASK_DISABLE; } +"breakpoint" { return BREAKPOINT; } +"accwrctrl" { return ACCWRCTRL; } +"EOT" { return EOT; } + + /* extended math functions */ +"inv" { yylval.integer = BRW_MATH_FUNCTION_INV; return SIN; } +"log" { yylval.integer = BRW_MATH_FUNCTION_LOG; return LOG; } +"exp" { yylval.integer = BRW_MATH_FUNCTION_EXP; return EXP; } +"sqrt" { yylval.integer = BRW_MATH_FUNCTION_SQRT; return SQRT; } +"rsq" { yylval.integer = BRW_MATH_FUNCTION_RSQ; return RSQ; } +"pow" { yylval.integer = BRW_MATH_FUNCTION_POW; return POW; } +"sin" { yylval.integer = BRW_MATH_FUNCTION_SIN; return SIN; } +"cos" { yylval.integer 
= BRW_MATH_FUNCTION_COS; return COS; } +"sincos" { yylval.integer = BRW_MATH_FUNCTION_SINCOS; return SINCOS; } +"intdiv" { + yylval.integer = BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; + return INTDIV; +} +"intmod" { + yylval.integer = BRW_MATH_FUNCTION_INT_DIV_REMAINDER; + return INTMOD; +} +"intdivmod" { + yylval.integer = BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER; + return INTDIVMOD; +} + +"signed" { return SIGNED; } +"scalar" { return SCALAR; } + + /* predicate control */ +".anyv" { return ANYV; } +".allv" { return ALLV; } +".any2h" { return ANY2H; } +".all2h" { return ALL2H; } +".any4h" { return ANY4H; } +".all4h" { return ALL4H; } +".any8h" { return ANY8H; } +".all8h" { return ALL8H; } +".any16h" { return ANY16H; } +".all16h" { return ALL16H; } + +".z" { yylval.integer = BRW_CONDITIONAL_Z; return ZERO; } +".e" { yylval.integer = BRW_CONDITIONAL_Z; return EQUAL; } +".nz" { yylval.integer = BRW_CONDITIONAL_NZ; return NOT_ZERO; } +".ne" { yylval.integer = BRW_CONDITIONAL_NZ; return NOT_EQUAL; } +".g" { yylval.integer = BRW_CONDITIONAL_G; return GREATER; } +".ge" { yylval.integer = BRW_CONDITIONAL_GE; return GREATER_EQUAL; } +".l" { yylval.integer = BRW_CONDITIONAL_L; return LESS; } +".le" { yylval.integer = BRW_CONDITIONAL_LE; return LESS_EQUAL; } +".r" { yylval.integer = BRW_CONDITIONAL_R; return ROUND_INCREMENT; } +".o" { yylval.integer = BRW_CONDITIONAL_O; return OVERFLOW; } +".u" { yylval.integer = BRW_CONDITIONAL_U; return UNORDERED; } + +[a-zA-Z_][0-9a-zA-Z_]* { + yylval.string = strdup(yytext); + return STRING; +} + +0x[0-9a-fA-F][0-9a-fA-F]* { + yylval.integer = strtoul(yytext + 2, NULL, 16); + return INTEGER; +} +[0-9][0-9]* { + yylval.integer = strtoul(yytext, NULL, 10); + return INTEGER; +} + +<INITIAL>[-]?[0-9]+"."[0-9]+ { + yylval.number = strtod(yytext, NULL); + return NUMBER; +} + +[ \t]+ { } /* eat up whitespace */ + +\n { yycolumn = 1; } + +. 
{ + fprintf(stderr, "%s: %d: %s at \"%s\"\n", + input_filename, yylineno, "unexpected token", lex_text()); + } +%% + +char * +lex_text(void) +{ + return yytext; + (void) yyunput; +} + +#ifndef yywrap +int yywrap() { return 1; } +#endif + diff --git a/assembler/main.c b/assembler/main.c new file mode 100644 index 0000000..05ca337 --- /dev/null +++ b/assembler/main.c @@ -0,0 +1,520 @@ +/* -*- c-basic-offset: 8 -*- */ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <assert.h> + +#include "ralloc.h" +#include "gen4asm.h" +#include "brw_eu.h" + +extern FILE *yyin; + +long int gen_level = 40; +int advanced_flag = 0; /* 0: in unit of byte, 1: in unit of data element size */ +unsigned int warning_flags = WARN_ALWAYS; +int need_export = 0; +char *input_filename = "<stdin>"; +int errors; + +struct brw_context genasm_brw_context; +struct brw_compile genasm_compile; + +struct brw_program compiled_program; +struct program_defaults program_defaults = {.register_type = BRW_REGISTER_TYPE_F}; + +/* 0: default output style, 1: nice C-style output */ +static int binary_like_output = 0; +static char *export_filename = NULL; +static const char binary_prepend[] = "static const char gen_eu_bytes[] = {\n"; + +#define HASH_SIZE 37 + +struct hash_item { + char *key; + void *value; + struct hash_item *next; +}; + +typedef struct hash_item *hash_table[HASH_SIZE]; + +static hash_table declared_register_table; + +struct label_item { + char *name; + int addr; + struct label_item *next; +}; +static struct label_item *label_table; + +static const struct option longopts[] = { + {"advanced", no_argument, 0, 'a'}, + {"binary", no_argument, 0, 'b'}, + {"export", required_argument, 0, 'e'}, + {"input_list", required_argument, 0, 'l'}, + {"output", required_argument, 0, 'o'}, + {"gen", required_argument, 0, 'g'}, + { NULL, 0, NULL, 0 } +}; + +// jump distance used in branch instructions as JIP or UIP +static int jump_distance(int offset) +{ + // Gen4- bspec: the jump distance is in number of sixteen-byte units + // Gen5+ bspec: the jump distance is in number of eight-byte units + if(IS_GENp(5)) + offset *= 2; + return offset; +} + +static void usage(void) +{ + fprintf(stderr, "usage: intel-gen4asm [options] inputfile\n"); + fprintf(stderr, "OPTIONS:\n"); + fprintf(stderr, "\t-a, 
--advanced Set advanced flag\n"); + fprintf(stderr, "\t-b, --binary C style binary output\n"); + fprintf(stderr, "\t-e, --export {exportfile} Export label file\n"); + fprintf(stderr, "\t-l, --input_list {entrytablefile} Input entry_table_list file\n"); + fprintf(stderr, "\t-o, --output {outputfile} Specify output file\n"); + fprintf(stderr, "\t-g, --gen <4|5|6|7> Specify GPU generation\n"); +} + +static int hash(char *key) +{ + unsigned ret = 0; + while(*key) + ret = (ret << 1) + (*key++); + return ret % HASH_SIZE; +} + +static void *find_hash_item(hash_table t, char *key) +{ + struct hash_item *p; + for(p = t[hash(key)]; p; p = p->next) + if(strcasecmp(p->key, key) == 0) + return p->value; + return NULL; +} + +static void insert_hash_item(hash_table t, char *key, void *v) +{ + int index = hash(key); + struct hash_item *p = malloc(sizeof(*p)); + p->key = key; + p->value = v; + p->next = t[index]; + t[index] = p; +} + +static void free_hash_table(hash_table t) +{ + struct hash_item *p, *next; + int i; + for (i = 0; i < HASH_SIZE; i++) { + p = t[i]; + while(p) { + next = p->next; + free(p->key); + free(p->value); + free(p); + p = next; + } + } +} + +struct declared_register *find_register(char *name) +{ + return find_hash_item(declared_register_table, name); +} + +void insert_register(struct declared_register *reg) +{ + insert_hash_item(declared_register_table, reg->name, reg); +} + +static void add_label(struct brw_program_instruction *i) +{ + struct label_item **p = &label_table; + + assert(is_label(i)); + + while(*p) + p = &((*p)->next); + *p = calloc(1, sizeof(**p)); + (*p)->name = label_name(i); + (*p)->addr = i->inst_offset; +} + +/* Some assembly code have duplicated labels. + Start from start_addr. Search as a loop. Return the first label found. 
*/ +static int label_to_addr(char *name, int start_addr) +{ + /* return the first label just after start_addr, or the first label from the head */ + struct label_item *p; + int r = -1; + for(p = label_table; p; p = p->next) { + if(strcmp(p->name, name) == 0) { + if(p->addr >= start_addr) // the first label just after start_addr + return p->addr; + else if(r == -1) // the first label from the head + r = p->addr; + } + } + if(r == -1) { + fprintf(stderr, "Can't find label %s\n", name); + exit(1); + } + return r; +} + +static void free_label_table(struct label_item *p) +{ + if(p) { + free_label_table(p->next); + free(p); + } +} + +struct entry_point_item { + char *str; + struct entry_point_item *next; +} *entry_point_table; + +static int read_entry_file(char *fn) +{ + FILE *entry_table_file; + char buf[2048]; + struct entry_point_item **p = &entry_point_table; + if (!fn) + return 0; + if ((entry_table_file = fopen(fn, "r")) == NULL) + return -1; + while (fgets(buf, sizeof(buf)-1, entry_table_file) != NULL) { + // drop the final char '\n' + if(buf[strlen(buf)-1] == '\n') + buf[strlen(buf)-1] = 0; + *p = calloc(1, sizeof(struct entry_point_item)); + (*p)->str = strdup(buf); + p = &((*p)->next); + } + fclose(entry_table_file); + return 0; +} + +static int is_entry_point(struct brw_program_instruction *i) +{ + struct entry_point_item *p; + + assert(i->type == GEN4ASM_INSTRUCTION_LABEL); + + for (p = entry_point_table; p; p = p->next) { + if (strcmp(p->str, i->insn.label.name) == 0) + return 1; + } + return 0; +} + +static void free_entry_point_table(struct entry_point_item *p) { + if (p) { + free_entry_point_table(p->next); + free(p->str); + free(p); + } +} + +static void +print_instruction(FILE *output, struct brw_instruction *instruction) +{ + if (binary_like_output) { + fprintf(output, "\t0x%02x, 0x%02x, 0x%02x, 0x%02x, " + "0x%02x, 0x%02x, 0x%02x, 0x%02x,\n" + "\t0x%02x, 0x%02x, 0x%02x, 0x%02x, " + "0x%02x, 0x%02x, 0x%02x, 0x%02x,\n", + ((unsigned char 
*)instruction)[0], + ((unsigned char *)instruction)[1], + ((unsigned char *)instruction)[2], + ((unsigned char *)instruction)[3], + ((unsigned char *)instruction)[4], + ((unsigned char *)instruction)[5], + ((unsigned char *)instruction)[6], + ((unsigned char *)instruction)[7], + ((unsigned char *)instruction)[8], + ((unsigned char *)instruction)[9], + ((unsigned char *)instruction)[10], + ((unsigned char *)instruction)[11], + ((unsigned char *)instruction)[12], + ((unsigned char *)instruction)[13], + ((unsigned char *)instruction)[14], + ((unsigned char *)instruction)[15]); + } else { + fprintf(output, " { 0x%08x, 0x%08x, 0x%08x, 0x%08x },\n", + ((int *)instruction)[0], + ((int *)instruction)[1], + ((int *)instruction)[2], + ((int *)instruction)[3]); + } +} +int main(int argc, char **argv) +{ + char *output_file = NULL; + char *entry_table_file = NULL; + FILE *output = stdout; + FILE *export_file; + struct brw_program_instruction *entry, *entry1, *tmp_entry; + int err, inst_offset; + /* getopt_long() returns int; a plain char never compares equal to -1 where char is unsigned, so the option loop would not terminate. */ + int o; + void *mem_ctx; + + while ((o = getopt_long(argc, argv, "e:l:o:g:abW", longopts, NULL)) != -1) { + switch (o) { + case 'o': + if (strcmp(optarg, "-") != 0) + output_file = optarg; + + break; + + case 'g': { + char *dec_ptr, *end_ptr; + unsigned long decimal; + + gen_level = strtol(optarg, &dec_ptr, 10) * 10; + + if (*dec_ptr == '.') { + decimal = strtoul(++dec_ptr, &end_ptr, 10); + if (end_ptr != dec_ptr && *end_ptr == '\0') { + if (decimal > 10) { + fprintf(stderr, "Invalid Gen X decimal version\n"); + exit(1); + } + gen_level += decimal; + } + } + + if (gen_level < 40 || gen_level > 75) { + usage(); + exit(1); + } + + break; + } + + case 'a': + advanced_flag = 1; + break; + case 'b': + binary_like_output = 1; + break; + + case 'e': + need_export = 1; + if (strcmp(optarg, "-") != 0) + export_filename = optarg; + break; + + case 'l': + if (strcmp(optarg, "-") != 0) + entry_table_file = optarg; + break; + + case 'W': + warning_flags |= WARN_ALL; + break; + + default: + usage(); 
+ exit(1); + } + } + argc -= optind; + argv += optind; + if (argc != 1) { + usage(); + exit(1); + } + + if (strcmp(argv[0], "-") != 0) { + input_filename = argv[0]; + yyin = fopen(input_filename, "r"); + if (yyin == NULL) { + perror("Couldn't open input file"); + exit(1); + } + } + + brw_init_context(&genasm_brw_context, gen_level); + mem_ctx = ralloc_context(NULL); + brw_init_compile(&genasm_brw_context, &genasm_compile, mem_ctx); + + err = yyparse(); + + if (strcmp(argv[0], "-")) + fclose(yyin); + + yylex_destroy(); + + if (err || errors) + exit (1); + + if (output_file) { + output = fopen(output_file, "w"); + if (output == NULL) { + perror("Couldn't open output file"); + exit(1); + } + + } + + if (read_entry_file(entry_table_file)) { + fprintf(stderr, "Read entry file error\n"); + exit(1); + } + inst_offset = 0 ; + for (entry = compiled_program.first; + entry != NULL; entry = entry->next) { + entry->inst_offset = inst_offset; + entry1 = entry->next; + if (entry1 && is_label(entry1) && is_entry_point(entry1)) { + // insert NOP instructions until (inst_offset+1) % 4 == 0 + while (((inst_offset+1) % 4) != 0) { + tmp_entry = calloc(sizeof(*tmp_entry), 1); + tmp_entry->insn.gen.header.opcode = BRW_OPCODE_NOP; + entry->next = tmp_entry; + tmp_entry->next = entry1; + entry = tmp_entry; + tmp_entry->inst_offset = ++inst_offset; + } + } + if (!is_label(entry)) + inst_offset++; + } + + for (entry = compiled_program.first; entry; entry = entry->next) + if (is_label(entry)) + add_label(entry); + + if (need_export) { + if (export_filename) { + export_file = fopen(export_filename, "w"); + } else { + export_file = fopen("export.inc", "w"); + } + /* Fail like the input/output files do instead of handing a NULL stream to fprintf(). */ + if (export_file == NULL) { + perror("Couldn't open export file"); + exit(1); + } + for (entry = compiled_program.first; + entry != NULL; entry = entry->next) { + if (is_label(entry)) + fprintf(export_file, "#define %s_IP %d\n", + label_name(entry), (IS_GENx(5) ? 
2 : 1)*(entry->inst_offset)); + } + fclose(export_file); + } + + for (entry = compiled_program.first; entry; entry = entry->next) { + struct relocation *reloc = &entry->reloc; + struct brw_instruction *inst = &entry->insn.gen; + + if (!is_relocatable(entry)) + continue; + + if (reloc->first_reloc_target) + reloc->first_reloc_offset = label_to_addr(reloc->first_reloc_target, entry->inst_offset) - entry->inst_offset; + + if (reloc->second_reloc_target) + reloc->second_reloc_offset = label_to_addr(reloc->second_reloc_target, entry->inst_offset) - entry->inst_offset; + + if (reloc->second_reloc_offset) { + // this is a branch instruction with two offset arguments + inst->bits3.break_cont.jip = jump_distance(reloc->first_reloc_offset); + inst->bits3.break_cont.uip = jump_distance(reloc->second_reloc_offset); + } else if (reloc->first_reloc_offset) { + // this is a branch instruction with one offset argument + int offset = reloc->first_reloc_offset; + /* bspec: Unlike other flow control instructions, the offset used by JMPI is relative to the incremented instruction pointer rather than the IP value for the instruction itself. */ + + int is_jmpi = inst->header.opcode == BRW_OPCODE_JMPI; // target relative to the post-incremented IP, so delta == 1 if JMPI + if(is_jmpi) + offset --; + offset = jump_distance(offset); + if (is_jmpi && (gen_level == 75)) + offset = offset * 8; + + if(!IS_GENp(6)) { + inst->bits3.JIP = offset; + if(inst->header.opcode == BRW_OPCODE_ELSE) + inst->bits3.break_cont.uip = 1; /* Set the istack pop count, which must always be 1. */ + } else if(IS_GENx(6)) { + /* TODO: endif JIP pos is not in Gen6 spec. 
may be bits1 */ + int opcode = inst->header.opcode; + if(opcode == BRW_OPCODE_CALL || opcode == BRW_OPCODE_JMPI) + inst->bits3.JIP = offset; // for CALL, JMPI + else + inst->bits1.branch_gen6.jump_count = offset; // for CASE,ELSE,FORK,IF,WHILE + } else if(IS_GENp(7)) { + int opcode = inst->header.opcode; + /* Gen7 JMPI Restrictions in bspec: + * The JIP data type must be Signed DWord + */ + if(opcode == BRW_OPCODE_JMPI) + inst->bits3.JIP = offset; + else + inst->bits3.break_cont.jip = offset; + } + } + } + + if (binary_like_output) + fprintf(output, "%s", binary_prepend); + + for (entry = compiled_program.first; + entry != NULL; + entry = entry1) { + entry1 = entry->next; + if (!is_label(entry)) + print_instruction(output, &entry->insn.gen); + else + free(entry->insn.label.name); + free(entry); + } + if (binary_like_output) + fprintf(output, "};"); + + free_entry_point_table(entry_point_table); + free_hash_table(declared_register_table); + free_label_table(label_table); + + fflush (output); + if (ferror (output)) { + perror ("Could not flush output file"); + if (output_file) + unlink (output_file); + err = 1; + } + return err; +} diff --git a/assembler/ralloc.c b/assembler/ralloc.c new file mode 100644 index 0000000..59e71c4 --- /dev/null +++ b/assembler/ralloc.c @@ -0,0 +1,482 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> + +/* Android defines SIZE_MAX in limits.h, instead of the standard stdint.h */ +#ifdef ANDROID +#include <limits.h> +#endif + +/* Some versions of MinGW are missing _vscprintf's declaration, although they + * still provide the symbol in the import library. */ +#ifdef __MINGW32__ +_CRTIMP int _vscprintf(const char *format, va_list argptr); +#endif + +#include "ralloc.h" + +#ifndef va_copy +#ifdef __va_copy +#define va_copy(dest, src) __va_copy((dest), (src)) +#else +#define va_copy(dest, src) (dest) = (src) +#endif +#endif + +#define CANARY 0x5A1106 + +struct ralloc_header +{ + /* A canary value used to determine whether a pointer is ralloc'd. 
*/ + unsigned canary; + + struct ralloc_header *parent; + + /* The first child (head of a linked list) */ + struct ralloc_header *child; + + /* Linked list of siblings */ + struct ralloc_header *prev; + struct ralloc_header *next; + + void (*destructor)(void *); +}; + +typedef struct ralloc_header ralloc_header; + +static void unlink_block(ralloc_header *info); +static void unsafe_free(ralloc_header *info); + +static ralloc_header * +get_header(const void *ptr) +{ + ralloc_header *info = (ralloc_header *) (((char *) ptr) - + sizeof(ralloc_header)); + assert(info->canary == CANARY); + return info; +} + +#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header)) + +static void +add_child(ralloc_header *parent, ralloc_header *info) +{ + if (parent != NULL) { + info->parent = parent; + info->next = parent->child; + parent->child = info; + + if (info->next != NULL) + info->next->prev = info; + } +} + +void * +ralloc_context(const void *ctx) +{ + return ralloc_size(ctx, 0); +} + +/* Allocates a zeroed header plus size bytes, links it under ctx (if non-NULL) and returns the payload pointer, or NULL when the allocation fails. */ +void * +ralloc_size(const void *ctx, size_t size) +{ + void *block = calloc(1, size + sizeof(ralloc_header)); + ralloc_header *info; + ralloc_header *parent; + + /* calloc() can fail; report that to the caller (rzalloc_size() already checks for NULL) instead of writing the canary through a NULL header. */ + if (unlikely(block == NULL)) + return NULL; + + info = (ralloc_header *) block; + parent = ctx != NULL ? get_header(ctx) : NULL; + + add_child(parent, info); + + info->canary = CANARY; + + return PTR_FROM_HEADER(info); +} + +void * +rzalloc_size(const void *ctx, size_t size) +{ + void *ptr = ralloc_size(ctx, size); + if (likely(ptr != NULL)) + memset(ptr, 0, size); + return ptr; +} + +/* helper function - assumes ptr != NULL */ +static void * +resize(void *ptr, size_t size) +{ + ralloc_header *child, *old, *info; + + old = get_header(ptr); + info = realloc(old, size + sizeof(ralloc_header)); + + if (info == NULL) + return NULL; + + /* Update parent and sibling's links to the reallocated node. 
*/ + if (info != old && info->parent != NULL) { + if (info->parent->child == old) + info->parent->child = info; + + if (info->prev != NULL) + info->prev->next = info; + + if (info->next != NULL) + info->next->prev = info; + } + + /* Update child->parent links for all children */ + for (child = info->child; child != NULL; child = child->next) + child->parent = info; + + return PTR_FROM_HEADER(info); +} + +void * +reralloc_size(const void *ctx, void *ptr, size_t size) +{ + if (unlikely(ptr == NULL)) + return ralloc_size(ctx, size); + + assert(ralloc_parent(ptr) == ctx); + return resize(ptr, size); +} + +void * +ralloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return ralloc_size(ctx, size * count); +} + +void * +rzalloc_array_size(const void *ctx, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return rzalloc_size(ctx, size * count); +} + +void * +reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count) +{ + if (count > SIZE_MAX/size) + return NULL; + + return reralloc_size(ctx, ptr, size * count); +} + +void +ralloc_free(void *ptr) +{ + ralloc_header *info; + + if (ptr == NULL) + return; + + info = get_header(ptr); + unlink_block(info); + unsafe_free(info); +} + +static void +unlink_block(ralloc_header *info) +{ + /* Unlink from parent & siblings */ + if (info->parent != NULL) { + if (info->parent->child == info) + info->parent->child = info->next; + + if (info->prev != NULL) + info->prev->next = info->next; + + if (info->next != NULL) + info->next->prev = info->prev; + } + info->parent = NULL; + info->prev = NULL; + info->next = NULL; +} + +static void +unsafe_free(ralloc_header *info) +{ + /* Recursively free any children...don't waste time unlinking them. */ + ralloc_header *temp; + while (info->child != NULL) { + temp = info->child; + info->child = temp->next; + unsafe_free(temp); + } + + /* Free the block itself. 
Call the destructor first, if any. */ + if (info->destructor != NULL) + info->destructor(PTR_FROM_HEADER(info)); + + free(info); +} + +void +ralloc_steal(const void *new_ctx, void *ptr) +{ + ralloc_header *info, *parent; + + if (unlikely(ptr == NULL)) + return; + + info = get_header(ptr); + parent = get_header(new_ctx); + + unlink_block(info); + + add_child(parent, info); +} + +void * +ralloc_parent(const void *ptr) +{ + ralloc_header *info; + + if (unlikely(ptr == NULL)) + return NULL; + + info = get_header(ptr); + return info->parent ? PTR_FROM_HEADER(info->parent) : NULL; +} + +static void *autofree_context = NULL; + +static void +autofree(void) +{ + ralloc_free(autofree_context); +} + +void * +ralloc_autofree_context(void) +{ + if (unlikely(autofree_context == NULL)) { + autofree_context = ralloc_context(NULL); + atexit(autofree); + } + return autofree_context; +} + +void +ralloc_set_destructor(const void *ptr, void(*destructor)(void *)) +{ + ralloc_header *info = get_header(ptr); + info->destructor = destructor; +} + +char * +ralloc_strdup(const void *ctx, const char *str) +{ + size_t n; + char *ptr; + + if (unlikely(str == NULL)) + return NULL; + + n = strlen(str); + ptr = ralloc_array(ctx, char, n + 1); + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +char * +ralloc_strndup(const void *ctx, const char *str, size_t max) +{ + size_t n; + char *ptr; + + if (unlikely(str == NULL)) + return NULL; + + n = strlen(str); + if (n > max) + n = max; + + ptr = ralloc_array(ctx, char, n + 1); + memcpy(ptr, str, n); + ptr[n] = '\0'; + return ptr; +} + +/* helper routine for strcat/strncat - n is the exact amount to copy */ +static bool +cat(char **dest, const char *str, size_t n) +{ + char *both; + size_t existing_length; + assert(dest != NULL && *dest != NULL); + + existing_length = strlen(*dest); + both = resize(*dest, existing_length + n + 1); + if (unlikely(both == NULL)) + return false; + + memcpy(both + existing_length, str, n); + both[existing_length + n] 
= '\0'; + + *dest = both; + return true; +} + + +bool +ralloc_strcat(char **dest, const char *str) +{ + return cat(dest, str, strlen(str)); +} + +bool +ralloc_strncat(char **dest, const char *str, size_t n) +{ + /* Clamp n to the string length */ + size_t str_length = strlen(str); + if (str_length < n) + n = str_length; + + return cat(dest, str, n); +} + +char * +ralloc_asprintf(const void *ctx, const char *fmt, ...) +{ + char *ptr; + va_list args; + va_start(args, fmt); + ptr = ralloc_vasprintf(ctx, fmt, args); + va_end(args); + return ptr; +} + +/* Return the length of the string that would be generated by a printf-style + * format and argument list, not including the \0 byte. + */ +static size_t +printf_length(const char *fmt, va_list untouched_args) +{ + int size; + char junk; + + /* Make a copy of the va_list so the original caller can still use it */ + va_list args; + va_copy(args, untouched_args); + +#ifdef _WIN32 + /* We need to use _vcsprintf to calculate the size as vsnprintf returns -1 + * if the number of characters to write is greater than count. + */ + size = _vscprintf(fmt, args); + (void)junk; +#else + size = vsnprintf(&junk, 1, fmt, args); +#endif + assert(size >= 0); + + va_end(args); + + return size; +} + +char * +ralloc_vasprintf(const void *ctx, const char *fmt, va_list args) +{ + size_t size = printf_length(fmt, args) + 1; + + char *ptr = ralloc_size(ctx, size); + if (ptr != NULL) + vsnprintf(ptr, size, fmt, args); + + return ptr; +} + +bool +ralloc_asprintf_append(char **str, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_append(str, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_append(char **str, const char *fmt, va_list args) +{ + size_t existing_length; + assert(str != NULL); + existing_length = *str ? 
strlen(*str) : 0; + return ralloc_vasprintf_rewrite_tail(str, &existing_length, fmt, args); +} + +bool +ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...) +{ + bool success; + va_list args; + va_start(args, fmt); + success = ralloc_vasprintf_rewrite_tail(str, start, fmt, args); + va_end(args); + return success; +} + +bool +ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, + va_list args) +{ + size_t new_length; + char *ptr; + + assert(str != NULL); + + if (unlikely(*str == NULL)) { + // Assuming a NULL context is probably bad, but it's expected behavior. + *str = ralloc_vasprintf(NULL, fmt, args); + return true; + } + + new_length = printf_length(fmt, args); + + ptr = resize(*str, *start + new_length + 1); + if (unlikely(ptr == NULL)) + return false; + + vsnprintf(ptr + *start, new_length + 1, fmt, args); + *str = ptr; + *start += new_length; + return true; +} diff --git a/assembler/ralloc.h b/assembler/ralloc.h new file mode 100644 index 0000000..6228d5b --- /dev/null +++ b/assembler/ralloc.h @@ -0,0 +1,407 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ralloc.h + * + * ralloc: a recursive memory allocator + * + * The ralloc memory allocator creates a hierarchy of allocated + * objects. Every allocation is in reference to some parent, and + * every allocated object can in turn be used as the parent of a + * subsequent allocation. This allows for extremely convenient + * discarding of an entire tree/sub-tree of allocations by calling + * ralloc_free on any particular object to free it and all of its + * children. + * + * The conceptual working of ralloc was directly inspired by Andrew + * Tridgell's talloc, but ralloc is an independent implementation + * released under the MIT license and tuned for Mesa. + * + * The talloc implementation is available under the GNU Lesser + * General Public License (GNU LGPL), version 3 or later. It is + * more sophisticated than ralloc in that it includes reference + * counting and debugging features. See: http://talloc.samba.org/ + */ + +#ifndef RALLOC_H +#define RALLOC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> +#include <stdarg.h> +#include <stdbool.h> +#include "brw_compat.h" + +/** + * \def ralloc(ctx, type) + * Allocate a new object chained off of the given context. + * + * This is equivalent to: + * \code + * ((type *) ralloc_size(ctx, sizeof(type)) + * \endcode + */ +#define ralloc(ctx, type) ((type *) ralloc_size(ctx, sizeof(type))) + +/** + * \def rzalloc(ctx, type) + * Allocate a new object out of the given context and initialize it to zero. 
+ * + * This is equivalent to: + * \code + * ((type *) rzalloc_size(ctx, sizeof(type)) + * \endcode + */ +#define rzalloc(ctx, type) ((type *) rzalloc_size(ctx, sizeof(type))) + +/** + * Allocate a new ralloc context. + * + * While any ralloc'd pointer can be used as a context, sometimes it is useful + * to simply allocate a context with no associated memory. + * + * It is equivalent to: + * \code + * ((type *) ralloc_size(ctx, 0) + * \endcode + */ +void *ralloc_context(const void *ctx); + +/** + * Allocate memory chained off of the given context. + * + * This is the core allocation routine which is used by all others. It + * simply allocates storage for \p size bytes and returns the pointer, + * similar to \c malloc. + */ +void *ralloc_size(const void *ctx, size_t size); + +/** + * Allocate zero-initialized memory chained off of the given context. + * + * This is similar to \c calloc with a size of 1. + */ +void *rzalloc_size(const void *ctx, size_t size); + +/** + * Resize a piece of ralloc-managed memory, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the memory to be resized. May be NULL. + * \param size The amount of memory to allocate, in bytes. + */ +void *reralloc_size(const void *ctx, void *ptr, size_t size); + +/// \defgroup array Array Allocators @{ + +/** + * \def ralloc_array(ctx, type, count) + * Allocate an array of objects chained off the given context. + * + * Similar to \c calloc, but does not initialize the memory to zero. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. 
+ * + * This is equivalent to: + * \code + * ((type *) ralloc_array_size(ctx, sizeof(type), count) + * \endcode + */ +#define ralloc_array(ctx, type, count) \ + ((type *) ralloc_array_size(ctx, sizeof(type), count)) + +/** + * \def rzalloc_array(ctx, type, count) + * Allocate a zero-initialized array chained off the given context. + * + * Similar to \c calloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * This is equivalent to: + * \code + * ((type *) rzalloc_array_size(ctx, sizeof(type), count) + * \endcode + */ +#define rzalloc_array(ctx, type, count) \ + ((type *) rzalloc_array_size(ctx, sizeof(type), count)) + +/** + * \def reralloc(ctx, ptr, type, count) + * Resize a ralloc-managed array, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \c sizeof(type) and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param type The element type. + * \param count The number of elements to allocate. + */ +#define reralloc(ctx, ptr, type, count) \ + ((type *) reralloc_array_size(ctx, ptr, sizeof(type), count)) + +/** + * Allocate memory for an array chained off the given context. + * + * Similar to \c calloc, but does not initialize the memory to zero. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \p size and \p count. This is necessary for security. 
+ */ +void *ralloc_array_size(const void *ctx, size_t size, unsigned count); + +/** + * Allocate a zero-initialized array chained off the given context. + * + * Similar to \c calloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \p size and \p count. This is necessary for security. + */ +void *rzalloc_array_size(const void *ctx, size_t size, unsigned count); + +/** + * Resize a ralloc-managed array, preserving data. + * + * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the + * memory. Instead, it resizes it to a 0-byte ralloc context, just like + * calling ralloc_size(ctx, 0). This is different from talloc. + * + * More than a convenience function, this also checks for integer overflow when + * multiplying \p size and \p count. This is necessary for security. + * + * \param ctx The context to use for new allocation. If \p ptr != NULL, + * it must be the same as ralloc_parent(\p ptr). + * \param ptr Pointer to the array to be resized. May be NULL. + * \param size The size of an individual element. + * \param count The number of elements to allocate. + * + * \return The resized array, or NULL if allocation failed. + */ +void *reralloc_array_size(const void *ctx, void *ptr, size_t size, + unsigned count); +/// @} + +/** + * Free a piece of ralloc-managed memory. + * + * This will also free the memory of any children allocated in this context. + */ +void ralloc_free(void *ptr); + +/** + * "Steal" memory from one context, changing it to another. + * + * This changes \p ptr's context to \p new_ctx. This is quite useful if + * memory is allocated out of a temporary context. + */ +void ralloc_steal(const void *new_ctx, void *ptr); + +/** + * Return the given pointer's ralloc context. + */ +void *ralloc_parent(const void *ptr); + +/** + * Return a context whose memory will be automatically freed at program exit. 
+ * + * The first call to this function creates a context and registers a handler + * to free it using \c atexit. This may cause trouble if used in a library + * loaded with \c dlopen. + */ +void *ralloc_autofree_context(void); + +/** + * Set a callback to occur just before an object is freed. + */ +void ralloc_set_destructor(const void *ptr, void(*destructor)(void *)); + +/// \defgroup string String Functions @{ +/** + * Duplicate a string, allocating the memory from the given context. + */ +char *ralloc_strdup(const void *ctx, const char *str); + +/** + * Duplicate a string, allocating the memory from the given context. + * + * Like \c strndup, at most \p n characters are copied. If \p str is longer + * than \p n characters, \p n are copied, and a terminating \c '\0' byte is added. + */ +char *ralloc_strndup(const void *ctx, const char *str, size_t n); + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends \p str to \p *dest, similar to \c strcat, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated. + * + * \return True unless allocation failed. + */ +bool ralloc_strcat(char **dest, const char *str); + +/** + * Concatenate two strings, allocating the necessary space. + * + * This appends at most \p n bytes of \p str to \p *dest, using ralloc_resize + * to expand \p *dest to the appropriate size. \p dest will be updated to the + * new pointer unless allocation fails. + * + * The result will always be null-terminated; \p str does not need to be null + * terminated if it is longer than \p n. + * + * \return True unless allocation failed. + */ +bool ralloc_strncat(char **dest, const char *str, size_t n); + +/** + * Print to a string. + * + * This is analogous to \c sprintf, but allocates enough space (using \p ctx + * as the context) for the resulting string. 
+ * + * \return The newly allocated string. + */ +char *ralloc_asprintf (const void *ctx, const char *fmt, ...) PRINTFLIKE(2, 3); + +/** + * Print to a string, given a va_list. + * + * This is analogous to \c vsprintf, but allocates enough space (using \p ctx + * as the context) for the resulting string. + * + * \return The newly allocated string. + */ +char *ralloc_vasprintf(const void *ctx, const char *fmt, va_list args); + +/** + * Rewrite the tail of an existing string, starting at a given index. + * + * Overwrites the contents of *str starting at \p start with newly formatted + * text, including a new null-terminator. Allocates more memory as necessary. + * + * This can be used to append formatted text when the length of the existing + * string is already known, saving a strlen() call. + * + * \sa ralloc_asprintf_append + * + * \param str The string to be updated. + * \param start The index to start appending new data at. + * \param fmt A printf-style formatting string + * + * \p str will be updated to the new pointer unless allocation fails. + * \p start will be increased by the length of the newly formatted text. + * + * \return True unless allocation failed. + */ +bool ralloc_asprintf_rewrite_tail(char **str, size_t *start, + const char *fmt, ...) + PRINTFLIKE(3, 4); + +/** + * Rewrite the tail of an existing string, starting at a given index. + * + * Overwrites the contents of *str starting at \p start with newly formatted + * text, including a new null-terminator. Allocates more memory as necessary. + * + * This can be used to append formatted text when the length of the existing + * string is already known, saving a strlen() call. + * + * \sa ralloc_vasprintf_append + * + * \param str The string to be updated. + * \param start The index to start appending new data at. + * \param fmt A printf-style formatting string + * \param args A va_list containing the data to be formatted + * + * \p str will be updated to the new pointer unless allocation fails. 
+ * \p start will be increased by the length of the newly formatted text. + * + * \return True unless allocation failed. + */ +bool ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt, + va_list args); + +/** + * Append formatted text to the supplied string. + * + * This is equivalent to + * \code + * ralloc_asprintf_rewrite_tail(str, strlen(*str), fmt, ...) + * \endcode + * + * \sa ralloc_asprintf + * \sa ralloc_asprintf_rewrite_tail + * \sa ralloc_strcat + * + * \p str will be updated to the new pointer unless allocation fails. + * + * \return True unless allocation failed. + */ +bool ralloc_asprintf_append (char **str, const char *fmt, ...) + PRINTFLIKE(2, 3); + +/** + * Append formatted text to the supplied string, given a va_list. + * + * This is equivalent to + * \code + * ralloc_vasprintf_rewrite_tail(str, strlen(*str), fmt, args) + * \endcode + * + * \sa ralloc_vasprintf + * \sa ralloc_vasprintf_rewrite_tail + * \sa ralloc_strcat + * + * \p str will be updated to the new pointer unless allocation fails. + * + * \return True unless allocation failed. 
+ */ +bool ralloc_vasprintf_append(char **str, const char *fmt, va_list args); +/// @} + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif diff --git a/assembler/test/.gitignore b/assembler/test/.gitignore new file mode 100644 index 0000000..090a4f9 --- /dev/null +++ b/assembler/test/.gitignore @@ -0,0 +1,21 @@ +*.out +mov +frc +rndd +rnde +rnde-intsrc +rndu +rndz +lzd +not +jmpi +if +iff +while +else +break +cont +halt +wait +endif +immediate diff --git a/assembler/test/Makefile.am b/assembler/test/Makefile.am new file mode 100644 index 0000000..0d8d349 --- /dev/null +++ b/assembler/test/Makefile.am @@ -0,0 +1,84 @@ +check_SCRIPTS = run-test.sh + +TESTS_ENVIRONMENT = top_builddir=${top_builddir} +TESTS = \ + mov \ + frc \ + rndd \ + rndu \ + rnde \ + rnde-intsrc \ + rndz \ + lzd \ + not \ + jmpi \ + if \ + iff \ + while \ + else \ + break \ + cont \ + halt \ + wait \ + endif \ + declare \ + immediate + +# Tests that are expected to fail because they contain some incorrect code. 
+XFAIL_TESTS = \ + rnde-intsrc + +TESTDATA = \ + mov.expected \ + mov.g4a \ + frc.expected \ + frc.g4a \ + rndd.expected \ + rndd.g4a \ + rndu.expected \ + rndu.g4a \ + rnde.expected \ + rnde.g4a \ + rnde-intsrc.expected \ + rnde-intsrc.g4a \ + rndz.expected \ + rndz.g4a \ + lzd.expected \ + lzd.g4a \ + not.expected \ + not.g4a \ + jmpi.expected \ + jmpi.g4a \ + if.expected \ + if.g4a \ + iff.expected \ + iff.g4a \ + while.expected \ + while.g4a \ + else.expected \ + else.g4a \ + break.expected \ + break.g4a \ + cont.expected \ + cont.g4a \ + halt.expected \ + halt.g4a \ + wait.expected \ + wait.g4a \ + endif.expected \ + endif.g4a \ + declare.expected \ + declare.g4a \ + immediate.g4a \ + immediate.expected + +EXTRA_DIST = \ + ${TESTDATA} \ + run-test.sh + +$(TESTS): run-test.sh + chmod +x ${srcdir}/run-test.sh + +CLEANFILES = \ + *.out \ + ${TESTS} diff --git a/assembler/test/break.expected b/assembler/test/break.expected new file mode 100644 index 0000000..4e3e4eb --- /dev/null +++ b/assembler/test/break.expected @@ -0,0 +1 @@ + { 0x00000028, 0x34001c00, 0x00011400, 0x00010002 }, diff --git a/assembler/test/break.g4a b/assembler/test/break.g4a new file mode 100644 index 0000000..f23a0ba --- /dev/null +++ b/assembler/test/break.g4a @@ -0,0 +1,6 @@ +/* The break instruction syntax, which is currently just what was in the BNF, + * is bad. It really needs 2 arguments -- pop count (19:16, how many + * loops to break out of), and the IP count (15:0). For now, this argument + * should cover 1 loop, and jumping 2 instructions. 
+ */ +break 65538; diff --git a/assembler/test/cont.expected b/assembler/test/cont.expected new file mode 100644 index 0000000..a1cd936 --- /dev/null +++ b/assembler/test/cont.expected @@ -0,0 +1 @@ + { 0x00000029, 0x34001c00, 0x00011400, 0x00010002 }, diff --git a/assembler/test/cont.g4a b/assembler/test/cont.g4a new file mode 100644 index 0000000..300e7d8 --- /dev/null +++ b/assembler/test/cont.g4a @@ -0,0 +1,6 @@ +/* The cont instruction syntax, which is currently just what was in the BNF, + * is bad. It really needs 2 arguments -- pop count (19:16, how many + * loops to break out of), and the IP count (15:0). For now, this argument + * should cover 1 loop, and jumping 2 instructions. + */ +cont 65538; diff --git a/assembler/test/declare.expected b/assembler/test/declare.expected new file mode 100644 index 0000000..36ad68f --- /dev/null +++ b/assembler/test/declare.expected @@ -0,0 +1,3 @@ + { 0x00e00040, 0x20007fbd, 0x008d0f64, 0x3f9d70a4 }, + { 0x00e00040, 0x200077bd, 0x008d0f64, 0x008d0020 }, + { 0x00e00040, 0x2f6477bd, 0x008d0000, 0x008d0020 }, diff --git a/assembler/test/declare.g4a b/assembler/test/declare.g4a new file mode 100644 index 0000000..d3414e4 --- /dev/null +++ b/assembler/test/declare.g4a @@ -0,0 +1,5 @@ +.declare X1 Base=g99.0 ElementSize=1 SrcRegion=<8,8,1> DstRegion=<1> Type=F +.declare X1 Base=g123.4 ElementSize=4 SrcRegion=<8,8,1> DstRegion=<1> Type=F +add g0<1>:f X1 1.23:f; +add g0<1>:f X1 g1<8,8,1>:f; +add X1 g0<8,8,1>:f g1<8,8,1>:f; diff --git a/assembler/test/else.expected b/assembler/test/else.expected new file mode 100644 index 0000000..bdc77e4 --- /dev/null +++ b/assembler/test/else.expected @@ -0,0 +1 @@ + { 0x00000024, 0x34001c00, 0x00011400, 0x00010002 }, diff --git a/assembler/test/else.g4a b/assembler/test/else.g4a new file mode 100644 index 0000000..f410380 --- /dev/null +++ b/assembler/test/else.g4a @@ -0,0 +1 @@ +else 2; diff --git a/assembler/test/endif.expected b/assembler/test/endif.expected new file mode 100644 index 
0000000..b8a3003 --- /dev/null +++ b/assembler/test/endif.expected @@ -0,0 +1 @@ + { 0x00000025, 0x00001c00, 0x00000000, 0x00010000 }, diff --git a/assembler/test/endif.g4a b/assembler/test/endif.g4a new file mode 100644 index 0000000..b3b09fa --- /dev/null +++ b/assembler/test/endif.g4a @@ -0,0 +1 @@ +endif; diff --git a/assembler/test/frc.expected b/assembler/test/frc.expected new file mode 100644 index 0000000..e93f8f7 --- /dev/null +++ b/assembler/test/frc.expected @@ -0,0 +1 @@ + { 0x00000001, 0x20000021, 0x00000020, 0x00000000 }, diff --git a/assembler/test/frc.g4a b/assembler/test/frc.g4a new file mode 100644 index 0000000..8844f67 --- /dev/null +++ b/assembler/test/frc.g4a @@ -0,0 +1 @@ +mov (1) g0<1>UD g1<0,1,0>UD { align1 }; diff --git a/assembler/test/halt.expected b/assembler/test/halt.expected new file mode 100644 index 0000000..b92db85 --- /dev/null +++ b/assembler/test/halt.expected @@ -0,0 +1 @@ + { 0x0000002a, 0x34001c00, 0x00011400, 0x00000002 }, diff --git a/assembler/test/halt.g4a b/assembler/test/halt.g4a new file mode 100644 index 0000000..e6952b1 --- /dev/null +++ b/assembler/test/halt.g4a @@ -0,0 +1 @@ +halt 2; diff --git a/assembler/test/if.expected b/assembler/test/if.expected new file mode 100644 index 0000000..d2fa54d --- /dev/null +++ b/assembler/test/if.expected @@ -0,0 +1 @@ + { 0x00000022, 0x34001c00, 0x00011400, 0x00000002 }, diff --git a/assembler/test/if.g4a b/assembler/test/if.g4a new file mode 100644 index 0000000..60ba4da --- /dev/null +++ b/assembler/test/if.g4a @@ -0,0 +1 @@ +if 2; diff --git a/assembler/test/iff.expected b/assembler/test/iff.expected new file mode 100644 index 0000000..b5dd6f6 --- /dev/null +++ b/assembler/test/iff.expected @@ -0,0 +1 @@ + { 0x00000023, 0x34001c00, 0x00011400, 0x00000002 }, diff --git a/assembler/test/iff.g4a b/assembler/test/iff.g4a new file mode 100644 index 0000000..d728ed0 --- /dev/null +++ b/assembler/test/iff.g4a @@ -0,0 +1 @@ +iff 2; diff --git a/assembler/test/immediate.expected 
b/assembler/test/immediate.expected new file mode 100644 index 0000000..b1aa921 --- /dev/null +++ b/assembler/test/immediate.expected @@ -0,0 +1,3 @@ + { 0x00000001, 0x20000061, 0x00000000, 0xffffffff }, + { 0x00000001, 0x200000e1, 0x00000000, 0x7fffffff }, + { 0x00000001, 0x200000e1, 0x00000000, 0x80000000 }, diff --git a/assembler/test/immediate.g4a b/assembler/test/immediate.g4a new file mode 100644 index 0000000..4b9e2d3 --- /dev/null +++ b/assembler/test/immediate.g4a @@ -0,0 +1,3 @@ +mov (1) g0<1>UD 4294967295UD { align1 }; +mov (1) g0<1>UD 2147483647D { align1 }; +mov (1) g0<1>UD -2147483648D { align1 }; diff --git a/assembler/test/jmpi.expected b/assembler/test/jmpi.expected new file mode 100644 index 0000000..a53a036 --- /dev/null +++ b/assembler/test/jmpi.expected @@ -0,0 +1 @@ + { 0x00000020, 0x34001c00, 0x00011400, 0x00000002 }, diff --git a/assembler/test/jmpi.g4a b/assembler/test/jmpi.g4a new file mode 100644 index 0000000..7503dd4 --- /dev/null +++ b/assembler/test/jmpi.g4a @@ -0,0 +1 @@ +jmpi 2; diff --git a/assembler/test/lzd.expected b/assembler/test/lzd.expected new file mode 100644 index 0000000..1df4db9 --- /dev/null +++ b/assembler/test/lzd.expected @@ -0,0 +1 @@ + { 0x0000004a, 0x20000021, 0x00000020, 0x00000000 }, diff --git a/assembler/test/lzd.g4a b/assembler/test/lzd.g4a new file mode 100644 index 0000000..b644d76 --- /dev/null +++ b/assembler/test/lzd.g4a @@ -0,0 +1 @@ +lzd (1) g0<1>UD g1<0,1,0>UD { align1 }; diff --git a/assembler/test/mov.expected b/assembler/test/mov.expected new file mode 100644 index 0000000..e93f8f7 --- /dev/null +++ b/assembler/test/mov.expected @@ -0,0 +1 @@ + { 0x00000001, 0x20000021, 0x00000020, 0x00000000 }, diff --git a/assembler/test/mov.g4a b/assembler/test/mov.g4a new file mode 100644 index 0000000..8844f67 --- /dev/null +++ b/assembler/test/mov.g4a @@ -0,0 +1 @@ +mov (1) g0<1>UD g1<0,1,0>UD { align1 }; diff --git a/assembler/test/not.expected b/assembler/test/not.expected new file mode 100644 index 
0000000..072d7ab --- /dev/null +++ b/assembler/test/not.expected @@ -0,0 +1 @@ + { 0x00000004, 0x20000021, 0x00000020, 0x00000000 }, diff --git a/assembler/test/not.g4a b/assembler/test/not.g4a new file mode 100644 index 0000000..69d9f8c --- /dev/null +++ b/assembler/test/not.g4a @@ -0,0 +1 @@ +not (1) g0<1>UD g1<0,1,0>UD { align1 }; diff --git a/assembler/test/rndd.expected b/assembler/test/rndd.expected new file mode 100644 index 0000000..a841e25 --- /dev/null +++ b/assembler/test/rndd.expected @@ -0,0 +1 @@ + { 0x00000045, 0x200003a1, 0x00000020, 0x00000000 }, diff --git a/assembler/test/rndd.g4a b/assembler/test/rndd.g4a new file mode 100644 index 0000000..832a544 --- /dev/null +++ b/assembler/test/rndd.g4a @@ -0,0 +1 @@ +rndd (1) g0<1>UD g1<0,1,0>F { align1 }; diff --git a/assembler/test/rnde-intsrc.expected b/assembler/test/rnde-intsrc.expected new file mode 100644 index 0000000..1138d73 --- /dev/null +++ b/assembler/test/rnde-intsrc.expected @@ -0,0 +1 @@ + { 0x00000046, 0x20000021, 0x00000020, 0x00000000 }, diff --git a/assembler/test/rnde-intsrc.g4a b/assembler/test/rnde-intsrc.g4a new file mode 100644 index 0000000..68562fa --- /dev/null +++ b/assembler/test/rnde-intsrc.g4a @@ -0,0 +1,2 @@ +/* Non-float types are not permitted in the sources of round instructions. 
*/ +rnde (1) g0<1>UD g1<0,1,0>UD { align1 }; diff --git a/assembler/test/rnde.expected b/assembler/test/rnde.expected new file mode 100644 index 0000000..2155379 --- /dev/null +++ b/assembler/test/rnde.expected @@ -0,0 +1 @@ + { 0x00000046, 0x200003a1, 0x00000020, 0x00000000 }, diff --git a/assembler/test/rnde.g4a b/assembler/test/rnde.g4a new file mode 100644 index 0000000..9bc13cb --- /dev/null +++ b/assembler/test/rnde.g4a @@ -0,0 +1 @@ +rnde (1) g0<1>UD g1<0,1,0>F { align1 }; diff --git a/assembler/test/rndu.expected b/assembler/test/rndu.expected new file mode 100644 index 0000000..46e26c1 --- /dev/null +++ b/assembler/test/rndu.expected @@ -0,0 +1 @@ + { 0x00000044, 0x200003a1, 0x00000020, 0x00000000 }, diff --git a/assembler/test/rndu.g4a b/assembler/test/rndu.g4a new file mode 100644 index 0000000..6321f2e --- /dev/null +++ b/assembler/test/rndu.g4a @@ -0,0 +1 @@ +rndu (1) g0<1>UD g1<0,1,0>F { align1 }; diff --git a/assembler/test/rndz.expected b/assembler/test/rndz.expected new file mode 100644 index 0000000..9045cfc --- /dev/null +++ b/assembler/test/rndz.expected @@ -0,0 +1 @@ + { 0x00000047, 0x200003a1, 0x00000020, 0x00000000 }, diff --git a/assembler/test/rndz.g4a b/assembler/test/rndz.g4a new file mode 100644 index 0000000..6dd60f7 --- /dev/null +++ b/assembler/test/rndz.g4a @@ -0,0 +1 @@ +rndz (1) g0<1>UD g1<0,1,0>F { align1 }; diff --git a/assembler/test/run-test.sh b/assembler/test/run-test.sh new file mode 100644 index 0000000..e02a6e0 --- /dev/null +++ b/assembler/test/run-test.sh @@ -0,0 +1,83 @@ +#!/bin/sh + +#TODO: add new test cases in environment variables ${TEST_GEN4_XXX} + +DIR="$( cd -P "$( dirname "$0" )" && pwd )" +ASSEMBLER="${DIR}/../src/intel-gen4asm" + +# Tests that are expected to success because they contain correct code. 
+# $1 is the gen level, e.g., 4 or 7
+# $2 is the test case name
+check_if_work()   # POSIX definition form: "function name()" is a bash/ksh extension
+{
+    GEN_LEVEL="$1"
+    TEST_CASE_NAME="$2"
+    SOURCE="${TEST_CASE_NAME}.g${1}a"   # e.g. mov.g4a for gen level 4
+    EXPECTED="${TEST_CASE_NAME}.expected"
+    TEMP_OUT="temp.out"
+    "${ASSEMBLER}" -g "${GEN_LEVEL}" "${DIR}/${SOURCE}" -o "${TEMP_OUT}"
+    if cmp "${TEMP_OUT}" "${DIR}/${EXPECTED}" 2> /dev/null;   # passes only when cmp reports identical files
+    then
+        echo "[ OK ] ${TEST_CASE_NAME}";
+    else
+        echo "[FAIL] ${TEST_CASE_NAME}";
+        diff -u "${DIR}/${EXPECTED}" "${TEMP_OUT}";   # show what differs from the expected encoding
+    fi
+}
+
+# Tests that are expected to fail because they contain wrong code ($1: gen level, $2: test case name).
+check_if_fail()   # POSIX definition form, see check_if_work
+{
+    GEN_LEVEL="$1"
+    TEST_CASE_NAME="$2"
+    SOURCE="${TEST_CASE_NAME}.g${1}a"
+    TEMP_OUT="temp.out"
+    "${ASSEMBLER}" -g "${GEN_LEVEL}" "${DIR}/${SOURCE}" -o "${TEMP_OUT}" 2>/dev/null
+    if [ $? -eq 0 ];   # the assembler accepting wrong code is the failure case
+    then
+        echo "[FAIL] ${TEST_CASE_NAME}";
+    else
+        echo "[ OK ] ${TEST_CASE_NAME}";
+    fi
+}
+
+# Tests that are expected to succeed because they contain correct code.
+TEST_GEN4_SHOULD_WORK="\
+    mov \
+    frc \
+    rndd \
+    rndu \
+    rnde \
+    rnde-intsrc \
+    rndz \
+    lzd \
+    not \
+    jmpi \
+    if \
+    iff \
+    while \
+    else \
+    break \
+    cont \
+    halt \
+    wait \
+    endif \
+    declare \
+    immediate \
+    "
+
+# Tests that are expected to fail because they contain wrong code. 
+TEST_GEN4_SHOULD_FAIL="\ + rnde-intsrc \ + " + +for T in ${TEST_GEN4_SHOULD_WORK} +do + check_if_work 4 ${T} +done + +for T in ${TEST_GEN4_SHOULD_FAIL} +do + check_if_fail 4 ${T} +done + diff --git a/assembler/test/wait.expected b/assembler/test/wait.expected new file mode 100644 index 0000000..06a055b --- /dev/null +++ b/assembler/test/wait.expected @@ -0,0 +1 @@ + { 0x00000030, 0x20000000, 0x00001200, 0x00010000 }, diff --git a/assembler/test/wait.g4a b/assembler/test/wait.g4a new file mode 100644 index 0000000..59d11fa --- /dev/null +++ b/assembler/test/wait.g4a @@ -0,0 +1 @@ +wait n0; diff --git a/assembler/test/while.expected b/assembler/test/while.expected new file mode 100644 index 0000000..adad703 --- /dev/null +++ b/assembler/test/while.expected @@ -0,0 +1 @@ + { 0x00000027, 0x34001c00, 0x00011400, 0x0000fffe }, diff --git a/assembler/test/while.g4a b/assembler/test/while.g4a new file mode 100644 index 0000000..4f5e1df --- /dev/null +++ b/assembler/test/while.g4a @@ -0,0 +1 @@ +while -2; @@ -9,4 +9,4 @@ cd $srcdir autoreconf -v --install || exit 1 cd $ORIGDIR || exit $? -$srcdir/configure --enable-maintainer-mode "$@" +$srcdir/configure "$@" diff --git a/configure.ac b/configure.ac index 5e2dbed..1c4e1c6 100644 --- a/configure.ac +++ b/configure.ac @@ -36,9 +36,13 @@ AC_GNU_SOURCE AM_INIT_AUTOMAKE([foreign dist-bzip2]) AM_PATH_PYTHON([3],, [:]) -AM_MAINTAINER_MODE + +AC_PROG_CC +AM_PROG_LEX +AC_PROG_YACC # Checks for functions, headers, structures, etc. +AC_HEADER_STDC AC_CHECK_HEADERS([termios.h]) AC_CHECK_MEMBERS([struct sysinfo.totalram],[],[],[AC_INCLUDES_DEFAULT #include <sys/sysinfo.h> @@ -56,6 +60,16 @@ m4_ifndef([XORG_MACROS_VERSION], XORG_MACROS_VERSION(1.16) XORG_DEFAULT_OPTIONS +# warning flags for the assembler. We can't quite use CWARNFLAGS for it yet as +# it generates way too many warnings. 
+ASSEMBLER_WARN_CFLAGS=""
+if test "x$GCC" = "xyes"; then
+	ASSEMBLER_WARN_CFLAGS="-Wall -Wstrict-prototypes \
+	-Wmissing-prototypes -Wmissing-declarations \
+	-Wnested-externs -fno-strict-aliasing"
+fi
+AC_SUBST(ASSEMBLER_WARN_CFLAGS)
+
 PKG_CHECK_MODULES(DRM, [libdrm_intel >= 2.4.38 libdrm])
 PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10])
 
@@ -67,6 +81,12 @@ if test x"$udev" = xyes; then
 fi
 PKG_CHECK_MODULES(GLIB, glib-2.0)
 
+# Can we build the assembler? Both the LEX and the YACC results are checked.
+AS_IF([test x"$LEX" != "x:" && test x"$YACC" != xyacc],
+      [enable_assembler=yes],
+      [enable_assembler=no])
+AM_CONDITIONAL(BUILD_ASSEMBLER, [test "x$enable_assembler" = xyes])
+
 # -----------------------------------------------------------------------------
 #			Configuration options
 # -----------------------------------------------------------------------------
@@ -130,6 +150,9 @@ if test "x$BUILD_SHADER_DEBUGGER" != xno; then
 fi
 AM_CONDITIONAL(BUILD_SHADER_DEBUGGER, [test "x$BUILD_SHADER_DEBUGGER" != xno])
 
+AS_IF([test "x$BUILD_SHADER_DEBUGGER" != xno],
+      [enable_debugger=yes], [enable_debugger=no])
+
 # -----------------------------------------------------------------------------
 
 # To build multithread code, gcc uses -pthread, Solaris Studio cc uses -mt
@@ -157,7 +180,21 @@ AC_CONFIG_FILES([
 	tools/quick_dump/Makefile
 	debugger/Makefile
 	debugger/system_routine/Makefile
+	assembler/Makefile
+	assembler/doc/Makefile
+	assembler/test/Makefile
+	assembler/intel-gen4asm.pc
 ])
 AC_OUTPUT
 
+# Print a summary of which optional tools will be built
+echo ""
+echo "Intel GPU tools"
+
+echo ""
+echo " • Tools:"
+echo " Assembler: ${enable_assembler}"
+echo " Debugger: ${enable_debugger}"
+echo ""
+
 # vim: set ft=config ts=8 sw=8 tw=0 noet :
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
index d76e2ac..f1e49b9 100644
--- a/debugger/Makefile.am
+++ b/debugger/Makefile.am
@@ -11,6 +11,7 @@ AM_CPPFLAGS = \
 AM_CFLAGS = \
 	$(DRM_CFLAGS) \
 	$(PCIACCESS_CFLAGS) \
+	$(CAIRO_CFLAGS) \
 	$(CWARNFLAGS)
 
 LDADD = $(top_builddir)/lib/libintel_tools.la 
$(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) |