summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Damien Lespiau <damien.lespiau@intel.com> 2013-02-14 18:50:06 +0000
committer Damien Lespiau <damien.lespiau@intel.com> 2013-02-14 19:00:37 +0000
commit 89cabd785121aa92514c70d479b0b5453ef88e04 (patch)
tree 2fb240b9e9ad79c73afb226d3dfb44ec1407570f
parent 50c45f9586843bb3b83d9bed5d9738145ba05866 (diff)
parent ba2885b09e7c3f4870e4423abbbde6f432ee2378 (diff)
assembler: Merge the assembler branch [assembler-merged]
Conflicts:
  configure.ac: minor conflict with Ben's dumper work
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
-rw-r--r--.gitignore7
-rw-r--r--Makefile.am4
-rw-r--r--assembler/.gitignore18
-rw-r--r--assembler/Makefile.am51
-rw-r--r--assembler/README9
-rw-r--r--assembler/TODO14
-rw-r--r--assembler/brw_compat.h67
-rw-r--r--assembler/brw_context.c44
-rw-r--r--assembler/brw_context.h78
-rw-r--r--assembler/brw_defines.h1642
-rw-r--r--assembler/brw_disasm.c1348
-rw-r--r--assembler/brw_eu.c268
-rw-r--r--assembler/brw_eu.h427
-rw-r--r--assembler/brw_eu_compact.c810
-rw-r--r--assembler/brw_eu_debug.c92
-rw-r--r--assembler/brw_eu_emit.c2627
-rw-r--r--assembler/brw_eu_util.c125
-rw-r--r--assembler/brw_reg.h808
-rw-r--r--assembler/brw_structs.h1493
-rw-r--r--assembler/disasm-main.c172
-rw-r--r--assembler/doc/Makefile.am3
-rw-r--r--assembler/doc/examples/packed_yuv_sf.g4a17
-rw-r--r--assembler/doc/examples/packed_yuv_wm.g4a161
-rw-r--r--assembler/gen4asm.h227
-rw-r--r--assembler/gram.y3035
-rw-r--r--assembler/intel-gen4asm.pc.in10
-rw-r--r--assembler/lex.l440
-rw-r--r--assembler/main.c520
-rw-r--r--assembler/ralloc.c482
-rw-r--r--assembler/ralloc.h407
-rw-r--r--assembler/test/.gitignore21
-rw-r--r--assembler/test/Makefile.am84
-rw-r--r--assembler/test/break.expected1
-rw-r--r--assembler/test/break.g4a6
-rw-r--r--assembler/test/cont.expected1
-rw-r--r--assembler/test/cont.g4a6
-rw-r--r--assembler/test/declare.expected3
-rw-r--r--assembler/test/declare.g4a5
-rw-r--r--assembler/test/else.expected1
-rw-r--r--assembler/test/else.g4a1
-rw-r--r--assembler/test/endif.expected1
-rw-r--r--assembler/test/endif.g4a1
-rw-r--r--assembler/test/frc.expected1
-rw-r--r--assembler/test/frc.g4a1
-rw-r--r--assembler/test/halt.expected1
-rw-r--r--assembler/test/halt.g4a1
-rw-r--r--assembler/test/if.expected1
-rw-r--r--assembler/test/if.g4a1
-rw-r--r--assembler/test/iff.expected1
-rw-r--r--assembler/test/iff.g4a1
-rw-r--r--assembler/test/immediate.expected3
-rw-r--r--assembler/test/immediate.g4a3
-rw-r--r--assembler/test/jmpi.expected1
-rw-r--r--assembler/test/jmpi.g4a1
-rw-r--r--assembler/test/lzd.expected1
-rw-r--r--assembler/test/lzd.g4a1
-rw-r--r--assembler/test/mov.expected1
-rw-r--r--assembler/test/mov.g4a1
-rw-r--r--assembler/test/not.expected1
-rw-r--r--assembler/test/not.g4a1
-rw-r--r--assembler/test/rndd.expected1
-rw-r--r--assembler/test/rndd.g4a1
-rw-r--r--assembler/test/rnde-intsrc.expected1
-rw-r--r--assembler/test/rnde-intsrc.g4a2
-rw-r--r--assembler/test/rnde.expected1
-rw-r--r--assembler/test/rnde.g4a1
-rw-r--r--assembler/test/rndu.expected1
-rw-r--r--assembler/test/rndu.g4a1
-rw-r--r--assembler/test/rndz.expected1
-rw-r--r--assembler/test/rndz.g4a1
-rw-r--r--assembler/test/run-test.sh83
-rw-r--r--assembler/test/wait.expected1
-rw-r--r--assembler/test/wait.g4a1
-rw-r--r--assembler/test/while.expected1
-rw-r--r--assembler/test/while.g4a1
-rwxr-xr-xautogen.sh2
-rw-r--r--configure.ac39
-rw-r--r--debugger/Makefile.am1
78 files changed, 15697 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore
index 063aeec..bfd59dc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,10 @@ core
*.swo
*.swp
cscope.*
+TAGS
+
+/assembler/gram.c
+/assembler/gram.h
+/assembler/intel-gen4asm
+/assembler/intel-gen4disasm
+/assembler/lex.c
diff --git a/Makefile.am b/Makefile.am
index 20bca79..67b6563 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -23,6 +23,10 @@ ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4
SUBDIRS = lib man tools scripts benchmarks demos
+if BUILD_ASSEMBLER
+SUBDIRS += assembler
+endif
+
if BUILD_SHADER_DEBUGGER
SUBDIRS += debugger
endif
diff --git a/assembler/.gitignore b/assembler/.gitignore
new file mode 100644
index 0000000..ed1de4e
--- /dev/null
+++ b/assembler/.gitignore
@@ -0,0 +1,18 @@
+.deps
+Makefile
+Makefile.in
+aclocal.m4
+autom4te.cache
+configure
+configure.lineno
+config.log
+config.status
+depcomp
+install-sh
+missing
+*.o
+
+src/intel-gen4asm
+src/gram.c
+src/gram.h
+src/lex.c
diff --git a/assembler/Makefile.am b/assembler/Makefile.am
new file mode 100644
index 0000000..95ba08d
--- /dev/null
+++ b/assembler/Makefile.am
@@ -0,0 +1,51 @@
+SUBDIRS = doc test
+
+noinst_LTLIBRARIES = libbrw.la
+
+bin_PROGRAMS = intel-gen4asm intel-gen4disasm
+
+libbrw_la_SOURCES = \
+ brw_compat.h \
+ brw_context.c \
+ brw_context.h \
+ brw_disasm.c \
+ brw_defines.h \
+ brw_eu.h \
+ brw_eu.c \
+ brw_eu_compact.c \
+ brw_eu_debug.c \
+ brw_eu_emit.c \
+ brw_eu_util.c \
+ brw_reg.h \
+ brw_structs.h \
+ ralloc.c \
+ ralloc.h \
+ $(NULL)
+
+AM_YFLAGS = -d --warnings=all
+AM_CFLAGS= $(ASSEMBLER_WARN_CFLAGS)
+
+LEX = flex -i
+BUILT_SOURCES = gram.h gram.c lex.c
+gram.h: gram.c
+
+intel_gen4asm_SOURCES = \
+ gen4asm.h \
+ gram.y \
+ lex.l \
+ main.c \
+ $(NULL)
+
+intel_gen4asm_LDADD = libbrw.la
+
+intel_gen4disasm_SOURCES = disasm-main.c
+intel_gen4disasm_LDADD = libbrw.la
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = intel-gen4asm.pc
+
+MAINTAINERCLEANFILES = $(BUILT_SOURCES)
+EXTRA_DIST = \
+ README \
+ TODO \
+ intel-gen4asm.pc.in
diff --git a/assembler/README b/assembler/README
new file mode 100644
index 0000000..bfc9586
--- /dev/null
+++ b/assembler/README
@@ -0,0 +1,9 @@
+intel-gen4asm is a program to compile an assembly language for the Intel 965
+Express Chipset. It has been used to construct programs for textured video in
+the 2d driver.
+
+Some examples of gen4 assembly programs are in the doc/examples directory.
+
+Note that the language parsed by this assembler is not exactly what the final
+language is going to look like. In particular, the send instructions need to
+be cleaned up and made more reasonable to program with.
diff --git a/assembler/TODO b/assembler/TODO
new file mode 100644
index 0000000..59e4abf
--- /dev/null
+++ b/assembler/TODO
@@ -0,0 +1,14 @@
+- Add support for push, pop, msave, and mrest instructions
+- Fix up send argument formatting for some send instructions
+- Add send arguments for more send instructions
+- Fix up the sets of registers allowed for send arguments
+- manpage
+- binary output?
+- check for more error cases.
+- boolean types in parser internal structs where appropriate
+- replace GL* with non-GL?
+- support labels for branch/jump instruction destinations
+- support math on immediate operand values
+- break/cont syntax should be better
+- valgrind it
+- do something to allow use as a library?
diff --git a/assembler/brw_compat.h b/assembler/brw_compat.h
new file mode 100644
index 0000000..4bf7f31
--- /dev/null
+++ b/assembler/brw_compat.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * To share code with mesa without having to do big modifications and still be
+ * able to sync files together at a later point, this file holds macros and
+ * types defined in mesa's core headers.
+ */
+
+#ifndef __BRW_COMPAT_H__
+#define __BRW_COMPAT_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * __builtin_expect macros
+ */
+#if !defined(__GNUC__)
+# define __builtin_expect(x, y) (x) /* fallback: drop the hint, keep the value of x */
+#endif
+
+#ifndef likely
+# ifdef __GNUC__
+# define likely(x) __builtin_expect(!!(x), 1)
+# define unlikely(x) __builtin_expect(!!(x), 0)
+# else
+# define likely(x) (x)
+# define unlikely(x) (x)
+# endif
+#endif
+
+#if (__GNUC__ >= 3)
+#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a)))
+#else
+#define PRINTFLIKE(f, a)
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) /* element count; x must be a real array, not a pointer */
+#define Elements(x) ARRAY_SIZE(x)
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* __BRW_COMPAT_H__ */
diff --git a/assembler/brw_context.c b/assembler/brw_context.c
new file mode 100644
index 0000000..6f2a964
--- /dev/null
+++ b/assembler/brw_context.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "brw_context.h"
+
+static bool
+intel_init_context(struct intel_context *intel, int gen)
+{
+ /* Zero every field first; only gen, is_haswell and needs_ff_sync get values. */
+ memset(intel, 0, sizeof(*intel));
+ intel->is_haswell = (gen == 75);
+ intel->gen = gen / 10;
+ intel->needs_ff_sync = (intel->gen >= 5);
+ return true;
+}
+
+/* Fill in the intel_context embedded in brw from gen; always returns true. */
+bool
+brw_init_context(struct brw_context *brw, int gen)
+{
+ return intel_init_context(&brw->intel, gen);
+}
diff --git a/assembler/brw_context.h b/assembler/brw_context.h
new file mode 100644
index 0000000..90e66f7
--- /dev/null
+++ b/assembler/brw_context.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * To share code with mesa without having to do big modifications and still be
+ * able to sync files together at a later point, this file stubs the fields
+ * of struct brw_context used by the code we import.
+ */
+
+#ifndef __BRW_CONTEXT_H__
+#define __BRW_CONTEXT_H__
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "brw_structs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef INTEL_DEBUG
+#define INTEL_DEBUG (0)
+#endif
+
+struct intel_context
+{
+ int gen; /* brw_init_context() stores its gen argument divided by 10 here */
+ int gt; /* brw_init_context() only zeroes this */
+ bool is_haswell; /* true when brw_init_context() was called with gen == 75 */
+ bool is_g4x; /* brw_init_context() only zeroes this */
+ bool needs_ff_sync; /* set by brw_init_context() when the stored gen is >= 5 */
+};
+
+struct brw_context
+{
+ struct intel_context intel; /* the only member of this stubbed-down context */
+};
+
+bool
+brw_init_context(struct brw_context *brw, int gen);
+
+/* brw_disasm.c */
+struct opcode_desc {
+ char *name; /* presumably the opcode mnemonic -- confirm against brw_disasm.c */
+ int nsrc; /* presumably the number of source operands -- confirm */
+ int ndst; /* presumably the number of destination operands -- confirm */
+};
+
+extern const struct opcode_desc opcode_descs[128];
+
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* __BRW_CONTEXT_H__ */
diff --git a/assembler/brw_defines.h b/assembler/brw_defines.h
new file mode 100644
index 0000000..98757da
--- /dev/null
+++ b/assembler/brw_defines.h
@@ -0,0 +1,1642 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low)) /* bits low..high set; unsigned so masks reaching bit 31 do not overflow a signed int (field width must be < 32) */
+#define SET_FIELD(value, field) ((((unsigned)(value)) << field ## _SHIFT) & field ## _MASK) /* place value in field; needs field_SHIFT and field_MASK; unsigned shift avoids signed overflow */
+#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) /* extract field from word; the unsigned mask makes the shift logical for 32-bit words */
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ */
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+
+#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */
+/* DW0 */
+# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
+/* DW1 */
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define BRW_ANISORATIO_2 0
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_CUT_INDEX_ENABLE (1 << 10)
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20
+#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10
+#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
+#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
+#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
+#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT 2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+/* Surface state DW0 */
+#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
+#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f
+#define BRW_SURFACE_BLEND_ENABLED (1 << 13)
+#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14
+#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15
+#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16
+#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32A32_SFIXED 0x020
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R32G32B32_SFIXED 0x050
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_R32G32_SFIXED 0x0A0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define BRW_SURFACEFORMAT_R32_SFIXED 0x1B2
+#define BRW_SURFACEFORMAT_R10G10B10A2_SNORM 0x1B3
+#define BRW_SURFACEFORMAT_R10G10B10A2_USCALED 0x1B4
+#define BRW_SURFACEFORMAT_R10G10B10A2_SSCALED 0x1B5
+#define BRW_SURFACEFORMAT_R10G10B10A2_SINT 0x1B6
+#define BRW_SURFACEFORMAT_B10G10R10A2_SNORM 0x1B7
+#define BRW_SURFACEFORMAT_B10G10R10A2_USCALED 0x1B8
+#define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED 0x1B9
+#define BRW_SURFACEFORMAT_B10G10R10A2_UINT 0x1BA
+#define BRW_SURFACEFORMAT_B10G10R10A2_SINT 0x1BB
+#define BRW_SURFACE_FORMAT_SHIFT 18
+#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+
+#define BRW_SURFACE_TYPE_SHIFT 29
+#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29)
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+#define GEN7_SURFACE_IS_ARRAY (1 << 28)
+#define GEN7_SURFACE_VALIGN_2 (0 << 16)
+#define GEN7_SURFACE_VALIGN_4 (1 << 16)
+#define GEN7_SURFACE_HALIGN_4 (0 << 15)
+#define GEN7_SURFACE_HALIGN_8 (1 << 15)
+#define GEN7_SURFACE_TILING_NONE (0 << 13)
+#define GEN7_SURFACE_TILING_X (2 << 13)
+#define GEN7_SURFACE_TILING_Y (3 << 13)
+#define GEN7_SURFACE_ARYSPC_FULL (0 << 10)
+#define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10)
+
+/* Surface state DW2 */
+#define BRW_SURFACE_HEIGHT_SHIFT 19
+#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19)
+#define BRW_SURFACE_WIDTH_SHIFT 6
+#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6)
+#define BRW_SURFACE_LOD_SHIFT 2
+#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2)
+#define GEN7_SURFACE_HEIGHT_SHIFT 16
+#define GEN7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16)
+#define GEN7_SURFACE_WIDTH_SHIFT 0
+#define GEN7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0)
+
+/* Surface state DW3 */
+#define BRW_SURFACE_DEPTH_SHIFT 21
+#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21)
+#define BRW_SURFACE_PITCH_SHIFT 3
+#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3)
+#define BRW_SURFACE_TILED (1 << 1)
+#define BRW_SURFACE_TILED_Y (1 << 0)
+
+/* Surface state DW4 */
+#define BRW_SURFACE_MIN_LOD_SHIFT 28
+#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
+#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4)
+#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4)
+#define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3)
+#define GEN7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3)
+#define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3)
+#define GEN7_SURFACE_MSFMT_MSS (0 << 6)
+#define GEN7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6)
+
+/* Surface state DW5 */
+#define BRW_SURFACE_X_OFFSET_SHIFT 25
+#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25)
+#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24)
+#define BRW_SURFACE_Y_OFFSET_SHIFT 20
+#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20)
+#define GEN7_SURFACE_MIN_LOD_SHIFT 4
+#define GEN7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4)
+
+/* Surface state DW6 */
+#define GEN7_SURFACE_MCS_ENABLE (1 << 0)
+#define GEN7_SURFACE_MCS_PITCH_SHIFT 3
+#define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3)
+
+/* Surface state DW7 */
+#define GEN7_SURFACE_SCS_R_SHIFT 25
+#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
+#define GEN7_SURFACE_SCS_G_SHIFT 22
+#define GEN7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22)
+#define GEN7_SURFACE_SCS_B_SHIFT 19
+#define GEN7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19)
+#define GEN7_SURFACE_SCS_A_SHIFT 16
+#define GEN7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16)
+
+/* The actual swizzle values/what channel to use */
+#define HSW_SCS_ZERO 0
+#define HSW_SCS_ONE 1
+#define HSW_SCS_RED 4
+#define HSW_SCS_GREEN 5
+#define HSW_SCS_BLUE 6
+#define HSW_SCS_ALPHA 7
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+enum brw_compression { /* NOTE(review): presumably the pre-Gen6 compression-control values; the GEN6_COMPRESSION_* defines that follow use a different quarter/half numbering -- confirm */
+ BRW_COMPRESSION_NONE = 0,
+ BRW_COMPRESSION_2NDHALF = 1,
+ BRW_COMPRESSION_COMPRESSED = 2,
+};
+
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_R 7
+#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+#define BRW_ACCUMULATOR_WRITE_DISABLE 0
+#define BRW_ACCUMULATOR_WRITE_ENABLE 1
+
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about. Now, there
+ * are two contributors ANDed together to determine whether channels
+ * are executed: the predication on the instruction, and the channel
+ * write enable.
+ */
+/**
+ * This is the default value. It means that a channel's write enable is set
+ * if the per-channel IP is pointing at this instruction.
+ */
+#define BRW_WE_NORMAL 0
+/**
+ * This is used like BRW_MASK_DISABLE, and causes all channels to have
+ * their write enable set. Note that predication still contributes to
+ * whether the channel actually gets written.
+ */
+#define BRW_WE_ALL 1
+/** @} */
+
+enum opcode { /* hardware opcodes (values 1-126) followed by compiler backend opcodes (128 and up) */
+ /* These are the actual hardware opcodes. */
+ BRW_OPCODE_MOV = 1,
+ BRW_OPCODE_SEL = 2,
+ BRW_OPCODE_NOT = 4,
+ BRW_OPCODE_AND = 5,
+ BRW_OPCODE_OR = 6,
+ BRW_OPCODE_XOR = 7,
+ BRW_OPCODE_SHR = 8,
+ BRW_OPCODE_SHL = 9,
+ BRW_OPCODE_RSR = 10,
+ BRW_OPCODE_RSL = 11,
+ BRW_OPCODE_ASR = 12,
+ BRW_OPCODE_CMP = 16,
+ BRW_OPCODE_CMPN = 17,
+ BRW_OPCODE_F32TO16 = 19,
+ BRW_OPCODE_F16TO32 = 20,
+ BRW_OPCODE_BFREV = 23,
+ BRW_OPCODE_BFE = 24,
+ BRW_OPCODE_BFI1 = 25,
+ BRW_OPCODE_BFI2 = 26,
+ BRW_OPCODE_JMPI = 32,
+ BRW_OPCODE_BRD = 33,
+ BRW_OPCODE_IF = 34,
+ BRW_OPCODE_IFF = 35, /* same value as BRW_OPCODE_BRC below */
+ BRW_OPCODE_BRC = 35, /* same value as BRW_OPCODE_IFF; NOTE(review): presumably the two are valid on different hardware generations -- confirm */
+ BRW_OPCODE_ELSE = 36,
+ BRW_OPCODE_ENDIF = 37,
+ BRW_OPCODE_DO = 38,
+ BRW_OPCODE_WHILE = 39,
+ BRW_OPCODE_BREAK = 40,
+ BRW_OPCODE_CONTINUE = 41,
+ BRW_OPCODE_HALT = 42,
+ BRW_OPCODE_MSAVE = 44, /* same value as BRW_OPCODE_CALL below */
+ BRW_OPCODE_CALL = 44, /* same value as BRW_OPCODE_MSAVE; NOTE(review): presumably generation-dependent -- confirm */
+ BRW_OPCODE_MRESTORE = 45, /* same value as BRW_OPCODE_RET below */
+ BRW_OPCODE_RET = 45, /* same value as BRW_OPCODE_MRESTORE; NOTE(review): presumably generation-dependent -- confirm */
+ BRW_OPCODE_PUSH = 46,
+ BRW_OPCODE_POP = 47,
+ BRW_OPCODE_WAIT = 48,
+ BRW_OPCODE_SEND = 49,
+ BRW_OPCODE_SENDC = 50,
+ BRW_OPCODE_MATH = 56,
+ BRW_OPCODE_ADD = 64,
+ BRW_OPCODE_MUL = 65,
+ BRW_OPCODE_AVG = 66,
+ BRW_OPCODE_FRC = 67,
+ BRW_OPCODE_RNDU = 68,
+ BRW_OPCODE_RNDD = 69,
+ BRW_OPCODE_RNDE = 70,
+ BRW_OPCODE_RNDZ = 71,
+ BRW_OPCODE_MAC = 72,
+ BRW_OPCODE_MACH = 73,
+ BRW_OPCODE_LZD = 74,
+ BRW_OPCODE_FBH = 75,
+ BRW_OPCODE_FBL = 76,
+ BRW_OPCODE_CBIT = 77,
+ BRW_OPCODE_ADDC = 78,
+ BRW_OPCODE_SUBB = 79,
+ BRW_OPCODE_SAD2 = 80,
+ BRW_OPCODE_SADA2 = 81,
+ BRW_OPCODE_DP4 = 84,
+ BRW_OPCODE_DPH = 85,
+ BRW_OPCODE_DP3 = 86,
+ BRW_OPCODE_DP2 = 87,
+ BRW_OPCODE_DPA2 = 88,
+ BRW_OPCODE_LINE = 89,
+ BRW_OPCODE_PLN = 90,
+ BRW_OPCODE_MAD = 91,
+ BRW_OPCODE_LRP = 92,
+ BRW_OPCODE_NOP = 126,
+
+ /* These are compiler backend opcodes that get translated into other
+ * instructions.
+ */
+ FS_OPCODE_FB_WRITE = 128, /* only explicit value in this group; each later enumerator is the previous one plus 1 */
+ SHADER_OPCODE_RCP,
+ SHADER_OPCODE_RSQ,
+ SHADER_OPCODE_SQRT,
+ SHADER_OPCODE_EXP2,
+ SHADER_OPCODE_LOG2,
+ SHADER_OPCODE_POW,
+ SHADER_OPCODE_INT_QUOTIENT,
+ SHADER_OPCODE_INT_REMAINDER,
+ SHADER_OPCODE_SIN,
+ SHADER_OPCODE_COS,
+
+ SHADER_OPCODE_TEX,
+ SHADER_OPCODE_TXD,
+ SHADER_OPCODE_TXF,
+ SHADER_OPCODE_TXL,
+ SHADER_OPCODE_TXS,
+ FS_OPCODE_TXB,
+
+ SHADER_OPCODE_SHADER_TIME_ADD,
+
+ FS_OPCODE_DDX,
+ FS_OPCODE_DDY,
+ FS_OPCODE_PIXEL_X,
+ FS_OPCODE_PIXEL_Y,
+ FS_OPCODE_CINTERP,
+ FS_OPCODE_LINTERP,
+ FS_OPCODE_SPILL,
+ FS_OPCODE_UNSPILL,
+ FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
+ FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
+ FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+ FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
+ FS_OPCODE_DISCARD_JUMP,
+ FS_OPCODE_SET_GLOBAL_OFFSET,
+
+ VS_OPCODE_URB_WRITE,
+ VS_OPCODE_SCRATCH_READ,
+ VS_OPCODE_SCRATCH_WRITE,
+ VS_OPCODE_PULL_CONSTANT_LOAD,
+};
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 /* the ALIGN16 names reuse values 2-7 of the ALIGN1 names above */
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6 /* same value as BRW_PREDICATE_ALIGN1_ANY4H */
+#define BRW_PREDICATE_ALIGN16_ALL4H 7 /* same value as BRW_PREDICATE_ALIGN1_ALL4H */
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5 /* shares value 5 with BRW_REGISTER_TYPE_VF */
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6 /* shares value 6 with BRW_REGISTER_TYPE_V */
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+
+#define BRW_REGISTER_3SRC_TYPE_F 0
+#define BRW_REGISTER_3SRC_TYPE_D 1
+#define BRW_REGISTER_3SRC_TYPE_UD 2
+#define BRW_REGISTER_3SRC_TYPE_DF 3
+
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+#define BRW_ARF_TDR 0xB0
+#define BRW_ARF_TIMESTAMP 0xC0
+
+#define BRW_MRF_COMPR4 (1 << 7)
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+/**
+ * Message target: Shared Function ID for where to SEND a message.
+ *
+ * These are enumerated in the ISA reference under "send - Send Message".
+ * In particular, see the following tables:
+ * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
+ * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
+ * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
+ * Overview / GPE Function IDs
+ */
+enum brw_message_target { /* several names deliberately share a value; see the per-line notes */
+ BRW_SFID_NULL = 0,
+ BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
+ BRW_SFID_SAMPLER = 2,
+ BRW_SFID_MESSAGE_GATEWAY = 3,
+ BRW_SFID_DATAPORT_READ = 4,
+ BRW_SFID_DATAPORT_WRITE = 5,
+ BRW_SFID_URB = 6,
+ BRW_SFID_THREAD_SPAWNER = 7,
+
+ GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4, /* same value as BRW_SFID_DATAPORT_READ */
+ GEN6_SFID_DATAPORT_RENDER_CACHE = 5, /* same value as BRW_SFID_DATAPORT_WRITE */
+ GEN6_SFID_VME = 8,
+ GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
+
+ GEN7_SFID_DATAPORT_DATA_CACHE = 10,
+
+ HSW_SFID_CRE = 0x0d,
+};
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
+#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
+#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
+#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
+#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31
+
+/* for GEN5 only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+/* This one stays the same across generations. */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+/* GEN4 */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+/* GEN6 */
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
+#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
+#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
+
+/* GEN7 */
+#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10
+#define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ 3
+
+/* dataport atomic operations. */
+#define BRW_AOP_AND 1
+#define BRW_AOP_OR 2
+#define BRW_AOP_XOR 3
+#define BRW_AOP_MOV 4
+#define BRW_AOP_INC 5
+#define BRW_AOP_DEC 6
+#define BRW_AOP_ADD 7
+#define BRW_AOP_SUB 8
+#define BRW_AOP_REVSUB 9
+#define BRW_AOP_IMAX 10
+#define BRW_AOP_IMIN 11
+#define BRW_AOP_UMAX 12
+#define BRW_AOP_UMIN 13
+#define BRW_AOP_CMPWR 14
+#define BRW_AOP_PREDEC 15
+
+#define BRW_MATH_FUNCTION_INV 1
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CS_URB_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_SIP 0x6102
+#define CMD_PIPELINE_SELECT_965 0x6104
+#define CMD_PIPELINE_SELECT_GM45 0x6904
+
+#define _3DSTATE_PIPELINED_POINTERS 0x7800
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
+
+#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
+
+#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
+# define PS_SAMPLER_STATE_CHANGE (1 << 12)
+# define GS_SAMPLER_STATE_CHANGE (1 << 9)
+# define VS_SAMPLER_STATE_CHANGE (1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
+
+#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */
+
+#define _3DSTATE_VERTEX_BUFFERS 0x7808
+# define BRW_VB0_INDEX_SHIFT 27
+# define GEN6_VB0_INDEX_SHIFT 26
+# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
+# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
+# define BRW_VB0_PITCH_SHIFT 0
+
+#define _3DSTATE_VERTEX_ELEMENTS 0x7809
+# define BRW_VE0_INDEX_SHIFT 27
+# define GEN6_VE0_INDEX_SHIFT 26
+# define BRW_VE0_FORMAT_SHIFT 16
+# define BRW_VE0_VALID (1 << 26)
+# define GEN6_VE0_VALID (1 << 25)
+# define GEN6_VE0_EDGE_FLAG_ENABLE (1 << 15)
+# define BRW_VE0_SRC_OFFSET_SHIFT 0
+# define BRW_VE1_COMPONENT_NOSTORE 0
+# define BRW_VE1_COMPONENT_STORE_SRC 1
+# define BRW_VE1_COMPONENT_STORE_0 2
+# define BRW_VE1_COMPONENT_STORE_1_FLT 3
+# define BRW_VE1_COMPONENT_STORE_1_INT 4
+# define BRW_VE1_COMPONENT_STORE_VID 5
+# define BRW_VE1_COMPONENT_STORE_IID 6
+# define BRW_VE1_COMPONENT_STORE_PID 7
+# define BRW_VE1_COMPONENT_0_SHIFT 28
+# define BRW_VE1_COMPONENT_1_SHIFT 24
+# define BRW_VE1_COMPONENT_2_SHIFT 20
+# define BRW_VE1_COMPONENT_3_SHIFT 16
+# define BRW_VE1_DST_OFFSET_SHIFT 0
+
+#define CMD_INDEX_BUFFER 0x780a
+#define GEN4_3DSTATE_VF_STATISTICS 0x780b
+#define GM45_3DSTATE_VF_STATISTICS 0x680b
+#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */
+#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */
+#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */
+
+#define _3DSTATE_URB 0x7805 /* GEN6 */
+# define GEN6_URB_VS_SIZE_SHIFT 16
+# define GEN6_URB_VS_ENTRIES_SHIFT 0
+# define GEN6_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_URB_GS_SIZE_SHIFT 0
+
+#define _3DSTATE_VF 0x780c /* GEN7.5+ */
+#define HSW_CUT_INDEX_ENABLE (1 << 8)
+
+#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */
+#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */
+#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */
+#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */
+# define GEN7_URB_ENTRY_SIZE_SHIFT 16
+# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
+
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */
+# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
+
+#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
+
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
+
+#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+
+#define _3DSTATE_VS 0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE (1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT 25
+# define HSW_VS_MAX_THREADS_SHIFT 23
+# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_VS_CACHE_DISABLE (1 << 1)
+# define GEN6_VS_ENABLE (1 << 0)
+
+#define _3DSTATE_GS 0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE (1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
+# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10)
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT 25
+# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
+# define GEN6_GS_RENDERING_ENABLE (1 << 8)
+# define GEN7_GS_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_GS_REORDER (1 << 30)
+# define GEN6_GS_DISCARD_ADJACENCY (1 << 29)
+# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28)
+# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27)
+# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16
+# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
+# define GEN6_GS_ENABLE (1 << 15)
+
+# define BRW_GS_EDGE_INDICATOR_0 (1 << 8)
+# define BRW_GS_EDGE_INDICATOR_1 (1 << 9)
+
+#define _3DSTATE_HS 0x781B /* GEN7+ */
+#define _3DSTATE_TE 0x781C /* GEN7+ */
+#define _3DSTATE_DS 0x781D /* GEN7+ */
+
+#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN7_CLIP_WINDING_CW (0 << 20)
+# define GEN7_CLIP_WINDING_CCW (1 << 20)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19)
+# define GEN7_CLIP_EARLY_CULL (1 << 18)
+# define GEN7_CLIP_CULLMODE_BOTH (0 << 16)
+# define GEN7_CLIP_CULLMODE_NONE (1 << 16)
+# define GEN7_CLIP_CULLMODE_FRONT (2 << 16)
+# define GEN7_CLIP_CULLMODE_BACK (3 << 16)
+# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
+/**
+ * Just does cheap culling based on the clip distance. Bits must be
+ * disjoint with USER_CLIP_CLIP_DISTANCE bits.
+ */
+# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
+/* DW2 */
+# define GEN6_CLIP_ENABLE (1 << 31)
+# define GEN6_CLIP_API_OGL (0 << 30)
+# define GEN6_CLIP_API_D3D (1 << 30)
+# define GEN6_CLIP_XY_TEST (1 << 28)
+# define GEN6_CLIP_Z_TEST (1 << 27)
+# define GEN6_CLIP_GB_TEST (1 << 26)
+/** 8-bit field of which user clip distances to clip against. */
+# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
+# define GEN6_CLIP_MODE_NORMAL (0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
+# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
+# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
+
+#define _3DSTATE_SF 0x7813 /* GEN6+ */
+/* DW1 (for gen6) */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
+# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
+# define GEN6_SF_FRONT_SOLID (0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
+# define GEN6_SF_FRONT_POINT (2 << 5)
+# define GEN6_SF_BACK_SOLID (0 << 3)
+# define GEN6_SF_BACK_WIREFRAME (1 << 3)
+# define GEN6_SF_BACK_POINT (2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
+# define GEN6_SF_WINDING_CCW (1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
+# define GEN6_SF_CULL_BOTH (0 << 29)
+# define GEN6_SF_CULL_NONE (1 << 29)
+# define GEN6_SF_CULL_FRONT (2 << 29)
+# define GEN6_SF_CULL_BACK (3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
+# define ATTRIBUTE_1_SOURCE_SHIFT 16
+# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
+# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-15 wrap shortest enables */
+
+/* On GEN7, many fields of 3DSTATE_SF were split out into a new command:
+ * 3DSTATE_SBE. The remaining fields live in different DWords, but retain
+ * the same bit-offset. The only new field:
+ */
+/* GEN7/DW1: */
+# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
+/* GEN7/DW2: */
+# define HSW_SF_LINE_STIPPLE_ENABLE 14 /* NOTE(review): bare bit position, whereas the other *_ENABLE flags in this header are (1 << n) masks -- confirm users shift it before OR-ing it into DW2 */
+
+#define _3DSTATE_SBE 0x781F /* GEN7+ */
+/* DW1 */
+# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
+# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
+# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21)
+# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
+# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */
+/* DW10: Point sprite texture coordinate enables */
+/* DW11: Constant interpolation enables */
+/* DW12: attr 0-7 wrap shortest enables */
+/* DW13: attr 8-15 wrap shortest enables */
+
+enum brw_wm_barycentric_interp_mode { /* values are also used as bit indices by BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS */
+ BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,
+ BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1,
+ BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2,
+ BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3,
+ BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4,
+ BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5,
+ BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6 /* number of modes listed above, not a mode itself */
+};
+#define BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS \
+ ((1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC) | \
+ (1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC) | \
+ (1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC)) /* mask with bits 3, 4 and 5 set: one bit per NONPERSPECTIVE mode */
+
+/*
+ * 3DSTATE_WM command opcode and the bit fields of its dwords.
+ *
+ * Bit-31 flags are written as (1u << 31): shifting 1 into the sign bit of a
+ * signed int is undefined behaviour in C and would otherwise produce a
+ * negative constant.
+ */
+#define _3DSTATE_WM 0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE (1u << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE (1u << 31)
+# define GEN6_WM_DEPTH_CLEAR (1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT 25
+# define GEN6_WM_KILL_ENABLE (1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN6_WM_USES_SOURCE_W (1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_WM_POSOFFSET_NONE (0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
+# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0)
+# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+/* Command opcodes for the per-stage constant buffer state (VS/GS/PS). */
+#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */
+#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */
+#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */
+/* One enable bit per constant buffer 0-3, occupying bits 12-15. */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
+
+/* Constant buffer command opcodes for the HS and DS stages. */
+#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */
+#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */
+
+/*
+ * 3DSTATE_STREAMOUT command opcode and the bit fields of DW1/DW2.
+ *
+ * SO_FUNCTION_ENABLE uses (1u << 31): shifting 1 into the sign bit of a
+ * signed int is undefined behaviour in C.
+ */
+#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */
+/* DW1 */
+# define SO_FUNCTION_ENABLE (1u << 31)
+# define SO_RENDERING_DISABLE (1 << 30)
+/* This selects which incoming rendering stream goes down the pipeline. The
+ * rendering stream is 0 if not defined by special cases in the GS state.
+ */
+# define SO_RENDER_STREAM_SELECT_SHIFT 27
+# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27)
+/* Controls reordering of TRISTRIP_* elements in stream output (not rendering).
+ */
+# define SO_REORDER_TRAILING (1 << 26)
+/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */
+# define SO_STATISTICS_ENABLE (1 << 25)
+/* Enable bit for stream-output buffer n; buffer 0 is bit 8. */
+# define SO_BUFFER_ENABLE(n) (1 << (8 + (n)))
+/* DW2: per-stream vertex read offset (1 bit) and read length (5 bits) */
+# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29
+# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29)
+# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24
+# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24)
+# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21
+# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21)
+# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16
+# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16)
+# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13
+# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13)
+# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8
+# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8)
+# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5
+# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5)
+# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
+# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
+
+/* 3DSTATE_WM for Gen7 */
+/*
+ * Bit-31 values use an unsigned literal: shifting 1 into the sign bit of a
+ * signed int is undefined behaviour in C.
+ */
+/* DW1 */
+# define GEN7_WM_STATISTICS_ENABLE (1u << 31)
+# define GEN7_WM_DEPTH_CLEAR (1 << 30)
+# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
+# define GEN7_WM_DEPTH_RESOLVE (1 << 28)
+# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
+# define GEN7_WM_KILL_ENABLE (1 << 25)
+# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
+# define GEN7_WM_PSCDEPTH_ON (1 << 23)
+# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
+# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
+# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN7_WM_USES_SOURCE_W (1 << 19)
+# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
+# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
+# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
+# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11
+# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
+# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
+# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
+# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
+# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
+# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
+# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
+# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
+# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
+/* DW2 */
+# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
+# define GEN7_WM_MSDISPMODE_PERPIXEL (1u << 31)
+
+/*
+ * 3DSTATE_PS command opcode and the bit fields of its dwords.
+ *
+ * GEN7_PS_SPF_MODE uses (1u << 31): shifting 1 into the sign bit of a signed
+ * int is undefined behaviour in C.
+ */
+#define _3DSTATE_PS 0x7820 /* GEN7+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN7_PS_SPF_MODE (1u << 31)
+# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
+# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
+# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define IVB_PS_MAX_THREADS_SHIFT 24
+# define HSW_PS_MAX_THREADS_SHIFT 23
+# define HSW_PS_SAMPLE_MASK_SHIFT 12
+# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12)
+# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
+# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
+# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
+# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
+# define GEN7_PS_POSOFFSET_NONE (0 << 3)
+# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
+# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
+# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
+# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
+# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
+/* DW5 */
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
+/* DW6: kernel 1 pointer */
+/* DW7: kernel 2 pointer */
+
+/* Command opcode for 3DSTATE_SAMPLE_MASK. */
+#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */
+
+/* Command opcodes in the 0x79xx range. */
+#define _3DSTATE_DRAWING_RECTANGLE 0x7900
+#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
+#define _3DSTATE_CHROMA_KEY 0x7904
+#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
+#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
+
+#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT 29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */
+/* DW1: pixel location selector at bit 4, sample count field starting at bit 1 */
+# define MS_PIXEL_LOCATION_CENTER (0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define MS_NUMSAMPLES_1 (0 << 1)
+# define MS_NUMSAMPLES_4 (2 << 1)
+# define MS_NUMSAMPLES_8 (3 << 1)
+
+#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
+#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
+
+/*
+ * GEN7 variants of the depth/stencil/hierarchical-depth/clear-params command
+ * opcodes, followed by the older 0x7910 clear-params opcode.
+ *
+ * HSW_STENCIL_ENABLED uses (1u << 31): shifting 1 into the sign bit of a
+ * signed int is undefined behaviour in C.
+ */
+#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804
+#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805
+#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806
+# define HSW_STENCIL_ENABLED (1u << 31)
+#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807
+
+#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */
+# define GEN5_DEPTH_CLEAR_VALID (1 << 15)
+/* DW1: depth clear value */
+/* DW2 */
+# define GEN7_DEPTH_CLEAR_VALID (1 << 0)
+
+/*
+ * 3DSTATE_SO_DECL_LIST command opcode and fields. Each *_SHIFT gives the low
+ * bit of a field and the matching *_MASK is built with INTEL_MASK(high, low).
+ */
+#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */
+/* DW1: one 4-bit buffer-select field per stream 0-3 */
+# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12
+# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12)
+# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8
+# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8)
+# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4
+# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4)
+# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0
+# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0)
+/* DW2: one 8-bit entry count per stream 0-3 */
+# define SO_NUM_ENTRIES_3_SHIFT 24
+# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24)
+# define SO_NUM_ENTRIES_2_SHIFT 16
+# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16)
+# define SO_NUM_ENTRIES_1_SHIFT 8
+# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8)
+# define SO_NUM_ENTRIES_0_SHIFT 0
+# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0)
+
+/* SO_DECL DW0 */
+# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12
+# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12)
+# define SO_DECL_HOLE_FLAG (1 << 11)
+# define SO_DECL_REGISTER_INDEX_SHIFT 4
+# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4)
+# define SO_DECL_COMPONENT_MASK_SHIFT 0
+# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0)
+
+/* 3DSTATE_SO_BUFFER command opcode and DW1 fields. */
+#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */
+/* DW1 */
+# define SO_BUFFER_INDEX_SHIFT 29
+# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29)
+# define SO_BUFFER_PITCH_SHIFT 0
+# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0)
+/* DW2: start address */
+/* DW3: end address. */
+
+/* Command opcodes for PIPE_CONTROL and MI_FLUSH. */
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Bitfields for the URB_WRITE message, DW2 of message header: */
+#define URB_WRITE_PRIM_END 0x1
+#define URB_WRITE_PRIM_START 0x2
+#define URB_WRITE_PRIM_TYPE_SHIFT 2
+
+
+/* Maximum number of entries that can be addressed using a binding table
+ * pointer of type SURFTYPE_BUFFER
+ */
+#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
+
+/* Masks for the SFID (low 4 bits) and EOT (bit 5) fields of an extended
+ * descriptor -- NOTE(review): the descriptor layout is inferred from the
+ * macro names only; confirm against the users of these masks.
+ */
+#define EX_DESC_SFID_MASK 0xF
+#define EX_DESC_EOT_MASK 0x20
+
+#endif
diff --git a/assembler/brw_disasm.c b/assembler/brw_disasm.c
new file mode 100644
index 0000000..4dec829
--- /dev/null
+++ b/assembler/brw_disasm.c
@@ -0,0 +1,1348 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "brw_compat.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+
+/*
+ * Instruction descriptor table indexed by opcode value: mnemonic plus the
+ * number of source and destination operands. Opcodes without an initializer
+ * are zero-filled, so their .name is NULL; print_opcode() reports those as
+ * invalid.
+ */
+const struct opcode_desc opcode_descs[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+/* File-local alias for the table above; print_opcode() indexes through it. */
+static const struct opcode_desc *opcode = opcode_descs;
+
+/*
+ * String tables for operand and instruction modifier fields, indexed by the
+ * raw field value. When a table is looked up through control(), an entry
+ * left without an initializer (NULL) is reported as an invalid value and an
+ * empty string prints nothing.
+ */
+
+/* Suffix for each conditional modifier; values 10-15 have no entry. */
+static const char * const conditional_modifier[16] = {
+ [BRW_CONDITIONAL_NONE] = "",
+ [BRW_CONDITIONAL_Z] = ".e",
+ [BRW_CONDITIONAL_NZ] = ".ne",
+ [BRW_CONDITIONAL_G] = ".g",
+ [BRW_CONDITIONAL_GE] = ".ge",
+ [BRW_CONDITIONAL_L] = ".l",
+ [BRW_CONDITIONAL_LE] = ".le",
+ [BRW_CONDITIONAL_R] = ".r",
+ [BRW_CONDITIONAL_O] = ".o",
+ [BRW_CONDITIONAL_U] = ".u",
+};
+
+/* Source negate prefix. */
+static const char * const negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+/* Source absolute-value prefix. */
+static const char * const _abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+/* Vertical stride of a source region; values 7-14 have no entry. */
+static const char * const vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+/* Width of a source region; values 5-7 have no entry. */
+static const char * const width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+/* Horizontal stride; every value is populated, so dest() indexes it directly. */
+static const char * const horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+/* Channel names used when printing swizzles. */
+static const char * const chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+static const char * const debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+static const char * const saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+/*
+ * String tables for instruction-option fields, indexed by the raw field
+ * value. Their users lie outside this part of the file; presumably they are
+ * looked up through control(), in which case a missing (NULL) entry is
+ * reported as invalid and an empty string prints nothing -- confirm at the
+ * call sites.
+ */
+static const char * const accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
+static const char * const wectrl[2] = {
+ [0] = "WE_normal",
+ [1] = "WE_all"
+};
+
+/* Values 6 and 7 have no entry. */
+static const char * const exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+static const char * const pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+/* Index 0 and indices 8-15 have no entry. */
+static const char * const pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+/* Index 0 and indices 12-15 have no entry. */
+static const char * const pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+/* Values 1 and 3 have no entry. */
+static const char * const thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+static const char * const compr_ctrl[4] = {
+ [0] = "",
+ [1] = "sechalf",
+ [2] = "compr",
+ [3] = "compr4",
+};
+
+static const char * const dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+/* Values 2 and 3 have no entry. */
+static const char * const mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+static const char * const access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+/*
+ * Register type suffix indexed by the 3-bit register type field. Type 6 has
+ * no entry, so control() reports it as an invalid encoding.
+ */
+static const char * const reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+/*
+ * Element size in bytes for each register type, parallel to reg_encoding.
+ * Type 6 has no initializer and is therefore 0; code that divides by an
+ * entry of this table must cope with that.
+ */
+const int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
+};
+
+/* Register file prefix; reg() prints it for every file except the ARF. */
+static const char * const reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+/*
+ * Destination write mask indexed by the 4-bit mask (bit 0 = x ... bit 3 = w).
+ * The full mask 0xf prints nothing; the empty mask prints a lone ".".
+ */
+static const char * const writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+static const char * const end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+/*
+ * Names of the shared-function targets, indexed by SFID. There is one table
+ * for the original encoding and one "_gen6" variant that drops the
+ * read/write dataport entries and adds the sampler/render/constant/data
+ * cache dataports. Which one is used is decided by callers outside this part
+ * of the file.
+ */
+static const char * const target_function[16] = {
+ [BRW_SFID_NULL] = "null",
+ [BRW_SFID_MATH] = "math",
+ [BRW_SFID_SAMPLER] = "sampler",
+ [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+ [BRW_SFID_DATAPORT_READ] = "read",
+ [BRW_SFID_DATAPORT_WRITE] = "write",
+ [BRW_SFID_URB] = "urb",
+ [BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
+};
+
+/* Note that two different SFIDs both print as "sampler" here. */
+static const char * const target_function_gen6[16] = {
+ [BRW_SFID_NULL] = "null",
+ [BRW_SFID_MATH] = "math",
+ [BRW_SFID_SAMPLER] = "sampler",
+ [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+ [BRW_SFID_URB] = "urb",
+ [BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
+ [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
+ [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
+ [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
+ [GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
+};
+
+/* Dataport message type names (read and write messages share one table). */
+static const char * const dp_rc_msg_type_gen6[16] = {
+ [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+ [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+ [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+ [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
+/*
+ * String tables for the math, URB and sampler message fields, indexed by the
+ * raw field value. Entries without an initializer are NULL.
+ */
+static const char * const math_function[16] = {
+ [BRW_MATH_FUNCTION_INV] = "inv",
+ [BRW_MATH_FUNCTION_LOG] = "log",
+ [BRW_MATH_FUNCTION_EXP] = "exp",
+ [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+ [BRW_MATH_FUNCTION_RSQ] = "rsq",
+ [BRW_MATH_FUNCTION_SIN] = "sin",
+ [BRW_MATH_FUNCTION_COS] = "cos",
+ [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+ [BRW_MATH_FUNCTION_TAN] = "tan",
+ [BRW_MATH_FUNCTION_POW] = "pow",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+ [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
+};
+
+static const char * const math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+static const char * const math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+static const char * const math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+static const char * const math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+static const char * const urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
+/* Only three swizzle modes are named; any remaining slot is NULL. */
+static const char * const urb_swizzle[4] = {
+ [BRW_URB_SWIZZLE_NONE] = "",
+ [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+ [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+static const char * const urb_allocate[2] = {
+ [0] = "",
+ [1] = "allocate"
+};
+
+static const char * const urb_used[2] = {
+ [0] = "",
+ [1] = "used"
+};
+
+static const char * const urb_complete[2] = {
+ [0] = "",
+ [1] = "complete"
+};
+
+/* Value 1 has no entry. */
+static const char * const sampler_target_format[4] = {
+ [0] = "F",
+ [2] = "UD",
+ [3] = "D"
+};
+
+
+/* Number of characters written on the current output line so far. */
+static int column;
+
+/*
+ * Write `string` to `file` and advance the column counter by its length.
+ * Always returns 0.
+ */
+static int string (FILE *file, const char *string)
+{
+    size_t length = strlen (string);
+
+    fputs (string, file);
+    column += length;
+    return 0;
+}
+
+/*
+ * printf-style front end to string(): the formatted text is rendered into a
+ * fixed 1024-byte buffer (longer output is truncated) and then emitted
+ * through string() so the column counter stays in sync. Always returns 0.
+ */
+static int format (FILE *f, const char *format, ...) PRINTFLIKE(2, 3);
+static int format (FILE *f, const char *format, ...)
+{
+    va_list ap;
+    char text[1024];
+
+    va_start (ap, format);
+    vsnprintf (text, sizeof (text) - 1, format, ap);
+    va_end (ap);
+
+    string (f, text);
+    return 0;
+}
+
+/* End the current output line and reset the column counter. Returns 0. */
+static int newline (FILE *f)
+{
+    column = 0;
+    putc ('\n', f);
+    return 0;
+}
+
+/*
+ * Emit spaces until the column counter reaches `c`. At least one space is
+ * always written, even when the line is already at or past `c`. Returns 0.
+ */
+static int pad (FILE *f, int c)
+{
+    string (f, " ");
+    while (column < c)
+        string (f, " ");
+    return 0;
+}
+
+/*
+ * Print the textual name of a field value looked up in a string table.
+ *
+ * file:  output stream.
+ * name:  human-readable field name, used only in the error message.
+ * ctrl:  table of names indexed by field value; a NULL entry marks an
+ *        invalid value and an empty string means "print nothing". The caller
+ *        must guarantee that `id` is within the table.
+ * id:    field value to look up.
+ * space: optional separator state. When non-NULL and *space is set, a single
+ *        space is printed before the name; *space is set to 1 once a
+ *        non-empty name has been printed.
+ *
+ * Returns 0 on success, 1 if the value has no table entry.
+ */
+static int control (FILE *file, const char *name, const char * const ctrl[],
+                    unsigned id, int *space)
+{
+    if (!ctrl[id]) {
+        /* Report through format(), as print_opcode() does, so the column
+         * counter keeps tracking the output; `id` is unsigned, hence %u. */
+        format (file, "*** invalid %s value %u ", name, id);
+        return 1;
+    }
+    if (ctrl[id][0]) {
+        if (space && *space)
+            string (file, " ");
+        string (file, ctrl[id]);
+        if (space)
+            *space = 1;
+    }
+    return 0;
+}
+
+/*
+ * Print the mnemonic for opcode `id`. Returns 0 on success; if the opcode
+ * table has no name for `id`, prints a diagnostic instead and returns 1.
+ */
+static int print_opcode (FILE *file, int id)
+{
+    const char *mnemonic = opcode[id].name;
+
+    if (mnemonic) {
+        string (file, mnemonic);
+        return 0;
+    }
+
+    format (file, "*** invalid opcode value %d ", id);
+    return 1;
+}
+
+/*
+ * Print a register name.
+ *
+ * For the architecture register file the name is chosen from the high nibble
+ * of the register number and the low nibble is appended as an index. For any
+ * other file the file prefix and the register number are printed.
+ *
+ * Returns -1 after printing "null" or "ip" (callers use this to stop
+ * printing any region/type suffix), otherwise 0 on success or 1 if the
+ * register file value is invalid.
+ */
+static int reg (FILE *file, unsigned _reg_file, unsigned _reg_nr)
+{
+    unsigned index;
+
+    /* Clear the Compr4 instruction compression bit. */
+    if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+        _reg_nr &= ~(1 << 7);
+
+    if (_reg_file != BRW_ARCHITECTURE_REGISTER_FILE) {
+        int err = control (file, "src reg file", reg_file, _reg_file, NULL);
+
+        format (file, "%d", _reg_nr);
+        return err;
+    }
+
+    index = _reg_nr & 0x0f;
+    switch (_reg_nr & 0xf0) {
+    case BRW_ARF_NULL:
+        string (file, "null");
+        return -1;
+    case BRW_ARF_IP:
+        string (file, "ip");
+        return -1;
+    case BRW_ARF_ADDRESS:
+        format (file, "a%d", index);
+        break;
+    case BRW_ARF_ACCUMULATOR:
+        format (file, "acc%d", index);
+        break;
+    case BRW_ARF_FLAG:
+        format (file, "f%d", index);
+        break;
+    case BRW_ARF_MASK:
+        format (file, "mask%d", index);
+        break;
+    case BRW_ARF_MASK_STACK:
+        format (file, "msd%d", index);
+        break;
+    case BRW_ARF_STATE:
+        format (file, "sr%d", index);
+        break;
+    case BRW_ARF_CONTROL:
+        format (file, "cr%d", index);
+        break;
+    case BRW_ARF_NOTIFICATION_COUNT:
+        format (file, "n%d", index);
+        break;
+    default:
+        /* Unknown ARF class: print the whole register number. */
+        format (file, "ARF%d", _reg_nr);
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Print the destination operand of a one- or two-source instruction.
+ *
+ * Handles align1 direct and register-indirect addressing and align16 direct
+ * addressing; align16 indirect addressing is reported as unsupported.
+ *
+ * Returns 0 on success and non-zero if any field could not be decoded. A
+ * "null" or "ip" destination (reg() returning -1) is complete on its own and
+ * yields 0.
+ *
+ * The sub-register number is stored in bytes and printed in units of the
+ * destination type. reg_type_size[] holds 0 for the unlisted type 6, so in
+ * that case the raw byte offset is printed instead of dividing by zero; the
+ * reg_encoding lookup that follows reports the invalid type.
+ */
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    int subreg, elem_size;
+
+    if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+        if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            subreg = inst->bits1.da1.dest_subreg_nr;
+            if (subreg) {
+                elem_size = reg_type_size[inst->bits1.da1.dest_reg_type];
+                format (file, ".%d", elem_size ? subreg / elem_size : subreg);
+            }
+            format (file, "<%s>", horiz_stride[inst->bits1.da1.dest_horiz_stride]);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+        }
+        else
+        {
+            string (file, "g[a0");
+            subreg = inst->bits1.ia1.dest_subreg_nr;
+            if (subreg) {
+                elem_size = reg_type_size[inst->bits1.ia1.dest_reg_type];
+                format (file, ".%d", elem_size ? subreg / elem_size : subreg);
+            }
+            if (inst->bits1.ia1.dest_indirect_offset)
+                format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+            string (file, "]");
+            format (file, "<%s>", horiz_stride[inst->bits1.ia1.dest_horiz_stride]);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+        }
+    }
+    else
+    {
+        if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            subreg = inst->bits1.da16.dest_subreg_nr;
+            if (subreg) {
+                elem_size = reg_type_size[inst->bits1.da16.dest_reg_type];
+                format (file, ".%d", elem_size ? subreg / elem_size : subreg);
+            }
+            string (file, "<1>");
+            err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+        }
+        else
+        {
+            err = 1;
+            string (file, "Indirect align16 address mode not supported");
+        }
+    }
+
+    /* Propagate decode failures instead of discarding them. */
+    return err;
+}
+
+/*
+ * Print the destination operand of a three-source instruction.
+ *
+ * The one-bit destination file field selects the message register file when
+ * set and the general register file otherwise; the type is always printed
+ * as float.
+ *
+ * Returns 0 on success and non-zero if any field could not be decoded.
+ */
+static int dest_3src (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    /* Named so it does not shadow the file-level reg_file[] string table. */
+    uint32_t dst_file;
+
+    if (inst->bits1.da3src.dest_reg_file)
+        dst_file = BRW_MESSAGE_REGISTER_FILE;
+    else
+        dst_file = BRW_GENERAL_REGISTER_FILE;
+
+    err |= reg (file, dst_file, inst->bits1.da3src.dest_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits1.da3src.dest_subreg_nr)
+        format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
+    string (file, "<1>");
+    err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
+    err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL);
+
+    /* Propagate decode failures instead of discarding them. */
+    return err;
+}
+
+/*
+ * Print an align1 source region as "<vert_stride,width,horiz_stride>".
+ * Returns 0 on success, non-zero if any of the three field values has no
+ * entry in its string table.
+ */
+static int src_align1_region (FILE *file,
+                              unsigned _vert_stride, unsigned _width, unsigned _horiz_stride)
+{
+    int vs_err, width_err, hs_err;
+
+    string (file, "<");
+    vs_err = control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",");
+    width_err = control (file, "width", width, _width, NULL);
+    string (file, ",");
+    hs_err = control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+    string (file, ">");
+
+    return vs_err | width_err | hs_err;
+}
+
+/*
+ * Print an align1, directly addressed source operand: optional negate/abs
+ * modifiers, register name, sub-register, region and type.
+ *
+ * sub_reg_num is in bytes and is printed in units of the operand type.
+ * reg_type_size[] holds 0 for the unlisted type 6; in that case the raw byte
+ * offset is printed instead of dividing by zero, and the reg_encoding lookup
+ * below reports the invalid type.
+ *
+ * Returns 0 on success (including "null"/"ip" operands, for which nothing
+ * further is printed) and non-zero if any field could not be decoded.
+ */
+static int src_da1 (FILE *file, unsigned type, unsigned _reg_file,
+                    unsigned _vert_stride, unsigned _width, unsigned _horiz_stride,
+                    unsigned reg_num, unsigned sub_reg_num, unsigned __abs, unsigned _negate)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, reg_num);
+    if (err == -1)
+        return 0;
+    if (sub_reg_num) {
+        unsigned elem_size = reg_type_size[type];
+
+        /* use formal style like spec */
+        format (file, ".%d", elem_size ? sub_reg_num / elem_size : sub_reg_num);
+    }
+    /* An invalid stride/width must be reported to the caller as well. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print an align1, register-indirect source operand in the form
+ * "g[a0.<subreg> <imm>]<region><type>", preceded by optional negate/abs
+ * modifiers.
+ *
+ * _reg_file and _addr_mode are accepted for signature compatibility and are
+ * not used.
+ *
+ * Returns 0 on success and non-zero if any field could not be decoded.
+ */
+static int src_ia1 (FILE *file,
+                    unsigned type,
+                    unsigned _reg_file,
+                    int _addr_imm,
+                    unsigned _addr_subreg_nr,
+                    unsigned _negate,
+                    unsigned __abs,
+                    unsigned _addr_mode,
+                    unsigned _horiz_stride,
+                    unsigned _width,
+                    unsigned _vert_stride)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    string (file, "g[a0");
+    if (_addr_subreg_nr)
+        format (file, ".%d", _addr_subreg_nr);
+    if (_addr_imm)
+        format (file, " %d", _addr_imm);
+    string (file, "]");
+    /* An invalid stride/width must be reported to the caller as well. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print an align16, directly addressed source operand: optional negate/abs
+ * modifiers, register name, sub-register, "<vert_stride,4,1>" region,
+ * swizzle and type.
+ *
+ * Returns 0 on success (including "null"/"ip" operands, for which nothing
+ * further is printed) and non-zero if any field could not be decoded.
+ */
+static int src_da16 (FILE *file,
+                     unsigned _reg_type,
+                     unsigned _reg_file,
+                     unsigned _vert_stride,
+                     unsigned _reg_nr,
+                     unsigned _subreg_nr,
+                     unsigned __abs,
+                     unsigned _negate,
+                     unsigned swz_x,
+                     unsigned swz_y,
+                     unsigned swz_z,
+                     unsigned swz_w)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, _reg_nr);
+    if (err == -1)
+        return 0;
+    if (_subreg_nr) {
+        /* bit4 for subreg number byte addressing. Make this same meaning as
+           in da1 case, so output looks consistent. reg_type_size[] is 0 for
+           the unlisted type 6: print the 16-byte offset unscaled rather than
+           dividing by zero; the type lookup below reports the bad type. */
+        int elem_size = reg_type_size[_reg_type];
+
+        format (file, ".%d", elem_size ? 16 / elem_size : 16);
+    }
+    string (file, "<");
+    err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",4,1>");
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all   - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+        swz_y == BRW_CHANNEL_Y &&
+        swz_z == BRW_CHANNEL_Z &&
+        swz_w == BRW_CHANNEL_W)
+    {
+        ;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+        err |= control (file, "channel select", chan_sel, swz_y, NULL);
+        err |= control (file, "channel select", chan_sel, swz_z, NULL);
+        err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+    return err;
+}
+
+/*
+ * Print source 0 of a three-source instruction: negate/abs modifiers, GRF
+ * register, sub-register, the fixed "<4,1,1>" region, float type and the
+ * swizzle. Returns 0 on success, non-zero if a field could not be decoded.
+ */
+static int src0_3src (FILE *file, struct brw_instruction *inst)
+{
+    unsigned swz[4];
+    unsigned i, nchan;
+    int err = 0;
+
+    /* The swizzle packs four 2-bit channel selects, x in the low bits. */
+    for (i = 0; i < 4; i++)
+        swz[i] = (inst->bits2.da3src.src0_swizzle >> (2 * i)) & 0x3;
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits2.da3src.src0_subreg_nr)
+        format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /* Identity swizzle: nothing is printed. */
+    if (swz[0] == BRW_CHANNEL_X && swz[1] == BRW_CHANNEL_Y &&
+        swz[2] == BRW_CHANNEL_Z && swz[3] == BRW_CHANNEL_W)
+        return err;
+
+    /* One channel replicated to all: print it once; otherwise print the
+     * full four-channel mapping. */
+    nchan = (swz[0] == swz[1] && swz[0] == swz[2] && swz[0] == swz[3]) ? 1 : 4;
+    string (file, ".");
+    for (i = 0; i < nchan; i++)
+        err |= control (file, "channel select", chan_sel, swz[i], NULL);
+    return err;
+}
+
+/*
+ * Print source 1 of a three-source instruction: negate/abs modifiers, GRF
+ * register, sub-register, the fixed "<4,1,1>" region, float type and the
+ * swizzle. The sub-register number is split across two instruction dwords
+ * and reassembled here. Returns 0 on success, non-zero if a field could not
+ * be decoded.
+ */
+static int src1_3src (FILE *file, struct brw_instruction *inst)
+{
+    unsigned swz[4];
+    unsigned i, nchan;
+    unsigned src1_subreg_nr;
+    int err = 0;
+
+    /* The swizzle packs four 2-bit channel selects, x in the low bits. */
+    for (i = 0; i < 4; i++)
+        swz[i] = (inst->bits2.da3src.src1_swizzle >> (2 * i)) & 0x3;
+
+    /* Low part lives in bits2, the high part (shifted up by 2) in bits3. */
+    src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
+                      (inst->bits3.da3src.src1_subreg_nr_high << 2));
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
+                    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src1_reg_nr);
+    if (err == -1)
+        return 0;
+    if (src1_subreg_nr)
+        format (file, ".%d", src1_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /* Identity swizzle: nothing is printed. */
+    if (swz[0] == BRW_CHANNEL_X && swz[1] == BRW_CHANNEL_Y &&
+        swz[2] == BRW_CHANNEL_Z && swz[3] == BRW_CHANNEL_W)
+        return err;
+
+    /* One channel replicated to all: print it once; otherwise print the
+     * full four-channel mapping. */
+    nchan = (swz[0] == swz[1] && swz[0] == swz[2] && swz[0] == swz[3]) ? 1 : 4;
+    string (file, ".");
+    for (i = 0; i < nchan; i++)
+        err |= control (file, "channel select", chan_sel, swz[i], NULL);
+    return err;
+}
+
+
+/*
+ * Print source operand 2 of a three-source instruction: the negate and
+ * abs modifiers, the GRF register with its optional sub-register, the
+ * fixed "<4,1,1>" region, the register type and finally the swizzle.
+ *
+ * Returns the OR of the helpers' error codes; returns 0 early when the
+ * accumulated error is -1 after the register has been printed.
+ */
+static int src2_3src (FILE *file, struct brw_instruction *inst)
+{
+    unsigned sel[4];            /* channel selectors for x, y, z, w */
+    int is_identity, is_replicate;
+    int err = 0;
+    int i;
+
+    /* Every selector is a two-bit slice of the packed swizzle field. */
+    for (i = 0; i < 4; i++)
+        sel[i] = (inst->bits3.da3src.src2_swizzle >> (2 * i)) & 0x3;
+
+    err |= control (file, "negate", negate,
+                    inst->bits1.da3src.src2_negate, NULL);
+    err |= control (file, "abs", _abs,
+                    inst->bits1.da3src.src2_abs, NULL);
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src2_reg_nr);
+    if (err == -1)
+        return 0;
+
+    if (inst->bits3.da3src.src2_subreg_nr)
+        format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /*
+     * Swizzle display:
+     *   identity  (xyzw)        - nothing printed
+     *   replicate (one channel) - "." plus that single channel
+     *   anything else           - "." plus all four channels
+     */
+    is_identity = sel[0] == BRW_CHANNEL_X && sel[1] == BRW_CHANNEL_Y &&
+                  sel[2] == BRW_CHANNEL_Z && sel[3] == BRW_CHANNEL_W;
+    if (is_identity)
+        return err;
+
+    is_replicate = sel[0] == sel[1] && sel[0] == sel[2] && sel[0] == sel[3];
+
+    string (file, ".");
+    err |= control (file, "channel select", chan_sel, sel[0], NULL);
+    if (is_replicate)
+        return err;
+
+    for (i = 1; i < 4; i++)
+        err |= control (file, "channel select", chan_sel, sel[i], NULL);
+    return err;
+}
+
+/*
+ * Print the immediate operand held in inst->bits3, formatted according to
+ * the register type `type` and followed by the type suffix.
+ *
+ * Types not listed in the switch print nothing.  Always returns 0.
+ */
+static int imm (FILE *file, unsigned type, struct brw_instruction *inst) {
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+        format (file, "0x%08xUD", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_D:
+        format (file, "%dD", inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UW:
+        format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_W:
+        format (file, "%dW", (int16_t) inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UB:
+        /*
+         * Truncate through an unsigned byte: a signed cast would be
+         * sign-extended by the default argument promotion, so values
+         * >= 0x80 would print as 0xffffffXX instead of 0xXX.
+         */
+        format (file, "0x%02xUB", (uint8_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_VF:
+        /* The packed vector-float payload is not decoded. */
+        format (file, "Vector Float");
+        break;
+    case BRW_REGISTER_TYPE_V:
+        format (file, "0x%08xV", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_F:
+        format (file, "%-gF", inst->bits3.f);
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Print source operand 0 of a one- or two-source instruction.
+ *
+ * Dispatches on the operand kind: immediate, align1 direct, align1
+ * indirect or align16 direct.  Align16 indirect addressing is reported
+ * as unsupported and yields 1; otherwise the helper's result is returned.
+ */
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src0_reg_type, inst);
+
+    if (inst->header.access_mode != BRW_ALIGN_1) {
+        /* align16 access mode */
+        if (inst->bits2.da16.src0_address_mode != BRW_ADDRESS_DIRECT) {
+            string (file, "Indirect align16 address mode not supported");
+            return 1;
+        }
+
+        return src_da16 (file,
+                         inst->bits1.da16.src0_reg_type,
+                         inst->bits1.da16.src0_reg_file,
+                         inst->bits2.da16.src0_vert_stride,
+                         inst->bits2.da16.src0_reg_nr,
+                         inst->bits2.da16.src0_subreg_nr,
+                         inst->bits2.da16.src0_abs,
+                         inst->bits2.da16.src0_negate,
+                         inst->bits2.da16.src0_swz_x,
+                         inst->bits2.da16.src0_swz_y,
+                         inst->bits2.da16.src0_swz_z,
+                         inst->bits2.da16.src0_swz_w);
+    }
+
+    /* align1 access mode */
+    if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+        return src_ia1 (file,
+                        inst->bits1.ia1.src0_reg_type,
+                        inst->bits1.ia1.src0_reg_file,
+                        inst->bits2.ia1.src0_indirect_offset,
+                        inst->bits2.ia1.src0_subreg_nr,
+                        inst->bits2.ia1.src0_negate,
+                        inst->bits2.ia1.src0_abs,
+                        inst->bits2.ia1.src0_address_mode,
+                        inst->bits2.ia1.src0_horiz_stride,
+                        inst->bits2.ia1.src0_width,
+                        inst->bits2.ia1.src0_vert_stride);
+
+    return src_da1 (file,
+                    inst->bits1.da1.src0_reg_type,
+                    inst->bits1.da1.src0_reg_file,
+                    inst->bits2.da1.src0_vert_stride,
+                    inst->bits2.da1.src0_width,
+                    inst->bits2.da1.src0_horiz_stride,
+                    inst->bits2.da1.src0_reg_nr,
+                    inst->bits2.da1.src0_subreg_nr,
+                    inst->bits2.da1.src0_abs,
+                    inst->bits2.da1.src0_negate);
+}
+
+/*
+ * Print source operand 1 of a two-source instruction.
+ *
+ * Dispatches on the operand kind: immediate, align1 direct, align1
+ * indirect or align16 direct.  Align16 indirect addressing is reported
+ * as unsupported and yields 1; otherwise the helper's result is returned.
+ */
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src1_reg_type, inst);
+
+    if (inst->header.access_mode != BRW_ALIGN_1) {
+        /* align16 access mode */
+        if (inst->bits3.da16.src1_address_mode != BRW_ADDRESS_DIRECT) {
+            string (file, "Indirect align16 address mode not supported");
+            return 1;
+        }
+
+        return src_da16 (file,
+                         inst->bits1.da16.src1_reg_type,
+                         inst->bits1.da16.src1_reg_file,
+                         inst->bits3.da16.src1_vert_stride,
+                         inst->bits3.da16.src1_reg_nr,
+                         inst->bits3.da16.src1_subreg_nr,
+                         inst->bits3.da16.src1_abs,
+                         inst->bits3.da16.src1_negate,
+                         inst->bits3.da16.src1_swz_x,
+                         inst->bits3.da16.src1_swz_y,
+                         inst->bits3.da16.src1_swz_z,
+                         inst->bits3.da16.src1_swz_w);
+    }
+
+    /* align1 access mode */
+    if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
+        return src_ia1 (file,
+                        inst->bits1.ia1.src1_reg_type,
+                        inst->bits1.ia1.src1_reg_file,
+                        inst->bits3.ia1.src1_indirect_offset,
+                        inst->bits3.ia1.src1_subreg_nr,
+                        inst->bits3.ia1.src1_negate,
+                        inst->bits3.ia1.src1_abs,
+                        inst->bits3.ia1.src1_address_mode,
+                        inst->bits3.ia1.src1_horiz_stride,
+                        inst->bits3.ia1.src1_width,
+                        inst->bits3.ia1.src1_vert_stride);
+
+    return src_da1 (file,
+                    inst->bits1.da1.src1_reg_type,
+                    inst->bits1.da1.src1_reg_file,
+                    inst->bits3.da1.src1_vert_stride,
+                    inst->bits3.da1.src1_width,
+                    inst->bits3.da1.src1_horiz_stride,
+                    inst->bits3.da1.src1_reg_nr,
+                    inst->bits3.da1.src1_subreg_nr,
+                    inst->bits3.da1.src1_abs,
+                    inst->bits3.da1.src1_negate);
+}
+
+/*
+ * Lookup table indexed by an instruction's header.execution_size field;
+ * entry i holds 1 << i.
+ */
+int esize[6] = { 1, 2, 4, 8, 16, 32 };
+
+/*
+ * Print the quarter/half control annotation of an instruction, derived
+ * from header.compression_control and the execution size:
+ *   execution size 8  -> " 1Q" .. " 4Q"
+ *   execution size 16 -> " 1H" or " 2H"
+ * Any other execution size prints nothing.  Always returns 0.
+ */
+static int qtr_ctrl(FILE *file, struct brw_instruction *inst)
+{
+    int qtr_ctl = inst->header.compression_control;
+    unsigned esize_idx = inst->header.execution_size;
+    int exec_size;
+
+    /*
+     * The index comes straight from the instruction word being
+     * disassembled, so never read past the end of esize[].
+     * NOTE(review): presumably the field can encode values above 5 --
+     * confirm against the header bitfield definition.
+     */
+    if (esize_idx >= sizeof(esize) / sizeof(esize[0]))
+        return 0;
+    exec_size = esize[esize_idx];
+
+    if (exec_size == 8) {
+        switch (qtr_ctl) {
+        case 0:
+            string (file, " 1Q");
+            break;
+        case 1:
+            string (file, " 2Q");
+            break;
+        case 2:
+            string (file, " 3Q");
+            break;
+        case 3:
+            string (file, " 4Q");
+            break;
+        }
+    } else if (exec_size == 16) {
+        if (qtr_ctl < 2)
+            string (file, " 1H");
+        else
+            string (file, " 2H");
+    }
+    return 0;
+}
+
+/*
+ * Disassemble one instruction and write its textual form to `file`.
+ *
+ * The output is produced in this order: optional predicate, opcode with
+ * its modifiers, execution size, operands (or jump fields for control
+ * flow), the message descriptor of SEND/SENDC on a second line, and the
+ * trailing "{ ... }" instruction options, terminated by ";" and a newline.
+ *
+ * file - stream the text is written to
+ * inst - instruction to decode
+ * gen  - hardware generation, used to select the field layout
+ *
+ * Returns the OR of the error codes reported by the printing helpers.
+ */
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
+{
+ int err = 0;
+ int space = 0;
+
+ /* Predicate: "(" [inverse] fN[.sub] <predicate control> ") " */
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+ if (inst->bits2.da1.flag_subreg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ /* Opcode followed by the saturate and debug-control modifiers. */
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ /*
+ * header.destreg__conditionalmod is decoded as the math function for
+ * MATH, is skipped here for SEND/SENDC, and is the conditional modifier
+ * for everything else.
+ */
+ if (inst->header.opcode == BRW_OPCODE_MATH) {
+ string (file, " ");
+ err |= control (file, "function", math_function,
+ inst->header.destreg__conditionalmod, NULL);
+ } else if (inst->header.opcode != BRW_OPCODE_SEND &&
+ inst->header.opcode != BRW_OPCODE_SENDC) {
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+
+ /* If we're using the conditional modifier, print which flags reg is
+ * used for it. Note that on gen6+, the embedded-condition SEL and
+ * control flow doesn't update flags.
+ */
+ if (inst->header.destreg__conditionalmod &&
+ (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
+ inst->header.opcode != BRW_OPCODE_IF &&
+ inst->header.opcode != BRW_OPCODE_WHILE))) {
+ format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+ if (inst->bits2.da1.flag_subreg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+ }
+ }
+
+ /* Execution size, printed in parentheses for everything except NOP. */
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ /* Before gen6, SEND prints the raw destreg__conditionalmod value here. */
+ if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+
+ /* Operands; pad() is called with the column for each operand. */
+ if (opcode[inst->header.opcode].nsrc == 3) {
+ pad (file, 16);
+ err |= dest_3src (file, inst);
+
+ pad (file, 32);
+ err |= src0_3src (file, inst);
+
+ pad (file, 48);
+ err |= src1_3src (file, inst);
+
+ pad (file, 64);
+ err |= src2_3src (file, inst);
+ } else {
+ /*
+ * Either a destination operand, or for destination-less opcodes the
+ * jump fields of the control-flow instruction; which field is
+ * printed depends on both the opcode and the generation.
+ */
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
+ format (file, " %d", inst->bits3.break_cont.jip);
+ } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
+ inst->header.opcode == BRW_OPCODE_ELSE ||
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
+ format (file, " %d", inst->bits1.branch_gen6.jump_count);
+ } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
+ inst->header.opcode == BRW_OPCODE_CONTINUE ||
+ inst->header.opcode == BRW_OPCODE_HALT)) ||
+ (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
+ format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
+ } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
+ format (file, " %d", inst->bits3.d);
+ }
+
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+ }
+
+ /* SEND/SENDC: decode the message descriptor on a line of its own. */
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC) {
+ enum brw_message_target target;
+
+ /* The field holding the message target differs per generation. */
+ if (gen >= 6)
+ target = inst->header.destreg__conditionalmod;
+ else if (gen == 5)
+ target = inst->bits2.send_gen5.sfid;
+ else
+ target = inst->bits3.generic.msg_target;
+
+ newline (file);
+ pad (file, 16);
+ space = 0;
+
+ if (gen >= 6) {
+ err |= control (file, "target function", target_function_gen6,
+ target, &space);
+ } else {
+ err |= control (file, "target function", target_function,
+ target, &space);
+ }
+
+ /* Target-specific descriptor fields. */
+ switch (target) {
+ case BRW_SFID_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_SFID_SAMPLER:
+ if (gen >= 7) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen7.binding_table_index,
+ inst->bits3.sampler_gen7.sampler,
+ inst->bits3.sampler_gen7.msg_type,
+ inst->bits3.sampler_gen7.simd_mode);
+ } else if (gen >= 5) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen5.binding_table_index,
+ inst->bits3.sampler_gen5.sampler,
+ inst->bits3.sampler_gen5.msg_type,
+ inst->bits3.sampler_gen5.simd_mode);
+ } else if (0 /* FINISHME: is_g4x */) {
+ format (file, " (%d, %d)",
+ inst->bits3.sampler_g4x.binding_table_index,
+ inst->bits3.sampler_g4x.sampler);
+ } else {
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format",
+ sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ }
+ break;
+ case BRW_SFID_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.gen6_dp.binding_table_index,
+ inst->bits3.gen6_dp.msg_control,
+ inst->bits3.gen6_dp.msg_type,
+ inst->bits3.gen6_dp.send_commit_msg);
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
+ break;
+
+ case BRW_SFID_DATAPORT_WRITE:
+ /* gen6+ print the message type by name first, then the raw fields. */
+ if (gen >= 7) {
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen7_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d)",
+ inst->bits3.gen7_dp.binding_table_index,
+ inst->bits3.gen7_dp.msg_control,
+ inst->bits3.gen7_dp.msg_type);
+ } else if (gen == 6) {
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen6_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d, %d)",
+ inst->bits3.gen6_dp.binding_table_index,
+ inst->bits3.gen6_dp.msg_control,
+ inst->bits3.gen6_dp.msg_type,
+ inst->bits3.gen6_dp.send_commit_msg);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.last_render_target << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
+ break;
+
+ case BRW_SFID_URB:
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
+
+ space = 1;
+ if (gen >= 5) {
+ err |= control (file, "urb opcode", urb_opcode,
+ inst->bits3.urb_gen5.opcode, &space);
+ }
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_SFID_THREAD_SPAWNER:
+ break;
+ case GEN7_SFID_DATAPORT_DATA_CACHE:
+ format (file, " (%d, %d, %d)",
+ inst->bits3.gen7_dp.binding_table_index,
+ inst->bits3.gen7_dp.msg_control,
+ inst->bits3.gen7_dp.msg_type);
+ break;
+
+
+ default:
+ format (file, "unsupported target %d", target);
+ break;
+ }
+ /* Message and response lengths close the descriptor line. */
+ if (space)
+ string (file, " ");
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ }
+ /* Instruction options in braces, for everything except NOP. */
+ pad (file, 64);
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ if (gen >= 6)
+ err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
+ else
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+
+ if (gen >= 6)
+ err |= qtr_ctrl (file, inst);
+ else {
+ /*
+ * Before gen6: a compressed instruction whose destination is a
+ * message register with bit 7 of the register number set is
+ * printed as "compr4"; otherwise the compression control is
+ * printed by name.
+ */
+ if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
+ opcode[inst->header.opcode].ndst > 0 &&
+ inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
+ inst->bits1.da1.dest_reg_nr & (1 << 7)) {
+ format (file, " compr4");
+ } else {
+ err |= control (file, "compression control", compr_ctrl,
+ inst->header.compression_control, &space);
+ }
+ }
+
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (gen >= 6)
+ err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
diff --git a/assembler/brw_eu.c b/assembler/brw_eu.c
new file mode 100644
index 0000000..d874b79
--- /dev/null
+++ b/assembler/brw_eu.c
@@ -0,0 +1,268 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include <string.h>
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+#include "ralloc.h"
+
+/* Maps a conditional modifier to the one that gives the same result once
+ * src0 and src1 have been exchanged (e.g. for CMP).
+ *
+ * Z and NZ are returned unchanged, G/L and GE/LE are exchanged, and any
+ * other value yields ~0.
+ */
+uint32_t
+brw_swap_cmod(uint32_t cmod)
+{
+   /* Equality tests do not care about operand order. */
+   if (cmod == BRW_CONDITIONAL_Z || cmod == BRW_CONDITIONAL_NZ)
+      return cmod;
+
+   /* Ordered comparisons flip direction. */
+   if (cmod == BRW_CONDITIONAL_G)
+      return BRW_CONDITIONAL_L;
+   if (cmod == BRW_CONDITIONAL_GE)
+      return BRW_CONDITIONAL_LE;
+   if (cmod == BRW_CONDITIONAL_L)
+      return BRW_CONDITIONAL_G;
+   if (cmod == BRW_CONDITIONAL_LE)
+      return BRW_CONDITIONAL_GE;
+
+   return ~0;
+}
+
+
+/* Select predication for following instructions from a flag mask.
+ *
+ * A value of 0xff turns predication off.  Any other value is loaded into
+ * flag register f0.0 (skipped when it equals the cached p->flag_value)
+ * and normal predication is enabled.
+ *
+ * Open question carried over from the original author: how does
+ * predicate control work when execution_size != 8?  Is 0xffff the value
+ * to test/set when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
+{
+   /* Cleared first so that the MOV emitted below is itself unpredicated. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   if (value == 0xff)
+      return;
+
+   if (p->flag_value != value) {
+      brw_push_insn_state(p);
+      brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
+      p->flag_value = value;
+      brw_pop_insn_state(p);
+   }
+
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+}
+
+/* Store `pc` as the predicate control of the current instruction state. */
+void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.predicate_control = pc;
+}
+
+/* Store the predicate-inverse bit of the current instruction state. */
+void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.predicate_inverse = predicate_inverse;
+}
+
+/* Store `conditional` in the destreg__conditionalmod field of the current
+ * instruction state.
+ */
+void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.destreg__conditionalmod = conditional;
+}
+
+/* Store the flag register and sub-register numbers of the current
+ * instruction state.
+ */
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg)
+{
+   struct brw_instruction *state = p->current;
+
+   state->bits2.da1.flag_reg_nr = reg;
+   state->bits2.da1.flag_subreg_nr = subreg;
+}
+
+/* Store `access_mode` in the header of the current instruction state. */
+void brw_set_access_mode( struct brw_compile *p, unsigned access_mode )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.access_mode = access_mode;
+}
+
+/* Record the compression control for following instructions.
+ *
+ * p->compressed tracks whether BRW_COMPRESSION_COMPRESSED was requested.
+ * Before gen6 the value is stored in the header as given; on gen6 and
+ * later it is translated to the GEN6_COMPRESSION_* encoding first.
+ */
+void
+brw_set_compression_control(struct brw_compile *p,
+                            enum brw_compression compression_control)
+{
+   unsigned hw_value;
+
+   p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
+
+   if (p->brw->intel.gen < 6) {
+      p->current->header.compression_control = compression_control;
+      return;
+   }
+
+   /* Since we don't use the 32-wide support in gen6, the pre-gen6
+    * compression control is translated here.
+    */
+   switch (compression_control) {
+   case BRW_COMPRESSION_NONE:
+      /* "Use the first set of bits of dmask/vmask/arf according to
+       * execsize."
+       */
+      hw_value = GEN6_COMPRESSION_1Q;
+      break;
+   case BRW_COMPRESSION_2NDHALF:
+      /* For 8-wide: "use the second set of 8 bits." */
+      hw_value = GEN6_COMPRESSION_2Q;
+      break;
+   case BRW_COMPRESSION_COMPRESSED:
+      /* For 16-wide instruction compression, use the first set of 16
+       * bits since we don't do 32-wide dispatch.
+       */
+      hw_value = GEN6_COMPRESSION_1H;
+      break;
+   default:
+      assert(!"not reached");
+      hw_value = GEN6_COMPRESSION_1H;
+      break;
+   }
+
+   p->current->header.compression_control = hw_value;
+}
+
+/* Store `value` as the mask control of the current instruction state. */
+void brw_set_mask_control( struct brw_compile *p, unsigned value )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.mask_control = value;
+}
+
+/* Store the saturate bit of the current instruction state. */
+void brw_set_saturate( struct brw_compile *p, bool enable )
+{
+   struct brw_instruction *state = p->current;
+
+   state->header.saturate = enable;
+}
+
+/* Store the accumulator write control of the current instruction state.
+ * The call has no effect before gen6.
+ */
+void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
+{
+   if (p->brw->intel.gen < 6)
+      return;
+
+   p->current->header.acc_wr_control = value;
+}
+
+/* Push the instruction state: the current entry is duplicated into the
+ * next stack slot, which becomes the new current entry, and the
+ * `compressed` flag is saved alongside it.
+ */
+void brw_push_insn_state( struct brw_compile *p )
+{
+   struct brw_instruction *top = p->current;
+
+   /* There must be room for one more entry. */
+   assert(top != p->stack + (BRW_EU_MAX_INSN_STACK - 1));
+
+   memcpy(&top[1], top, sizeof(*top));
+   p->compressed_stack[top - p->stack] = p->compressed;
+   p->current = top + 1;
+}
+
+/* Pop the instruction state: step back to the previous stack entry and
+ * restore the `compressed` flag that was saved when it was pushed.
+ */
+void brw_pop_insn_state( struct brw_compile *p )
+{
+   struct brw_instruction *prev;
+
+   /* Popping the bottom entry is a caller error. */
+   assert(p->current != p->stack);
+
+   prev = p->current - 1;
+   p->current = prev;
+   p->compressed = p->compressed_stack[prev - p->stack];
+}
+
+
+/***********************************************************************
+ * Initialise a brw_compile for emitting instructions.
+ *
+ * brw     - context the program is built for (stored in p->brw)
+ * p       - compile state to initialise; it is zeroed first
+ * mem_ctx - ralloc context that owns every array allocated here
+ */
+void
+brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)
+{
+ memset(p, 0, sizeof(*p));
+
+ p->brw = brw;
+ /*
+ * Set the initial instruction store array size to 1024, if found that
+ * isn't enough, then it will double the store size at brw_next_insn()
+ * until out of memory.
+ */
+ p->store_size = 1024;
+ p->store = rzalloc_array(mem_ctx, struct brw_instruction, p->store_size);
+ p->nr_insn = 0;
+ /* The instruction-state stack starts at its bottom entry. */
+ p->current = p->stack;
+ p->compressed = false;
+ memset(p->current, 0, sizeof(p->current[0]));
+
+ p->mem_ctx = mem_ctx;
+
+ /* Some defaults?
+ * These setters write through p->current and read p->brw, so they must
+ * run after both have been set above.
+ */
+ brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+ brw_set_saturate(p, 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_predicate_control_flag_value(p, 0xff);
+
+ /* Set up control flow stack */
+ p->if_stack_depth = 0;
+ p->if_stack_array_size = 16;
+ p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
+
+ /* loop_stack and if_depth_in_loop share loop_stack_array_size. */
+ p->loop_stack_depth = 0;
+ p->loop_stack_array_size = 16;
+ p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
+ p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
+
+ brw_init_compaction_tables(&brw->intel);
+}
+
+
+/* Compact the emitted instructions and hand back the program.
+ *
+ * *sz receives p->next_insn_offset as it stands after compaction; the
+ * return value is the instruction store viewed as an array of unsigned.
+ */
+const unsigned *brw_get_program( struct brw_compile *p,
+                                 unsigned *sz )
+{
+   const unsigned *program;
+
+   /* Must run before the size is read. */
+   brw_compact_instructions(p);
+
+   *sz = p->next_insn_offset;
+   program = (const unsigned *)p->store;
+   return program;
+}
+
+/* Disassemble the instructions stored at byte offsets [start, end) of the
+ * program store and write the listing to `out`.
+ *
+ * Every line begins with the byte offset of the instruction.  Compacted
+ * instructions (header.cmpt_control set) occupy 8 bytes and are expanded
+ * before being printed; all other instructions occupy 16 bytes.
+ */
+void
+brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end)
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   /* Byte-addressed view of the store: arithmetic on void * is a GNU
+    * extension, so step through it as char * instead.
+    */
+   char *store = (char *)p->store;
+   bool dump_hex = false;   /* set to true to also print the raw dwords */
+
+   for (int offset = start; offset < end;) {
+      struct brw_instruction *insn = (struct brw_instruction *)(store + offset);
+      struct brw_instruction uncompacted;
+
+      /* All output goes to the caller's stream, not to stdout. */
+      fprintf(out, "0x%08x: ", offset);
+
+      if (insn->header.cmpt_control) {
+         struct brw_compact_instruction *compacted = (void *)insn;
+
+         if (dump_hex) {
+            fprintf(out, "0x%08x 0x%08x                       ",
+                    ((uint32_t *)insn)[1],
+                    ((uint32_t *)insn)[0]);
+         }
+
+         brw_uncompact_instruction(intel, &uncompacted, compacted);
+         insn = &uncompacted;
+         offset += 8;
+      } else {
+         if (dump_hex) {
+            fprintf(out, "0x%08x 0x%08x 0x%08x 0x%08x ",
+                    ((uint32_t *)insn)[3],
+                    ((uint32_t *)insn)[2],
+                    ((uint32_t *)insn)[1],
+                    ((uint32_t *)insn)[0]);
+         }
+         offset += 16;
+      }
+
+      brw_disasm(out, insn, intel->gen);
+   }
+}
diff --git a/assembler/brw_eu.h b/assembler/brw_eu.h
new file mode 100644
index 0000000..427db37
--- /dev/null
+++ b/assembler/brw_eu.h
@@ -0,0 +1,427 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include "brw_context.h"
+#include "brw_structs.h"
+#include "brw_defines.h"
+#include "brw_reg.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Number of entries in the instruction-state stack of struct brw_compile. */
+#define BRW_EU_MAX_INSN_STACK 5
+
+/* State of one program being assembled: the instruction store, the
+ * push/pop stack of instruction state, and the control-flow stacks.
+ */
+struct brw_compile {
+ /* Instruction store; store_size is its capacity in instructions and
+ * nr_insn the index used by current_insn().
+ */
+ struct brw_instruction *store;
+ int store_size;
+ unsigned nr_insn;
+ /* Reported as the program size by brw_get_program(). */
+ unsigned int next_insn_offset;
+
+ /* ralloc context passed to brw_init_compile(). */
+ void *mem_ctx;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ /* Value of `compressed` saved by each push. */
+ bool compressed_stack[BRW_EU_MAX_INSN_STACK];
+ /* Points at the entry of `stack` currently in use. */
+ struct brw_instruction *current;
+
+ /* Last value loaded into the flag register by
+ * brw_set_predicate_control_flag_value().
+ */
+ unsigned flag_value;
+ bool single_program_flow;
+ /* True while BRW_COMPRESSION_COMPRESSED is the selected compression. */
+ bool compressed;
+ struct brw_context *brw;
+
+ /* Control flow stacks:
+ * - if_stack contains IF and ELSE instructions which must be patched
+ * (and popped) once the matching ENDIF instruction is encountered.
+ *
+ * Just store the instruction pointer(an index).
+ */
+ int *if_stack;
+ int if_stack_depth;
+ int if_stack_array_size;
+
+ /**
+ * loop_stack contains the instruction pointers of the starts of loops which
+ * must be patched (and popped) once the matching WHILE instruction is
+ * encountered.
+ */
+ int *loop_stack;
+ /**
+ * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
+ * blocks they were popping out of, to fix up the mask stack. This tracks
+ * the IF/ENDIF nesting in each current nested loop level.
+ */
+ int *if_depth_in_loop;
+ int loop_stack_depth;
+ /* Allocated length of both loop_stack and if_depth_in_loop. */
+ int loop_stack_array_size;
+};
+
+/* Address of the store slot at index p->nr_insn. */
+static inline struct brw_instruction *current_insn( struct brw_compile *p)
+{
+   return p->store + p->nr_insn;
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, unsigned value );
+void brw_set_saturate( struct brw_compile *p, bool enable );
+void brw_set_access_mode( struct brw_compile *p, unsigned access_mode );
+void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
+void brw_set_predicate_control( struct brw_compile *p, unsigned pc );
+void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
+void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional );
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
+void brw_set_acc_write_control(struct brw_compile *p, unsigned value);
+
+void brw_init_compile(struct brw_context *, struct brw_compile *p,
+ void *mem_ctx);
+void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end);
+const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz );
+
+struct brw_instruction *brw_next_insn(struct brw_compile *p, unsigned opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg);
+
+void gen6_resolve_implied_move(struct brw_compile *p,
+ struct brw_reg *src,
+ unsigned msg_reg_nr);
+
+/* Helpers for regular instructions:
+ *
+ * Each macro below expands to the prototype of brw_<OP>().  The trailing
+ * semicolon is part of the macro, so the uses are written without one.
+ * All four macros are undefined again once the list is complete.
+ */
+/* One-source form: (p, dest, src0), returns the instruction. */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+/* Two-source form: (p, dest, src0, src1), returns the instruction. */
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+/* Three-source form: (p, dest, src0, src1, src2), returns the instruction. */
+#define ALU3(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1, \
+ struct brw_reg src2);
+
+/* Same arguments as ALU1, but the function returns void. */
+#define ROUND(OP) \
+void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(AVG)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+ALU2(PLN)
+ALU3(MAD)
+
+ROUND(RNDZ)
+ROUND(RNDE)
+
+#undef ALU1
+#undef ALU2
+#undef ALU3
+#undef ROUND
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_set_sampler_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ unsigned header_present,
+ unsigned simd_mode,
+ unsigned return_format);
+
+void brw_set_dp_read_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned msg_control,
+ unsigned msg_type,
+ unsigned target_cache,
+ unsigned msg_length,
+ bool header_present,
+ unsigned response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ unsigned binding_table_index,
+ unsigned msg_control,
+ unsigned msg_type,
+ unsigned msg_length,
+ bool header_present,
+ unsigned last_render_target,
+ unsigned response_length,
+ unsigned end_of_thread,
+ unsigned send_commit_msg);
+
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ bool allocate,
+ bool used,
+ unsigned msg_length,
+ unsigned response_length,
+ bool eot,
+ bool writes_complete,
+ unsigned offset,
+ unsigned swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ bool allocate,
+ unsigned response_length,
+ bool eot);
+
+void brw_svb_write(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ bool send_commit_msg);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ int dispatch_width,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned msg_control,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ bool eot,
+ bool header_present);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned writemask,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ unsigned header_present,
+ unsigned simd_mode,
+ unsigned return_format);
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned data_type,
+ unsigned precision );
+
+void brw_math2(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_oword_block_read(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t offset,
+ uint32_t bind_table_index);
+
+void brw_oword_block_read_scratch(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ int num_regs,
+ unsigned offset);
+
+void brw_oword_block_write_scratch(struct brw_compile *p,
+ struct brw_reg mrf,
+ int num_regs,
+ unsigned offset);
+
+void brw_shader_time_add(struct brw_compile *p,
+ int mrf,
+ uint32_t surf_index);
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ unsigned execute_size);
+struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
+ struct brw_reg src0, struct brw_reg src1);
+
+void brw_ELSE(struct brw_compile *p);
+void brw_ENDIF(struct brw_compile *p);
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ unsigned execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_HALT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+void brw_WAIT(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+/***********************************************************************
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ unsigned count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ unsigned count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg);
+
+void brw_set_uip_jip(struct brw_compile *p);
+
+uint32_t brw_swap_cmod(uint32_t cmod);
+
+void
+brw_set_3src_dest(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg dest);
+void
+brw_set_3src_src0(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg src0);
+void
+brw_set_3src_src1(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg src1);
+void
+brw_set_3src_src2(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg src2);
+
+/* brw_eu_compact.c */
+void brw_init_compaction_tables(struct intel_context *intel);
+void brw_compact_instructions(struct brw_compile *p);
+void brw_uncompact_instruction(struct intel_context *intel,
+ struct brw_instruction *dst,
+ struct brw_compact_instruction *src);
+bool brw_try_compact_instruction(struct brw_compile *p,
+ struct brw_compact_instruction *dst,
+ struct brw_instruction *src);
+
+void brw_debug_compact_uncompact(struct intel_context *intel,
+ struct brw_instruction *orig,
+ struct brw_instruction *uncompacted);
+
+/* brw_optimize.c */
+void brw_optimize(struct brw_compile *p);
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/assembler/brw_eu_compact.c b/assembler/brw_eu_compact.c
new file mode 100644
index 0000000..d362ed3
--- /dev/null
+++ b/assembler/brw_eu_compact.c
@@ -0,0 +1,810 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_eu_compact.c
+ *
+ * Instruction compaction is a feature of gm45 and newer hardware that allows
+ * for a smaller instruction encoding.
+ *
+ * The instruction cache is on the order of 32KB, and many programs generate
+ * far more instructions than that. The instruction cache is built to barely
+ * keep up with instruction dispatch ability in cache hit cases -- L1
+ * instruction cache misses that still hit in the next level could limit
+ * throughput by around 50%.
+ *
+ * The idea of instruction compaction is that most instructions use a tiny
+ * subset of the GPU functionality, so we can encode what would be a 16 byte
+ * instruction in 8 bytes using some lookup tables for various fields.
+ */
+
+#include <string.h>
+
+#include "brw_compat.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+static const uint32_t gen6_control_index_table[32] = {
+ 0b00000000000000000,
+ 0b01000000000000000,
+ 0b00110000000000000,
+ 0b00000000100000000,
+ 0b00010000000000000,
+ 0b00001000100000000,
+ 0b00000000100000010,
+ 0b00000000000000010,
+ 0b01000000100000000,
+ 0b01010000000000000,
+ 0b10110000000000000,
+ 0b00100000000000000,
+ 0b11010000000000000,
+ 0b11000000000000000,
+ 0b01001000100000000,
+ 0b01000000000001000,
+ 0b01000000000000100,
+ 0b00000000000001000,
+ 0b00000000000000100,
+ 0b00111000100000000,
+ 0b00001000100000010,
+ 0b00110000100000000,
+ 0b00110000000000001,
+ 0b00100000000000001,
+ 0b00110000000000010,
+ 0b00110000000000101,
+ 0b00110000000001001,
+ 0b00110000000010000,
+ 0b00110000000000011,
+ 0b00110000000000100,
+ 0b00110000100001000,
+ 0b00100000000001001
+};
+
+static const uint32_t gen6_datatype_table[32] = {
+ 0b001001110000000000,
+ 0b001000110000100000,
+ 0b001001110000000001,
+ 0b001000000001100000,
+ 0b001010110100101001,
+ 0b001000000110101101,
+ 0b001100011000101100,
+ 0b001011110110101101,
+ 0b001000000111101100,
+ 0b001000000001100001,
+ 0b001000110010100101,
+ 0b001000000001000001,
+ 0b001000001000110001,
+ 0b001000001000101001,
+ 0b001000000000100000,
+ 0b001000001000110010,
+ 0b001010010100101001,
+ 0b001011010010100101,
+ 0b001000000110100101,
+ 0b001100011000101001,
+ 0b001011011000101100,
+ 0b001011010110100101,
+ 0b001011110110100101,
+ 0b001111011110111101,
+ 0b001111011110111100,
+ 0b001111011110111101,
+ 0b001111011110011101,
+ 0b001111011110111110,
+ 0b001000000000100001,
+ 0b001000000000100010,
+ 0b001001111111011101,
+ 0b001000001110111110,
+};
+
+static const uint32_t gen6_subreg_table[32] = {
+ 0b000000000000000,
+ 0b000000000000100,
+ 0b000000110000000,
+ 0b111000000000000,
+ 0b011110000001000,
+ 0b000010000000000,
+ 0b000000000010000,
+ 0b000110000001100,
+ 0b001000000000000,
+ 0b000001000000000,
+ 0b000001010010100,
+ 0b000000001010110,
+ 0b010000000000000,
+ 0b110000000000000,
+ 0b000100000000000,
+ 0b000000010000000,
+ 0b000000000001000,
+ 0b100000000000000,
+ 0b000001010000000,
+ 0b001010000000000,
+ 0b001100000000000,
+ 0b000000001010100,
+ 0b101101010010100,
+ 0b010100000000000,
+ 0b000000010001111,
+ 0b011000000000000,
+ 0b111110000000000,
+ 0b101000000000000,
+ 0b000000000001111,
+ 0b000100010001111,
+ 0b001000010001111,
+ 0b000110000000000,
+};
+
+static const uint32_t gen6_src_index_table[32] = {
+ 0b000000000000,
+ 0b010110001000,
+ 0b010001101000,
+ 0b001000101000,
+ 0b011010010000,
+ 0b000100100000,
+ 0b010001101100,
+ 0b010101110000,
+ 0b011001111000,
+ 0b001100101000,
+ 0b010110001100,
+ 0b001000100000,
+ 0b010110001010,
+ 0b000000000010,
+ 0b010101010000,
+ 0b010101101000,
+ 0b111101001100,
+ 0b111100101100,
+ 0b011001110000,
+ 0b010110001001,
+ 0b010101011000,
+ 0b001101001000,
+ 0b010000101100,
+ 0b010000000000,
+ 0b001101110000,
+ 0b001100010000,
+ 0b001100000000,
+ 0b010001101010,
+ 0b001101111000,
+ 0b000001110000,
+ 0b001100100000,
+ 0b001101010000,
+};
+
+static const uint32_t gen7_control_index_table[32] = {
+ 0b0000000000000000010,
+ 0b0000100000000000000,
+ 0b0000100000000000001,
+ 0b0000100000000000010,
+ 0b0000100000000000011,
+ 0b0000100000000000100,
+ 0b0000100000000000101,
+ 0b0000100000000000111,
+ 0b0000100000000001000,
+ 0b0000100000000001001,
+ 0b0000100000000001101,
+ 0b0000110000000000000,
+ 0b0000110000000000001,
+ 0b0000110000000000010,
+ 0b0000110000000000011,
+ 0b0000110000000000100,
+ 0b0000110000000000101,
+ 0b0000110000000000111,
+ 0b0000110000000001001,
+ 0b0000110000000001101,
+ 0b0000110000000010000,
+ 0b0000110000100000000,
+ 0b0001000000000000000,
+ 0b0001000000000000010,
+ 0b0001000000000000100,
+ 0b0001000000100000000,
+ 0b0010110000000000000,
+ 0b0010110000000010000,
+ 0b0011000000000000000,
+ 0b0011000000100000000,
+ 0b0101000000000000000,
+ 0b0101000000100000000
+};
+
+static const uint32_t gen7_datatype_table[32] = {
+ 0b001000000000000001,
+ 0b001000000000100000,
+ 0b001000000000100001,
+ 0b001000000001100001,
+ 0b001000000010111101,
+ 0b001000001011111101,
+ 0b001000001110100001,
+ 0b001000001110100101,
+ 0b001000001110111101,
+ 0b001000010000100001,
+ 0b001000110000100000,
+ 0b001000110000100001,
+ 0b001001010010100101,
+ 0b001001110010100100,
+ 0b001001110010100101,
+ 0b001111001110111101,
+ 0b001111011110011101,
+ 0b001111011110111100,
+ 0b001111011110111101,
+ 0b001111111110111100,
+ 0b000000001000001100,
+ 0b001000000000111101,
+ 0b001000000010100101,
+ 0b001000010000100000,
+ 0b001001010010100100,
+ 0b001001110010000100,
+ 0b001010010100001001,
+ 0b001101111110111101,
+ 0b001111111110111101,
+ 0b001011110110101100,
+ 0b001010010100101000,
+ 0b001010110100101000
+};
+
+static const uint32_t gen7_subreg_table[32] = {
+ 0b000000000000000,
+ 0b000000000000001,
+ 0b000000000001000,
+ 0b000000000001111,
+ 0b000000000010000,
+ 0b000000010000000,
+ 0b000000100000000,
+ 0b000000110000000,
+ 0b000001000000000,
+ 0b000001000010000,
+ 0b000010100000000,
+ 0b001000000000000,
+ 0b001000000000001,
+ 0b001000010000001,
+ 0b001000010000010,
+ 0b001000010000011,
+ 0b001000010000100,
+ 0b001000010000111,
+ 0b001000010001000,
+ 0b001000010001110,
+ 0b001000010001111,
+ 0b001000110000000,
+ 0b001000111101000,
+ 0b010000000000000,
+ 0b010000110000000,
+ 0b011000000000000,
+ 0b011110010000111,
+ 0b100000000000000,
+ 0b101000000000000,
+ 0b110000000000000,
+ 0b111000000000000,
+ 0b111000000011100
+};
+
+static const uint32_t gen7_src_index_table[32] = {
+ 0b000000000000,
+ 0b000000000010,
+ 0b000000010000,
+ 0b000000010010,
+ 0b000000011000,
+ 0b000000100000,
+ 0b000000101000,
+ 0b000001001000,
+ 0b000001010000,
+ 0b000001110000,
+ 0b000001111000,
+ 0b001100000000,
+ 0b001100000010,
+ 0b001100001000,
+ 0b001100010000,
+ 0b001100010010,
+ 0b001100100000,
+ 0b001100101000,
+ 0b001100111000,
+ 0b001101000000,
+ 0b001101000010,
+ 0b001101001000,
+ 0b001101010000,
+ 0b001101100000,
+ 0b001101101000,
+ 0b001101110000,
+ 0b001101110001,
+ 0b001101111000,
+ 0b010001101000,
+ 0b010001101001,
+ 0b010001101010,
+ 0b010110001000
+};
+
+/* Lookup tables currently in use.  brw_init_compaction_tables() points them
+ * at the gen6 or gen7 variants; until then (and for any other generation)
+ * they keep their initial value, which is NULL for file-scope statics.
+ */
+static const uint32_t *control_index_table;
+static const uint32_t *datatype_table;
+static const uint32_t *subreg_table;
+static const uint32_t *src_index_table;
+
+/**
+ * Gathers the control bits of src into a lookup key and searches
+ * control_index_table for it.
+ *
+ * The key is bits 8..23 of dword 0, bit 31 of dword 0 (as key bit 16) and,
+ * on gen7+, bits 25..26 of dword 2 (as key bits 17..18).
+ *
+ * On a match the table slot is stored in dst->dw0.control_index and true is
+ * returned; otherwise dst is left alone and false is returned.
+ */
+static bool
+set_control_index(struct intel_context *intel,
+                  struct brw_compact_instruction *dst,
+                  struct brw_instruction *src)
+{
+   const uint32_t *dwords = (const uint32_t *)src;
+   const uint32_t dw0 = dwords[0];
+   uint32_t key = ((dw0 >> 8) & 0xffff) | (((dw0 >> 31) & 0x1) << 16);
+   int slot;
+
+   /* On gen7, the flag register number gets integrated into the control
+    * index.
+    */
+   if (intel->gen >= 7)
+      key |= ((dwords[2] >> 25) & 0x3) << 17;
+
+   for (slot = 0; slot < 32; slot++) {
+      if (control_index_table[slot] != key)
+         continue;
+
+      dst->dw0.control_index = slot;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Builds the datatype lookup key from bits1 of src (low 15 bits, plus the
+ * top three bits placed at key bits 15..17) and searches datatype_table.
+ *
+ * Stores the matching slot in dst->dw0.data_type_index and returns true, or
+ * returns false without touching dst when the key is not in the table.
+ */
+static bool
+set_datatype_index(struct brw_compact_instruction *dst,
+                   struct brw_instruction *src)
+{
+   const uint32_t bits1 = src->bits1.ud;
+   const uint32_t key = (bits1 & 0x7fff) | ((bits1 >> 29) << 15);
+   int slot;
+
+   for (slot = 0; slot < 32; slot++) {
+      if (datatype_table[slot] != key)
+         continue;
+
+      dst->dw0.data_type_index = slot;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Packs the dest, src0 and src1 sub-register numbers of src into one key
+ * (dest at bit 0, src0 at bit 5, src1 at bit 10) and searches subreg_table.
+ *
+ * Stores the matching slot in dst->dw0.sub_reg_index and returns true, or
+ * returns false without touching dst when there is no match.
+ */
+static bool
+set_subreg_index(struct brw_compact_instruction *dst,
+                 struct brw_instruction *src)
+{
+   uint32_t key = 0;
+   int slot;
+
+   key |= src->bits3.da1.src1_subreg_nr << 10;
+   key |= src->bits2.da1.src0_subreg_nr << 5;
+   key |= src->bits1.da1.dest_subreg_nr;
+
+   for (slot = 0; slot < 32; slot++) {
+      if (subreg_table[slot] != key)
+         continue;
+
+      dst->dw0.sub_reg_index = slot;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Searches src_index_table for uncompacted.
+ *
+ * On success writes the slot number to *compacted and returns true.  On
+ * failure returns false and leaves *compacted unmodified.
+ */
+static bool
+get_src_index(uint32_t uncompacted,
+              uint32_t *compacted)
+{
+   int slot = 0;
+
+   while (slot < 32) {
+      if (src_index_table[slot] == uncompacted) {
+         *compacted = slot;
+         return true;
+      }
+      slot++;
+   }
+
+   return false;
+}
+
+/**
+ * Compacts bits 13..24 of bits2 of src via the source-index table.
+ *
+ * The resulting index is split across the two compact dwords: its low two
+ * bits go to dst->dw0.src0_index and the remaining bits to
+ * dst->dw1.src0_index.  Returns false, leaving dst alone, when the bits have
+ * no table entry.
+ */
+static bool
+set_src0_index(struct brw_compact_instruction *dst,
+               struct brw_instruction *src)
+{
+   const uint32_t key = (src->bits2.ud >> 13) & 0xfff;
+   uint32_t slot;
+
+   if (get_src_index(key, &slot)) {
+      dst->dw0.src0_index = slot & 0x3;
+      dst->dw1.src0_index = slot >> 2;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Compacts bits 13..24 of bits3 of src via the source-index table and
+ * stores the index in dst->dw1.src1_index.
+ *
+ * Returns false, leaving dst alone, when the bits have no table entry.
+ */
+static bool
+set_src1_index(struct brw_compact_instruction *dst,
+               struct brw_instruction *src)
+{
+   const uint32_t key = (src->bits3.ud >> 13) & 0xfff;
+   uint32_t slot;
+
+   if (get_src_index(key, &slot)) {
+      dst->dw1.src1_index = slot;
+      return true;
+   }
+
+   return false;
+}
+
+/**
+ * Attempts to encode the full-size instruction src in compact form.
+ *
+ * The compact encoding is assembled in a local scratch copy and only written
+ * to dst once every field has been encoded, so dst is untouched on failure.
+ * brw_compact_instructions() depends on that, because it passes a dst that
+ * may alias src.
+ *
+ * Returns true when src was compacted into dst, false otherwise.
+ */
+bool
+brw_try_compact_instruction(struct brw_compile *p,
+                            struct brw_compact_instruction *dst,
+                            struct brw_instruction *src)
+{
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_compact_instruction packed;
+
+   switch (src->header.opcode) {
+   case BRW_OPCODE_IF:
+   case BRW_OPCODE_ELSE:
+   case BRW_OPCODE_ENDIF:
+   case BRW_OPCODE_HALT:
+   case BRW_OPCODE_DO:
+   case BRW_OPCODE_WHILE:
+      /* FINISHME: The fixup code in brw_compact_instructions(), and
+       * brw_set_uip_jip and friends, need to be able to handle compacted
+       * flow control instructions.
+       */
+      return false;
+   default:
+      break;
+   }
+
+   /* FINISHME: immediates */
+   if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+      return false;
+   if (src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+      return false;
+
+   memset(&packed, 0, sizeof(packed));
+
+   packed.dw0.opcode = src->header.opcode;
+   packed.dw0.debug_control = src->header.debug_control;
+
+   /* Table-driven fields of the first dword; any miss aborts. */
+   if (!set_control_index(intel, &packed, src) ||
+       !set_datatype_index(&packed, src) ||
+       !set_subreg_index(&packed, src))
+      return false;
+
+   packed.dw0.acc_wr_control = src->header.acc_wr_control;
+   packed.dw0.conditionalmod = src->header.destreg__conditionalmod;
+   /* Only gen6 and earlier carry the flag sub-register here; on gen7 it is
+    * part of the control index.
+    */
+   if (intel->gen <= 6)
+      packed.dw0.flag_subreg_nr = src->bits2.da1.flag_subreg_nr;
+   packed.dw0.cmpt_ctrl = 1;
+
+   /* Table-driven source region fields. */
+   if (!set_src0_index(&packed, src) ||
+       !set_src1_index(&packed, src))
+      return false;
+
+   packed.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
+   packed.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
+   packed.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
+
+   *dst = packed;
+
+   return true;
+}
+
+/**
+ * Expands the control index of src back into the raw dwords of dst.
+ *
+ * Reverses set_control_index(): table bits 0..15 are ORed into bits 8..23 of
+ * dword 0, table bit 16 into bit 31 of dword 0 and, on gen7+, table bits
+ * 17..18 into bits 25..26 of dword 2.  The bits are ORed in, so the target
+ * positions are expected to be clear on entry.
+ */
+static void
+set_uncompacted_control(struct intel_context *intel,
+                        struct brw_instruction *dst,
+                        struct brw_compact_instruction *src)
+{
+   uint32_t *dwords = (uint32_t *)dst;
+   const uint32_t control = control_index_table[src->dw0.control_index];
+
+   dwords[0] |= ((control & 0xffff) << 8) |
+                (((control >> 16) & 0x1) << 31);
+
+   if (intel->gen >= 7)
+      dwords[2] |= ((control >> 17) & 0x3) << 25;
+}
+
+/**
+ * Expands the datatype index of src back into bits1 of dst.
+ *
+ * Reverses set_datatype_index(): table bits 0..14 replace bits 0..14 of
+ * bits1 and table bits 15..17 replace bits 29..31.  All other bits of bits1
+ * are preserved.
+ */
+static void
+set_uncompacted_datatype(struct brw_instruction *dst,
+                         struct brw_compact_instruction *src)
+{
+   /* Masks are built as unsigned values: shifting the int constant 0x7 left
+    * by 29 overflows a signed int, which is undefined behaviour in C.
+    */
+   const uint32_t low_mask = 0x7fffu;
+   const uint32_t high_mask = (uint32_t)0x7 << 29;
+   const uint32_t uncompacted = datatype_table[src->dw0.data_type_index];
+   uint32_t bits1 = dst->bits1.ud & ~(low_mask | high_mask);
+
+   bits1 |= uncompacted & low_mask;
+   bits1 |= ((uncompacted >> 15) & 0x7) << 29;
+
+   dst->bits1.ud = bits1;
+}
+
+/**
+ * Expands the sub-register index of src into the three sub-register fields
+ * of dst.
+ *
+ * Reverses set_subreg_index(): each field is a 5-bit slice of the table
+ * entry (dest at bit 0, src0 at bit 5, src1 at bit 10).
+ */
+static void
+set_uncompacted_subreg(struct brw_instruction *dst,
+                       struct brw_compact_instruction *src)
+{
+   const uint32_t field_mask = 0x1f;
+   const uint32_t packed = subreg_table[src->dw0.sub_reg_index];
+
+   dst->bits1.da1.dest_subreg_nr = packed & field_mask;
+   dst->bits2.da1.src0_subreg_nr = (packed >> 5) & field_mask;
+   dst->bits3.da1.src1_subreg_nr = (packed >> 10) & field_mask;
+}
+
+/**
+ * Expands the src0 index of src into bits2 of dst.
+ *
+ * The index is reassembled from its two pieces (low two bits in dw0, the
+ * rest in dw1), looked up in src_index_table and ORed into bits2 starting at
+ * bit 13.
+ */
+static void
+set_uncompacted_src0(struct brw_instruction *dst,
+                     struct brw_compact_instruction *src)
+{
+   const uint32_t slot = (src->dw1.src0_index << 2) | src->dw0.src0_index;
+
+   dst->bits2.ud |= src_index_table[slot] << 13;
+}
+
+/**
+ * Expands the src1 index of src into bits3 of dst: the table entry is ORed
+ * into bits3 starting at bit 13.
+ */
+static void
+set_uncompacted_src1(struct brw_instruction *dst,
+                     struct brw_compact_instruction *src)
+{
+   const uint32_t region = src_index_table[src->dw1.src1_index];
+
+   dst->bits3.ud |= region << 13;
+}
+
+/**
+ * Expands the compacted instruction src into the full-size instruction dst.
+ *
+ * dst is cleared first; this matters because several of the helpers below OR
+ * their bits into dst rather than assigning them.  The fields are then
+ * restored in the same order in which brw_try_compact_instruction() encodes
+ * them.
+ */
+void
+brw_uncompact_instruction(struct intel_context *intel,
+ struct brw_instruction *dst,
+ struct brw_compact_instruction *src)
+{
+ memset(dst, 0, sizeof(*dst));
+
+ dst->header.opcode = src->dw0.opcode;
+ dst->header.debug_control = src->dw0.debug_control;
+
+ /* Table-driven fields of the first compact dword. */
+ set_uncompacted_control(intel, dst, src);
+ set_uncompacted_datatype(dst, src);
+ set_uncompacted_subreg(dst, src);
+ dst->header.acc_wr_control = src->dw0.acc_wr_control;
+ dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
+ /* On gen7 the flag sub-register comes from the control index instead. */
+ if (intel->gen <= 6)
+ dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr;
+ /* Table-driven source fields, then the register numbers stored verbatim. */
+ set_uncompacted_src0(dst, src);
+ set_uncompacted_src1(dst, src);
+ dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
+ dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
+ dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
+}
+
+/**
+ * Reports a compaction round-trip mismatch on stderr.
+ *
+ * Prints the disassembly of the original instruction and of the instruction
+ * obtained by compacting and uncompacting it, followed by one line for each
+ * of the 128 instruction bits whose value differs between the two.
+ *
+ * \param intel        context; only intel->gen is read
+ * \param orig         instruction as it was before compaction
+ * \param uncompacted  result of compacting and then uncompacting orig
+ */
+void brw_debug_compact_uncompact(struct intel_context *intel,
+                                 struct brw_instruction *orig,
+                                 struct brw_instruction *uncompacted)
+{
+   const uint32_t *before_bits = (const uint32_t *)orig;
+   const uint32_t *after_bits = (const uint32_t *)uncompacted;
+
+   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
+           intel->gen);
+
+   fprintf(stderr, " before: ");
+   brw_disasm(stderr, orig, intel->gen);
+
+   fprintf(stderr, " after: ");
+   brw_disasm(stderr, uncompacted, intel->gen);
+
+   /* The whole report goes to stderr so that it stays in one piece and in
+    * order; the bit list used to be sent to stdout.
+    */
+   fprintf(stderr, " changed bits:\n");
+   for (int i = 0; i < 128; i++) {
+      /* Unsigned one: shifting a signed 1 into bit 31 is undefined. */
+      const uint32_t mask = (uint32_t)1 << (i & 31);
+      const uint32_t before = before_bits[i / 32] & mask;
+      const uint32_t after = after_bits[i / 32] & mask;
+
+      if (before != after) {
+         fprintf(stderr, " bit %d, %s to %s\n", i,
+                 before ? "set" : "unset",
+                 after ? "set" : "unset");
+      }
+   }
+}
+
+/**
+ * Returns the difference between the compacted-instruction counts recorded
+ * for old_target_ip and for old_ip.  The result is negative when the count
+ * at the target is the smaller one.
+ */
+static int
+compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
+{
+   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
+}
+
+/**
+ * Adjusts the JIP and UIP jump distances of insn after compaction.
+ *
+ * Each distance is resolved to its pre-compaction target, and then reduced
+ * by the difference between the compacted counts recorded for the target
+ * and for this instruction.
+ */
+static void
+update_uip_jip(struct brw_instruction *insn, int this_old_ip,
+               int *compacted_counts)
+{
+   const int compacted_here = compacted_counts[this_old_ip];
+   int target;
+
+   target = this_old_ip + insn->bits3.break_cont.jip;
+   insn->bits3.break_cont.jip -= compacted_counts[target] - compacted_here;
+
+   target = this_old_ip + insn->bits3.break_cont.uip;
+   insn->bits3.break_cont.uip -= compacted_counts[target] - compacted_here;
+}
+
+/**
+ * Selects the compaction lookup tables that match the hardware generation.
+ *
+ * gen6 and gen7 each have their own set of four tables.  For any other
+ * generation the table pointers are left as they were.
+ *
+ * The assertions check that the last entry of every table is non-zero: a
+ * table whose initializer is short would have its tail zero-filled by the
+ * compiler.  Each assertion indexes a table with that same table's size.
+ */
+void
+brw_init_compaction_tables(struct intel_context *intel)
+{
+   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
+   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
+   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
+   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
+   assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
+   assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
+   assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
+   assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
+
+   switch (intel->gen) {
+   case 7:
+      control_index_table = gen7_control_index_table;
+      datatype_table = gen7_datatype_table;
+      subreg_table = gen7_subreg_table;
+      src_index_table = gen7_src_index_table;
+      break;
+   case 6:
+      control_index_table = gen6_control_index_table;
+      datatype_table = gen6_datatype_table;
+      subreg_table = gen6_subreg_table;
+      src_index_table = gen6_src_index_table;
+      break;
+   default:
+      /* No compaction tables for this generation. */
+      return;
+   }
+}
+
+/**
+ * Compacts the program stored in p->store in place.
+ *
+ * Pass 1 walks the instructions, replacing each one that can be compacted
+ * by its 8-byte form and sliding the following instructions down.  Pass 2
+ * walks the compacted program and corrects the jump distances of flow
+ * control instructions.  Finally the program is padded with a compact NOP
+ * if it ends on an odd 8-byte boundary, and p->next_insn_offset and
+ * p->nr_insn are updated.
+ *
+ * Nothing is done on hardware older than gen6.
+ */
+void
+brw_compact_instructions(struct brw_compile *p)
+{
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
+ void *store = p->store;
+ /* For an instruction at byte offset 8*i before compaction, this is the number
+ * of compacted instructions that preceded it.
+ */
+ int compacted_counts[p->next_insn_offset / 8];
+ /* For an instruction at byte offset 8*i after compaction, this is the
+ * 8-byte offset it was at before compaction.
+ */
+ int old_ip[p->next_insn_offset / 8];
+
+ if (intel->gen < 6)
+ return;
+
+ /* Pass 1: src_offset is the read position, offset the write position; both
+ * are byte offsets into store.
+ */
+ int src_offset;
+ int offset = 0;
+ int compacted_count = 0;
+ for (src_offset = 0; src_offset < p->nr_insn * 16;) {
+ struct brw_instruction *src = store + src_offset;
+ void *dst = store + offset;
+
+ old_ip[offset / 8] = src_offset / 8;
+ compacted_counts[src_offset / 8] = compacted_count;
+
+ /* Copy of the original, kept for the round-trip check below; dst may
+ * alias src, so src cannot be relied on after compaction.
+ */
+ struct brw_instruction saved = *src;
+
+ /* Instructions that are already compact are not compacted again. */
+ if (!src->header.cmpt_control &&
+ brw_try_compact_instruction(p, dst, src)) {
+ compacted_count++;
+
+ if (INTEL_DEBUG) {
+ /* Uncompact again and report any difference from the original. */
+ struct brw_instruction uncompacted;
+ brw_uncompact_instruction(intel, &uncompacted, dst);
+ if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
+ brw_debug_compact_uncompact(intel, &saved, &uncompacted);
+ }
+ }
+
+ offset += 8;
+ src_offset += 16;
+ } else {
+ /* Not compacted here: either it already was compact (8 bytes) or it
+ * stays full size (16 bytes).
+ */
+ int size = src->header.cmpt_control ? 8 : 16;
+
+ /* It appears that the end of thread SEND instruction needs to be
+ * aligned, or the GPU hangs.
+ */
+ if ((src->header.opcode == BRW_OPCODE_SEND ||
+ src->header.opcode == BRW_OPCODE_SENDC) &&
+ src->bits3.generic.end_of_thread &&
+ (offset & 8) != 0) {
+ /* Insert a compact NOP so the SEND lands on a 16-byte boundary. */
+ struct brw_compact_instruction *align = store + offset;
+ memset(align, 0, sizeof(*align));
+ align->dw0.opcode = BRW_OPCODE_NOP;
+ align->dw0.cmpt_ctrl = 1;
+ offset += 8;
+ old_ip[offset / 8] = src_offset / 8;
+ dst = store + offset;
+ }
+
+ /* If we didn't compact this instruction, we need to move it down into
+ * place.
+ */
+ if (offset != src_offset) {
+ memmove(dst, src, size);
+ }
+ offset += size;
+ src_offset += size;
+ }
+ }
+
+ /* Fix up control flow offsets. */
+ p->next_insn_offset = offset;
+ for (offset = 0; offset < p->next_insn_offset;) {
+ struct brw_instruction *insn = store + offset;
+ int this_old_ip = old_ip[offset / 8];
+ int this_compacted_count = compacted_counts[this_old_ip];
+ int target_old_ip, target_compacted_count;
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_BREAK:
+ case BRW_OPCODE_CONTINUE:
+ case BRW_OPCODE_HALT:
+ update_uip_jip(insn, this_old_ip, compacted_counts);
+ break;
+
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_WHILE:
+ /* On gen6 these opcodes carry a single jump count in bits1; on other
+ * generations they are handled through the JIP/UIP fields.
+ */
+ if (intel->gen == 6) {
+ target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count;
+ target_compacted_count = compacted_counts[target_old_ip];
+ insn->bits1.branch_gen6.jump_count -= (target_compacted_count -
+ this_compacted_count);
+ } else {
+ update_uip_jip(insn, this_old_ip, compacted_counts);
+ }
+ break;
+ }
+
+ /* Step by the size of the instruction just visited. */
+ if (insn->header.cmpt_control) {
+ offset += 8;
+ } else {
+ offset += 16;
+ }
+ }
+
+ /* p->nr_insn is counting the number of uncompacted instructions still, so
+ * divide. We do want to be sure there's a valid instruction in any
+ * alignment padding, so that the next compression pass (for the FS 8/16
+ * compile passes) parses correctly.
+ */
+ if (p->next_insn_offset & 8) {
+ struct brw_compact_instruction *align = store + offset;
+ memset(align, 0, sizeof(*align));
+ align->dw0.opcode = BRW_OPCODE_NOP;
+ align->dw0.cmpt_ctrl = 1;
+ p->next_insn_offset += 8;
+ }
+ p->nr_insn = p->next_insn_offset / 16;
+
+ /* Disabled debugging aid: dumps the compacted program and the number of
+ * bytes saved.  Change the condition to enable it.
+ */
+ if (0) {
+ fprintf(stdout, "dumping compacted program\n");
+ brw_dump_compile(p, stdout, 0, p->next_insn_offset);
+
+ int cmp = 0;
+ for (offset = 0; offset < p->next_insn_offset;) {
+ struct brw_instruction *insn = store + offset;
+
+ if (insn->header.cmpt_control) {
+ offset += 8;
+ cmp++;
+ } else {
+ offset += 16;
+ }
+ }
+ fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
+ cmp * 8 * 100 / (offset + cmp * 8));
+ }
+}
diff --git a/assembler/brw_eu_debug.c b/assembler/brw_eu_debug.c
new file mode 100644
index 0000000..b446007
--- /dev/null
+++ b/assembler/brw_eu_debug.c
@@ -0,0 +1,92 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_eu.h"
+
+/**
+ * Prints a short description of hwreg to stdout, without a trailing newline.
+ *
+ * An "abs/" and/or "-" prefix is printed for the source modifiers, followed
+ * by one of four forms:
+ *  - "vecN" for an even-numbered float GRF with a <8;8,1> region and no
+ *    sub-register offset,
+ *  - "sclN.M" for a float GRF with a <0;1,0> region,
+ *  - "imm <value>" for an immediate, printed as a float,
+ *  - the generic "fileN.M<v;w,h>:type" form for everything else.
+ */
+void brw_print_reg( struct brw_reg hwreg )
+{
+   static const char *file_names[] = { "arf", "grf", "msg", "imm" };
+   static const char *type_names[] = {
+      "ud", "d", "uw", "w", "ub", "vf", "hf", "f"
+   };
+   const int is_float_grf = hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+                            hwreg.type == BRW_REGISTER_TYPE_F;
+
+   printf("%s%s",
+          hwreg.abs ? "abs/" : "",
+          hwreg.negate ? "-" : "");
+
+   /* vector register */
+   if (is_float_grf &&
+       hwreg.nr % 2 == 0 &&
+       hwreg.subnr == 0 &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+       hwreg.width == BRW_WIDTH_8 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_1) {
+      printf("vec%d", hwreg.nr);
+      return;
+   }
+
+   /* "scalar" register */
+   if (is_float_grf &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+       hwreg.width == BRW_WIDTH_1 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_0) {
+      printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      return;
+   }
+
+   if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+      printf("imm %f", hwreg.dw1.f);
+      return;
+   }
+
+   /* Generic form.  A stride encoding of 0 prints as 0; any other encoding
+    * e prints as 1 << (e - 1).
+    */
+   printf("%s%d.%d<%d;%d,%d>:%s",
+          file_names[hwreg.file],
+          hwreg.nr,
+          hwreg.subnr / type_sz(hwreg.type),
+          hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+          1<<hwreg.width,
+          hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+          type_names[hwreg.type]);
+}
+
+
+
diff --git a/assembler/brw_eu_emit.c b/assembler/brw_eu_emit.c
new file mode 100644
index 0000000..23f0da5
--- /dev/null
+++ b/assembler/brw_eu_emit.c
@@ -0,0 +1,2627 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <string.h>
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+#include "ralloc.h"
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+/**
+ * Derives the execution size of insn from the width of reg.
+ *
+ * A width-8 register in a compressed compile selects BRW_EXECUTE_16; in all
+ * other cases the width encoding is used directly as the execution size.
+ */
+static void guess_execution_size(struct brw_compile *p,
+                                 struct brw_instruction *insn,
+                                 struct brw_reg reg)
+{
+   const int widen_to_16 = reg.width == BRW_WIDTH_8 && p->compressed;
+
+   /* note - the width and execution size definitions are compatible */
+   insn->header.execution_size = widen_to_16 ? BRW_EXECUTE_16 : reg.width;
+}
+
+
+/**
+ * Prior to Sandybridge, the SEND instruction accepted non-MRF source
+ * registers, implicitly moving the operand to a message register.
+ *
+ * On Sandybridge, this is no longer the case. This function performs the
+ * explicit move; it should be called before emitting a SEND instruction.
+ *
+ * Nothing happens before gen6 or when *src already is a message register.
+ * Otherwise *src is replaced by message register msg_reg_nr, and a MOV into
+ * that register is emitted first unless *src is the null register.
+ */
+void
+gen6_resolve_implied_move(struct brw_compile *p,
+                          struct brw_reg *src,
+                          unsigned msg_reg_nr)
+{
+   struct intel_context *intel = &p->brw->intel;
+   int src_is_null;
+
+   if (intel->gen < 6 || src->file == BRW_MESSAGE_REGISTER_FILE)
+      return;
+
+   src_is_null = src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
+                 src->nr == BRW_ARF_NULL;
+
+   if (!src_is_null) {
+      /* Emit the move with masking disabled and without compression,
+       * restoring the previous instruction state afterwards.
+       */
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
+              retype(*src, BRW_REGISTER_TYPE_UD));
+      brw_pop_insn_state(p);
+   }
+
+   *src = brw_message_reg(msg_reg_nr);
+}
+
+/**
+ * On gen7, rewrites a message register reference into the general register
+ * that stands in for it, GEN7_MRF_HACK_START registers further up.
+ * Registers in other files, and all registers on other generations, are
+ * left unchanged.
+ */
+static void
+gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
+{
+   /* From the BSpec / ISA Reference / send - [DevIVB+]:
+    * "The send with EOT should use register space R112-R127 for <src>. This is
+    * to enable loading of a new thread into the same slot while the message
+    * with EOT for current thread is pending dispatch."
+    *
+    * Since we're pretending to have 16 MRFs anyway, we may as well use the
+    * registers required for messages with EOT.
+    */
+   struct intel_context *intel = &p->brw->intel;
+
+   if (intel->gen != 7)
+      return;
+   if (reg->file != BRW_MESSAGE_REGISTER_FILE)
+      return;
+
+   reg->file = BRW_GENERAL_REGISTER_FILE;
+   reg->nr += GEN7_MRF_HACK_START;
+}
+
+
+/**
+ * Encodes dest as the destination operand of insn.
+ *
+ * Writes the register file, type and address mode, then the fields that
+ * depend on the address mode (direct or indirect) and on the access mode of
+ * the instruction (align1 or align16).  Finally derives the execution size
+ * of insn from the destination width.
+ *
+ * insn->header.access_mode is read here, so it must be set beforehand.
+ */
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest)
+{
+ /* Only ARF and MRF registers are exempt from the register number limit. */
+ if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.file != BRW_MESSAGE_REGISTER_FILE)
+ assert(dest.nr < 128);
+
+ /* dest is a by-value copy, so this conversion stays local. */
+ gen7_convert_mrf_to_grf(p, &dest);
+
+ insn->bits1.da1.dest_reg_file = dest.file;
+ insn->bits1.da1.dest_reg_type = dest.type;
+ insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+ if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ insn->bits1.da1.dest_reg_nr = dest.nr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.da1.dest_subreg_nr = dest.subnr;
+ /* A destination stride of 0 is promoted to 1 before encoding. */
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
+ }
+ else {
+ insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+ insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ /* even ignored in da16, still need to set as '01' */
+ insn->bits1.da16.dest_horiz_stride = 1;
+ }
+ }
+ else {
+ insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+ /* These are different sizes in align1 vs align16:
+ */
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ /* A destination stride of 0 is promoted to 1 before encoding. */
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+ }
+ else {
+ insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ /* as in the direct align16 case, the stride is still set to '01' */
+ insn->bits1.ia16.dest_horiz_stride = 1;
+ }
+ }
+
+ /* Set the execution size based on dest.width and whether the compile is
+ * currently in compressed mode (p->compressed):
+ */
+ guess_execution_size(p, insn, dest);
+}
+
+extern int reg_type_size[];
+
+/**
+ * Sanity-checks the region description of a source register against the
+ * register region restrictions, using assertions.
+ *
+ * Immediates are only checked for the vector-immediate destination stride
+ * rule, and the null register is not checked at all.  For every other
+ * register the encoded strides, width and execution size are decoded and
+ * the restrictions that are currently enabled are asserted.
+ *
+ * \param insn  instruction being built; its destination fields and
+ *              execution size must already be set
+ * \param reg   source register about to be encoded into insn
+ */
+static void
+validate_reg(struct brw_instruction *insn, struct brw_reg reg)
+{
+   /* Decoding tables, indexed by the hardware encoding of each field. */
+   static const int hstride_for_reg[] = {0, 1, 2, 4};
+   static const int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
+   static const int width_for_reg[] = {1, 2, 4, 8, 16};
+   static const int execsize_for_reg[] = {1, 2, 4, 8, 16, 32};
+   int width, hstride, vstride, execsize;
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
+       * mean the destination has to be 128-bit aligned and the
+       * destination horiz stride has to be a word.
+       */
+      if (reg.type == BRW_REGISTER_TYPE_V) {
+         assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
+                reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
+      }
+
+      return;
+   }
+
+   /* The null register carries no region to validate.  The register number
+    * identifies it; the previous code compared reg.file against
+    * BRW_ARF_NULL, testing the file twice and never looking at the number.
+    */
+   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+       reg.nr == BRW_ARF_NULL)
+      return;
+
+   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
+   hstride = hstride_for_reg[reg.hstride];
+
+   /* A vstride encoding of 0xf has no table entry; it is represented as -1
+    * and accepted as a wildcard by restriction 4.
+    */
+   if (reg.vstride == 0xf) {
+      vstride = -1;
+   } else {
+      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
+      vstride = vstride_for_reg[reg.vstride];
+   }
+
+   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
+   width = width_for_reg[reg.width];
+
+   assert(insn->header.execution_size >= 0 &&
+          insn->header.execution_size < Elements(execsize_for_reg));
+   execsize = execsize_for_reg[insn->header.execution_size];
+
+   /* Restrictions from 3.3.10: Register Region Restrictions. */
+   /* 3. */
+   assert(execsize >= width);
+
+   /* FIXME: the assembler has a lot of code written that triggers the
+    * assertions commented out below. Let's paper over it (for now!) until we
+    * can re-validate the shaders with those little inconsistencies fixed. */
+
+   /* 4. */
+#if 0
+   if (execsize == width && hstride != 0) {
+      assert(vstride == -1 || vstride == width * hstride);
+   }
+#endif
+
+   /* 5. If execsize == width and hstride == 0, there is no restriction on
+    * vstride, so there is nothing to check.
+    */
+
+   /* 6. */
+#if 0
+   if (width == 1) {
+      assert(hstride == 0);
+   }
+#endif
+
+   /* 7. */
+#if 0
+   if (execsize == 1 && width == 1) {
+      assert(hstride == 0);
+      assert(vstride == 0);
+   }
+#endif
+
+   /* 8. */
+   if (vstride == 0 && hstride == 0) {
+      assert(width == 1);
+   }
+
+   /* 10. Check destination issues. */
+}
+
+/**
+ * Encode \p reg as the first source operand (src0) of \p insn.
+ *
+ * The register file, type, abs/negate modifiers and addressing mode are
+ * always written.  For an immediate the 32-bit payload is stored in bits3;
+ * for every other file the register number / sub-register (direct
+ * addressing) or the sub-register / indirect offset (indirect addressing)
+ * are stored in bits2, followed by either the align1 region description
+ * or the align16 swizzle and vertical stride.
+ *
+ * \param p     compile context; supplies the hardware generation
+ * \param insn  instruction being filled in
+ * \param reg   source register description (a by-value copy, so the
+ *              gen7_convert_mrf_to_grf() call only affects this function)
+ */
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+             struct brw_reg reg)
+{
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(reg.nr < 128);
+
+   gen7_convert_mrf_to_grf(p, &reg);
+
+   if (intel->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
+                           insn->header.opcode == BRW_OPCODE_SENDC)) {
+      /* Any source modifiers or regions will be ignored, since this just
+       * identifies the MRF/GRF to start reading the message contents from.
+       * Check for some likely failures.
+       */
+      assert(!reg.negate);
+      assert(!reg.abs);
+      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+   }
+
+   validate_reg(insn, reg);
+
+   insn->bits1.da1.src0_reg_file = reg.file;
+   insn->bits1.da1.src0_reg_type = reg.type;
+   insn->bits2.da1.src0_abs = reg.abs;
+   insn->bits2.da1.src0_negate = reg.negate;
+   insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+
+      /* Required to set some fields in src1 as well:
+       */
+
+      /* FIXME: This looks quite wrong, tampering with src1. I did not find
+       * anything in the bspec that was hinting it would be needed when
+       * setting src0. Before removing this one needs to run piglit.
+
+      insn->bits1.da1.src1_reg_file = 0;
+      insn->bits1.da1.src1_reg_type = reg.type;
+       */
+   }
+   else
+   {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.da1.src0_subreg_nr = reg.subnr;
+            insn->bits2.da1.src0_reg_nr = reg.nr;
+         }
+         else {
+            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+            insn->bits2.da16.src0_reg_nr = reg.nr;
+         }
+      }
+      else {
+         insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         }
+         else {
+            /* Store the offset in the align16 offset field, scaled the same
+             * way brw_set_src1() does.  Writing it to ia16.src0_subreg_nr
+             * would overwrite the address sub-register selected just above
+             * and leave the offset field untouched.
+             */
+            insn->bits2.ia16.src0_indirect_offset = reg.dw1.bits.indirect_offset / 16;
+         }
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+
+         /* FIXME: While this is correct, if the assembler uses that code path
+          * the opcodes generated are different and thus need a validation
+          * pass.
+         if (reg.width == BRW_WIDTH_1 &&
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits2.da1.src0_width = BRW_WIDTH_1;
+            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else {
+          */
+         insn->bits2.da1.src0_horiz_stride = reg.hstride;
+         insn->bits2.da1.src0_width = reg.width;
+         insn->bits2.da1.src0_vert_stride = reg.vstride;
+         /* } */
+      }
+      else {
+         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits2.da16.src0_vert_stride = reg.vstride;
+      }
+   }
+}
+
+
+/**
+ * Encode \p reg as the second source operand (src1) of \p insn.
+ *
+ * src1 may not come from the message register file, and src0 must not
+ * already be an immediate (both are asserted).  An immediate's 32-bit
+ * payload is stored in bits3; otherwise the register number / sub-register
+ * (direct) or sub-register / indirect offset (indirect) are stored in bits3,
+ * followed by the align1 region or the align16 swizzle and vertical stride.
+ */
+void brw_set_src1(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg)
+{
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
+
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(reg.nr < 128);
+
+ gen7_convert_mrf_to_grf(p, &reg);
+
+ validate_reg(insn, reg);
+
+ insn->bits1.da1.src1_reg_file = reg.file;
+ insn->bits1.da1.src1_reg_type = reg.type;
+ insn->bits3.da1.src1_abs = reg.abs;
+ insn->bits3.da1.src1_negate = reg.negate;
+ insn->bits3.da1.src1_address_mode = reg.address_mode;
+
+ /* Only src1 can be immediate in two-argument instructions.
+ */
+ assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ /* Overwrites all of bits3, including the modifier bits set above. */
+ insn->bits3.ud = reg.dw1.ud;
+ }
+ else {
+ /* It's only BRW that does not support register-indirect addressing on
+ * src1 */
+ /* NOTE(review): if gen 4 is the lowest generation this code handles,
+ * the first operand is always true and the assert can never fire --
+ * confirm whether "gen > 4" was intended. */
+ assert (intel->gen >= 4 || reg.address_mode == BRW_ADDRESS_DIRECT);
+
+ if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
+ }
+ }
+ else {
+ insn->bits3.ia1.src1_subreg_nr = reg.subnr;
+
+ /* The align16 offset field is written in units of 16. */
+ if (insn->header.access_mode == BRW_ALIGN_1)
+ insn->bits3.ia1.src1_indirect_offset = reg.dw1.bits.indirect_offset;
+ else
+ insn->bits3.ia16.src1_indirect_offset = reg.dw1.bits.indirect_offset / 16;
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ /* FIXME: While this is correct, if the assembler uses that code path
+ * the opcode generated are different and thus needs a validation
+ * pass.
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = BRW_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else { */
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ /* } */
+ }
+ else {
+ insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits3.da16.src1_vert_stride = reg.vstride;
+ }
+ }
+}
+
+/**
+ * Program the Message Descriptor (and, on gen5, the Extended Message
+ * Descriptor) of a SEND instruction.
+ *
+ * src1 is first reset to an immediate zero, which clears all of bits3;
+ * the generic length / target / end-of-thread fields are then written in
+ * the layout matching the hardware generation.
+ *
+ * \note Because the Function Control bits are zeroed here, this must be
+ *       called \b before any message-specific fields are filled in.
+ *       Fields a caller leaves alone stay zero.
+ */
+static void
+brw_set_message_descriptor(struct brw_compile *p,
+                           struct brw_instruction *inst,
+                           enum brw_message_target sfid,
+                           unsigned msg_length,
+                           unsigned response_length,
+                           bool header_present,
+                           bool end_of_thread)
+{
+   struct intel_context *ctx = &p->brw->intel;
+
+   brw_set_src1(p, inst, brw_imm_d(0));
+
+   if (ctx->gen < 5) {
+      /* Original layout: the message target lives in the descriptor. */
+      inst->bits3.generic.response_length = response_length;
+      inst->bits3.generic.msg_length = msg_length;
+      inst->bits3.generic.msg_target = sfid;
+      inst->bits3.generic.end_of_thread = end_of_thread;
+      return;
+   }
+
+   inst->bits3.generic_gen5.header_present = header_present;
+   inst->bits3.generic_gen5.response_length = response_length;
+   inst->bits3.generic_gen5.msg_length = msg_length;
+   inst->bits3.generic_gen5.end_of_thread = end_of_thread;
+
+   if (ctx->gen < 6) {
+      /* Set Extended Message Descriptor (ex_desc) */
+      inst->bits2.send_gen5.sfid = sfid;
+      inst->bits2.send_gen5.end_of_thread = end_of_thread;
+   } else {
+      /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
+      inst->header.destreg__conditionalmod = sfid;
+   }
+}
+
+/**
+ * Fill in the descriptor of a SEND to the math unit.
+ *
+ * Message and response lengths are derived from \p function.  The
+ * instruction's saturate flag is moved into the message descriptor and
+ * then cleared in the instruction header.
+ */
+static void brw_set_math_message( struct brw_compile *p,
+                                  struct brw_instruction *insn,
+                                  unsigned function,
+                                  unsigned integer_type,
+                                  bool low_precision,
+                                  unsigned dataType )
+{
+   struct intel_context *intel = &p->brw->intel;
+
+   /* Two-operand functions need a two-register message. */
+   const bool two_operands =
+      function == BRW_MATH_FUNCTION_POW ||
+      function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+      function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+      function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
+
+   /* Functions producing two results need a two-register response. */
+   const bool two_results =
+      function == BRW_MATH_FUNCTION_SINCOS ||
+      function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
+
+   unsigned msg_length = two_operands ? 2 : 1;
+   unsigned response_length = two_results ? 2 : 1;
+
+   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
+                              msg_length, response_length, false, false);
+
+   if (intel->gen != 5) {
+      insn->bits3.math.function = function;
+      insn->bits3.math.int_type = integer_type;
+      insn->bits3.math.precision = low_precision;
+      insn->bits3.math.saturate = insn->header.saturate;
+      insn->bits3.math.data_type = dataType;
+   } else {
+      insn->bits3.math_gen5.function = function;
+      insn->bits3.math_gen5.int_type = integer_type;
+      insn->bits3.math_gen5.precision = low_precision;
+      insn->bits3.math_gen5.saturate = insn->header.saturate;
+      insn->bits3.math_gen5.data_type = dataType;
+      insn->bits3.math_gen5.snapshot = 0;
+   }
+
+   /* Saturation is now carried by the message, not the SEND itself. */
+   insn->header.saturate = 0;
+}
+
+
+/**
+ * Fill in the descriptor of an FF_SYNC message to the URB unit.
+ *
+ * The message is always one register long and carries a header; only
+ * \p allocate, \p response_length and \p end_of_thread vary.
+ */
+static void brw_set_ff_sync_message(struct brw_compile *p,
+                                    struct brw_instruction *insn,
+                                    bool allocate,
+                                    unsigned response_length,
+                                    bool end_of_thread)
+{
+   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
+                              1, response_length, true, end_of_thread);
+
+   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
+   insn->bits3.urb_gen5.allocate = allocate;
+
+   /* The remaining URB fields are not used by FF_SYNC. */
+   insn->bits3.urb_gen5.offset = 0;
+   insn->bits3.urb_gen5.swizzle_control = 0;
+   insn->bits3.urb_gen5.used = 0;
+   insn->bits3.urb_gen5.complete = 0;
+}
+
+/**
+ * Fill in the descriptor of a URB write message.
+ *
+ * The generic descriptor (always with a header) is written first, then
+ * the URB-specific fields in the layout of the current generation.  On
+ * gen7 \p allocate and \p used are not encoded and the transpose swizzle
+ * is rejected by an assert.
+ */
+static void brw_set_urb_message( struct brw_compile *p,
+                                 struct brw_instruction *insn,
+                                 bool allocate,
+                                 bool used,
+                                 unsigned msg_length,
+                                 unsigned response_length,
+                                 bool end_of_thread,
+                                 bool complete,
+                                 unsigned offset,
+                                 unsigned swizzle_control )
+{
+   struct intel_context *intel = &p->brw->intel;
+
+   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
+                              msg_length, response_length, true, end_of_thread);
+
+   if (intel->gen < 5) {
+      insn->bits3.urb.opcode = 0; /* ? */
+      insn->bits3.urb.offset = offset;
+      insn->bits3.urb.swizzle_control = swizzle_control;
+      insn->bits3.urb.allocate = allocate;
+      insn->bits3.urb.used = used; /* ? */
+      insn->bits3.urb.complete = complete;
+   } else if (intel->gen != 7) {
+      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
+      insn->bits3.urb_gen5.offset = offset;
+      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+      insn->bits3.urb_gen5.allocate = allocate;
+      insn->bits3.urb_gen5.used = used; /* ? */
+      insn->bits3.urb_gen5.complete = complete;
+   } else {
+      insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
+      insn->bits3.urb_gen7.offset = offset;
+      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
+      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
+      /* per_slot_offset = 0 makes it ignore offsets in message header */
+      insn->bits3.urb_gen7.per_slot_offset = 0;
+      insn->bits3.urb_gen7.complete = complete;
+   }
+}
+
+/**
+ * Fill in the descriptor of a data port write message.
+ *
+ * First picks the shared function ID for the generation (and, on gen7+,
+ * for the message type), writes the generic descriptor, then the data
+ * port fields.  On gen6 and gen7+ \p last_render_target is folded into the
+ * msg_control field (bit 5 and bit 6 respectively); gen7+ does not encode
+ * \p send_commit_msg.
+ */
+void
+brw_set_dp_write_message(struct brw_compile *p,
+                         struct brw_instruction *insn,
+                         unsigned binding_table_index,
+                         unsigned msg_control,
+                         unsigned msg_type,
+                         unsigned msg_length,
+                         bool header_present,
+                         unsigned last_render_target,
+                         unsigned response_length,
+                         unsigned end_of_thread,
+                         unsigned send_commit_msg)
+{
+   struct intel_context *intel = &p->brw->intel;
+   unsigned sfid;
+
+   if (intel->gen < 6) {
+      sfid = BRW_SFID_DATAPORT_WRITE;
+   } else if (intel->gen == 6) {
+      /* Use the render cache for all write messages. */
+      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+   } else {
+      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
+      sfid = (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
+         ? GEN6_SFID_DATAPORT_RENDER_CACHE
+         : GEN7_SFID_DATAPORT_DATA_CACHE;
+   }
+
+   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
+                              header_present, end_of_thread);
+
+   if (intel->gen < 5) {
+      insn->bits3.dp_write.binding_table_index = binding_table_index;
+      insn->bits3.dp_write.msg_control = msg_control;
+      insn->bits3.dp_write.last_render_target = last_render_target;
+      insn->bits3.dp_write.msg_type = msg_type;
+      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
+   } else if (intel->gen == 5) {
+      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
+      insn->bits3.dp_write_gen5.msg_control = msg_control;
+      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
+      insn->bits3.dp_write_gen5.msg_type = msg_type;
+      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
+   } else if (intel->gen == 6) {
+      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen6_dp.msg_control = msg_control |
+                                        (last_render_target << 5);
+      insn->bits3.gen6_dp.msg_type = msg_type;
+      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
+   } else {
+      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen7_dp.msg_control = msg_control |
+                                        (last_render_target << 6);
+      insn->bits3.gen7_dp.msg_type = msg_type;
+   }
+}
+
+/**
+ * Fill in the descriptor of a data port read message.
+ *
+ * Selects the shared function ID (on gen6 it depends on \p target_cache),
+ * writes the generic descriptor with end-of-thread cleared, then the data
+ * port fields for the generation.  \p target_cache is only encoded as a
+ * field on gen5 and earlier.
+ */
+void
+brw_set_dp_read_message(struct brw_compile *p,
+                        struct brw_instruction *insn,
+                        unsigned binding_table_index,
+                        unsigned msg_control,
+                        unsigned msg_type,
+                        unsigned target_cache,
+                        unsigned msg_length,
+                        bool header_present,
+                        unsigned response_length)
+{
+   struct intel_context *intel = &p->brw->intel;
+   unsigned sfid;
+
+   if (intel->gen < 6) {
+      sfid = BRW_SFID_DATAPORT_READ;
+   } else if (intel->gen == 6) {
+      sfid = (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
+         ? GEN6_SFID_DATAPORT_RENDER_CACHE
+         : GEN6_SFID_DATAPORT_SAMPLER_CACHE;
+   } else {
+      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+   }
+
+   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
+                              header_present, false);
+
+   if (intel->gen < 5) {
+      if (intel->is_g4x) {
+         insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
+         insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
+         insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
+         insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
+      } else {
+         insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+         insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
+         insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
+         insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
+      }
+   } else if (intel->gen == 5) {
+      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
+      insn->bits3.dp_read_gen5.msg_control = msg_control;
+      insn->bits3.dp_read_gen5.msg_type = msg_type;
+      insn->bits3.dp_read_gen5.target_cache = target_cache;
+   } else if (intel->gen == 6) {
+      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen6_dp.msg_control = msg_control;
+      insn->bits3.gen6_dp.msg_type = msg_type;
+      insn->bits3.gen6_dp.send_commit_msg = 0;
+   } else {
+      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+      insn->bits3.gen7_dp.msg_control = msg_control;
+      insn->bits3.gen7_dp.msg_type = msg_type;
+   }
+}
+
+/**
+ * Fill in the descriptor of a sampler message.
+ *
+ * Writes the generic descriptor (end-of-thread cleared), then the sampler
+ * fields for the generation.  \p simd_mode is only encoded on gen5+ and
+ * \p return_format only on the original (non-G4X) gen4 layout.
+ */
+void
+brw_set_sampler_message(struct brw_compile *p,
+                        struct brw_instruction *insn,
+                        unsigned binding_table_index,
+                        unsigned sampler,
+                        unsigned msg_type,
+                        unsigned response_length,
+                        unsigned msg_length,
+                        unsigned header_present,
+                        unsigned simd_mode,
+                        unsigned return_format)
+{
+   struct intel_context *intel = &p->brw->intel;
+
+   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
+                              response_length, header_present, false);
+
+   if (intel->gen < 5) {
+      if (intel->is_g4x) {
+         insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+         insn->bits3.sampler_g4x.sampler = sampler;
+         insn->bits3.sampler_g4x.msg_type = msg_type;
+      } else {
+         insn->bits3.sampler.binding_table_index = binding_table_index;
+         insn->bits3.sampler.sampler = sampler;
+         insn->bits3.sampler.msg_type = msg_type;
+         insn->bits3.sampler.return_format = return_format;
+      }
+   } else if (intel->gen < 7) {
+      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
+      insn->bits3.sampler_gen5.sampler = sampler;
+      insn->bits3.sampler_gen5.msg_type = msg_type;
+      insn->bits3.sampler_gen5.simd_mode = simd_mode;
+   } else {
+      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
+      insn->bits3.sampler_gen7.sampler = sampler;
+      insn->bits3.sampler_gen7.msg_type = msg_type;
+      insn->bits3.sampler_gen7.simd_mode = simd_mode;
+   }
+}
+
+
+#define next_insn brw_next_insn
+/**
+ * Append a new instruction to the program store and return it.
+ *
+ * The store is doubled when full.  The new slot starts as a copy of the
+ * current default instruction state (p->current) with its opcode set to
+ * \p opcode.  If the default state carried a conditional modifier, that
+ * modifier is cleared and the default predicate control is set to
+ * BRW_PREDICATE_NORMAL for the instructions that follow.
+ */
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, unsigned opcode)
+{
+   struct brw_instruction *slot;
+
+   /* Grow the backing store before handing out a slot past its end. */
+   if (p->nr_insn + 1 > p->store_size) {
+      if (0)
+         printf("incresing the store size to %d\n", p->store_size << 1);
+      p->store_size <<= 1;
+      p->store = reralloc(p->mem_ctx, p->store,
+                          struct brw_instruction, p->store_size);
+      if (!p->store)
+         assert(!"realloc eu store memeory failed");
+   }
+
+   slot = &p->store[p->nr_insn++];
+   p->next_insn_offset += 16;
+
+   /* Start from the current defaults, then stamp the opcode. */
+   memcpy(slot, p->current, sizeof(*slot));
+   slot->header.opcode = opcode;
+
+   /* The conditional modifier in the defaults is one-shot: reset it now
+    * that it has been copied into this instruction.
+    */
+   if (p->current->header.destreg__conditionalmod) {
+      p->current->header.destreg__conditionalmod = 0;
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+
+   return slot;
+}
+
+/**
+ * Emit a one-source ALU instruction: allocate it, then encode the
+ * destination and src0.  Returns the new instruction.
+ */
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+                                         unsigned opcode,
+                                         struct brw_reg dest,
+                                         struct brw_reg src )
+{
+   struct brw_instruction *inst = next_insn(p, opcode);
+
+   brw_set_dest(p, inst, dest);
+   brw_set_src0(p, inst, src);
+
+   return inst;
+}
+
+/**
+ * Emit a two-source ALU instruction: allocate it, then encode the
+ * destination, src0 and src1 in that order.  Returns the new instruction.
+ */
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                        unsigned opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1 )
+{
+   struct brw_instruction *inst = next_insn(p, opcode);
+
+   brw_set_dest(p, inst, dest);
+   brw_set_src0(p, inst, src0);
+   brw_set_src1(p, inst, src1);
+
+   return inst;
+}
+
+/**
+ * Compute the sub-register number used by the three-source encoding.
+ *
+ * The byte offset reg.subnr is divided by 4.  For a register with a zero
+ * vertical stride the swizzle must select a single value, and its first
+ * component is added to the result.
+ */
+static int
+get_3src_subreg_nr(struct brw_reg reg)
+{
+   if (reg.vstride != BRW_VERTICAL_STRIDE_0)
+      return reg.subnr / 4;
+
+   assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
+   return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
+}
+
+/**
+ * Translate a BRW_REGISTER_TYPE_* value into the BRW_REGISTER_3SRC_TYPE_*
+ * encoding.  Only F, D and UD are accepted (asserted); with asserts
+ * disabled any other type maps to the F encoding.
+ */
+static int get_3src_type(int type)
+{
+   assert(type == BRW_REGISTER_TYPE_F ||
+          type == BRW_REGISTER_TYPE_D ||
+          type == BRW_REGISTER_TYPE_UD);
+
+   if (type == BRW_REGISTER_TYPE_D)
+      return BRW_REGISTER_3SRC_TYPE_D;
+
+   if (type == BRW_REGISTER_TYPE_UD)
+      return BRW_REGISTER_3SRC_TYPE_UD;
+
+   /* BRW_REGISTER_TYPE_F, and the fallback for anything unexpected. */
+   return BRW_REGISTER_3SRC_TYPE_F;
+}
+
+/**
+ * Encode the destination of a three-source instruction.
+ *
+ * The instruction must be in align16 mode and the destination a directly
+ * addressed GRF or MRF below register 128 (all asserted).  The execution
+ * size is then guessed from the destination.
+ */
+void
+brw_set_3src_dest(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg dest)
+{
+   gen7_convert_mrf_to_grf(p, &dest);
+
+   assert(insn->header.access_mode == BRW_ALIGN_16);
+
+   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+          dest.file == BRW_MESSAGE_REGISTER_FILE);
+   assert(dest.nr < 128);
+   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+
+   insn->bits1.da3src.dest_reg_type = get_3src_type(dest.type);
+   /* The file is a single flag here: 1 for MRF, 0 for GRF. */
+   insn->bits1.da3src.dest_reg_file =
+      (dest.file == BRW_MESSAGE_REGISTER_FILE) ? 1 : 0;
+   insn->bits1.da3src.dest_reg_nr = dest.nr;
+   insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
+   insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
+
+   guess_execution_size(p, insn, dest);
+}
+
+/**
+ * Encode the first source of a three-source instruction.
+ *
+ * src0 must be a directly addressed GRF below register 128 (asserted).
+ * This also records the shared source type of the instruction, in the
+ * three-source encoding produced by get_3src_type().
+ */
+void
+brw_set_3src_src0(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg src0)
+{
+   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src0.nr < 128);
+
+   /* bits1: type and source modifiers */
+   insn->bits1.da3src.src_reg_type = get_3src_type(src0.type);
+   insn->bits1.da3src.src0_abs = src0.abs;
+   insn->bits1.da3src.src0_negate = src0.negate;
+
+   /* bits2: register, sub-register, swizzle and replicate control */
+   insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
+   insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
+   insn->bits2.da3src.src0_reg_nr = src0.nr;
+   insn->bits2.da3src.src0_rep_ctrl = (src0.vstride == BRW_VERTICAL_STRIDE_0);
+}
+
+/**
+ * Encode the second source of a three-source instruction.
+ *
+ * src1 must be a directly addressed GRF below register 128 and must have
+ * the same type as src0 (all asserted).  Its sub-register number is split
+ * across two fields: the low two bits live in bits2 and the remaining
+ * bits in bits3.
+ *
+ * \param p     compile context (unused here)
+ * \param insn  instruction being filled in; brw_set_3src_src0() must
+ *              already have written its src_reg_type
+ * \param src1  second source register
+ */
+void
+brw_set_3src_src1(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg src1)
+{
+   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src1.nr < 128);
+   /* src_reg_type holds the three-source encoding returned by
+    * get_3src_type(), not a BRW_REGISTER_TYPE_* value, so translate
+    * src1's type before comparing.
+    */
+   assert(get_3src_type(src1.type) == insn->bits1.da3src.src_reg_type);
+
+   const int subreg_nr = get_3src_subreg_nr(src1);
+
+   insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
+   insn->bits2.da3src.src1_subreg_nr_low = subreg_nr & 0x3;
+   insn->bits3.da3src.src1_subreg_nr_high = subreg_nr >> 2;
+   insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
+   insn->bits3.da3src.src1_reg_nr = src1.nr;
+   insn->bits1.da3src.src1_abs = src1.abs;
+   insn->bits1.da3src.src1_negate = src1.negate;
+}
+
+/**
+ * Encode the third source of a three-source instruction.
+ *
+ * src2 must be a directly addressed GRF below register 128 and must have
+ * the same type as src0 (all asserted).
+ *
+ * \param p     compile context (unused here)
+ * \param insn  instruction being filled in; brw_set_3src_src0() must
+ *              already have written its src_reg_type
+ * \param src2  third source register
+ */
+void
+brw_set_3src_src2(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg src2)
+{
+   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
+   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
+   assert(src2.nr < 128);
+   /* src_reg_type holds the three-source encoding returned by
+    * get_3src_type(), not a BRW_REGISTER_TYPE_* value, so translate
+    * src2's type before comparing.
+    */
+   assert(get_3src_type(src2.type) == insn->bits1.da3src.src_reg_type);
+
+   insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
+   insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
+   insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
+   insn->bits3.da3src.src2_reg_nr = src2.nr;
+   insn->bits1.da3src.src2_abs = src2.abs;
+   insn->bits1.da3src.src2_negate = src2.negate;
+}
+
+/**
+ * Emit a three-source ALU instruction: allocate it, then encode the
+ * destination and the three sources in order.  src0 has to be encoded
+ * before src1 and src2, whose asserts read the source type it records.
+ */
+static struct brw_instruction *brw_alu3(struct brw_compile *p,
+                                        unsigned opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1,
+                                        struct brw_reg src2)
+{
+   struct brw_instruction *inst = next_insn(p, opcode);
+
+   brw_set_3src_dest(p, inst, dest);
+   brw_set_3src_src0(p, inst, src0);
+   brw_set_3src_src1(p, inst, src1);
+   brw_set_3src_src2(p, inst, src2);
+
+   return inst;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ */
+/* ALU1(OP) defines brw_OP(p, dest, src0), which emits a one-source
+ * instruction with opcode BRW_OPCODE_OP through brw_alu1().
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+/* ALU2(OP) defines brw_OP(p, dest, src0, src1), which emits a two-source
+ * instruction with opcode BRW_OPCODE_OP through brw_alu2().
+ */
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+/* ALU3(OP) defines brw_OP(p, dest, src0, src1, src2), which emits a
+ * three-source instruction with opcode BRW_OPCODE_OP through brw_alu3().
+ */
+#define ALU3(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1, \
+ struct brw_reg src2) \
+{ \
+ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
+}
+
+/* Rounding operations (other than RNDD) require two instructions - the first
+ * stores a rounded value (possibly the wrong way) in the dest register, but
+ * also sets a per-channel "increment bit" in the flag register. A predicated
+ * add of 1.0 fixes dest to contain the desired result.
+ *
+ * Sandybridge and later appear to round correctly without an ADD.
+ *
+ * ROUND(OP) defines brw_OP(p, dest, src). Unlike the ALUn helpers it
+ * returns void; on gen < 6 it emits the rounding instruction with the
+ * BRW_CONDITIONAL_R modifier followed by the predicated ADD.
+ */
+#define ROUND(OP) \
+void brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src) \
+{ \
+ struct brw_instruction *rnd, *add; \
+ rnd = next_insn(p, BRW_OPCODE_##OP); \
+ brw_set_dest(p, rnd, dest); \
+ brw_set_src0(p, rnd, src); \
+ \
+ if (p->brw->intel.gen < 6) { \
+ /* turn on round-increments */ \
+ rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
+ add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
+ add->header.predicate_control = BRW_PREDICATE_NORMAL; \
+ } \
+}
+
+
+/* Instantiate the plain emitters. Each line below expands to one complete
+ * brw_<OP>() function definition.
+ */
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+ALU2(PLN)
+ALU3(MAD)
+
+/* Rounding emitters that may need the fix-up ADD described above. */
+ROUND(RNDZ)
+ROUND(RNDE)
+
+
+/**
+ * Emit an ADD instruction.
+ *
+ * Asserts that a float operand (type F, or an immediate of type VF) is
+ * never paired with a D or UD operand on the other source.
+ */
+struct brw_instruction *brw_ADD(struct brw_compile *p,
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1)
+{
+   /* 6.2.2: add */
+   const bool src0_is_float =
+      src0.type == BRW_REGISTER_TYPE_F ||
+      (src0.file == BRW_IMMEDIATE_VALUE &&
+       src0.type == BRW_REGISTER_TYPE_VF);
+   const bool src1_is_float =
+      src1.type == BRW_REGISTER_TYPE_F ||
+      (src1.file == BRW_IMMEDIATE_VALUE &&
+       src1.type == BRW_REGISTER_TYPE_VF);
+
+   if (src0_is_float) {
+      assert(src1.type != BRW_REGISTER_TYPE_UD);
+      assert(src1.type != BRW_REGISTER_TYPE_D);
+   }
+
+   if (src1_is_float) {
+      assert(src0.type != BRW_REGISTER_TYPE_UD);
+      assert(src0.type != BRW_REGISTER_TYPE_D);
+   }
+
+   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
+}
+
+/**
+ * Emit an AVG instruction.
+ *
+ * Destination and both sources must share one type (asserted), and that
+ * type must be one of B, UB, W, UW, D or UD.
+ */
+struct brw_instruction *brw_AVG(struct brw_compile *p,
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1)
+{
+   assert(dest.type == src0.type);
+   assert(src0.type == src1.type);
+
+   const bool is_integer_type =
+      src0.type == BRW_REGISTER_TYPE_B ||
+      src0.type == BRW_REGISTER_TYPE_UB ||
+      src0.type == BRW_REGISTER_TYPE_W ||
+      src0.type == BRW_REGISTER_TYPE_UW ||
+      src0.type == BRW_REGISTER_TYPE_D ||
+      src0.type == BRW_REGISTER_TYPE_UD;
+
+   if (!is_integer_type)
+      assert(!"Bad type for brw_AVG");
+
+   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
+}
+
+/**
+ * Emit a MUL instruction.
+ *
+ * Asserts that: a D/UD source never produces an F destination; a float
+ * operand (type F, or an immediate of type VF) is never paired with a
+ * D/UD operand; and neither source is the accumulator.
+ */
+struct brw_instruction *brw_MUL(struct brw_compile *p,
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1)
+{
+   /* 6.32.38: mul */
+   const bool any_dword_src =
+      src0.type == BRW_REGISTER_TYPE_D ||
+      src0.type == BRW_REGISTER_TYPE_UD ||
+      src1.type == BRW_REGISTER_TYPE_D ||
+      src1.type == BRW_REGISTER_TYPE_UD;
+   const bool src0_is_float =
+      src0.type == BRW_REGISTER_TYPE_F ||
+      (src0.file == BRW_IMMEDIATE_VALUE &&
+       src0.type == BRW_REGISTER_TYPE_VF);
+   const bool src1_is_float =
+      src1.type == BRW_REGISTER_TYPE_F ||
+      (src1.file == BRW_IMMEDIATE_VALUE &&
+       src1.type == BRW_REGISTER_TYPE_VF);
+
+   if (any_dword_src)
+      assert(dest.type != BRW_REGISTER_TYPE_F);
+
+   if (src0_is_float) {
+      assert(src1.type != BRW_REGISTER_TYPE_UD);
+      assert(src1.type != BRW_REGISTER_TYPE_D);
+   }
+
+   if (src1_is_float) {
+      assert(src0.type != BRW_REGISTER_TYPE_UD);
+      assert(src0.type != BRW_REGISTER_TYPE_D);
+   }
+
+   /* The accumulator is not allowed as either source. */
+   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+          src0.nr != BRW_ARF_ACCUMULATOR);
+   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+          src1.nr != BRW_ARF_ACCUMULATOR);
+
+   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
+}
+
+
+/**
+ * Emit a NOP whose destination and src0 are both g0 as a UD vec4 and
+ * whose src1 is the immediate 0.
+ */
+void brw_NOP(struct brw_compile *p)
+{
+   struct brw_instruction *nop = next_insn(p, BRW_OPCODE_NOP);
+   struct brw_reg g0_ud = retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD);
+
+   brw_set_dest(p, nop, g0_ud);
+   brw_set_src0(p, nop, g0_ud);
+   brw_set_src1(p, nop, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+/**
+ * Emit a JMPI instruction with compression off and the execution mask
+ * disabled.  The default predicate control for following instructions is
+ * reset to BRW_PREDICATE_NONE.
+ */
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+                                 struct brw_reg dest,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+   struct brw_instruction *jmp = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+   /* NOTE(review): this is the raw field value 1, not a BRW_EXECUTE_*
+    * name -- confirm which execution size the encoding stands for.
+    */
+   jmp->header.execution_size = 1;
+   jmp->header.mask_control = BRW_MASK_DISABLE;
+   jmp->header.compression_control = BRW_COMPRESSION_NONE;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return jmp;
+}
+
+/**
+ * Push \p inst onto the IF stack.
+ *
+ * The stack stores the instruction's index in p->store rather than the
+ * pointer.  After the push the array is doubled as soon as it is full,
+ * so a free slot is always available for the next push.
+ */
+static void
+push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+{
+   p->if_stack[p->if_stack_depth++] = inst - p->store;
+
+   if (p->if_stack_depth >= p->if_stack_array_size) {
+      p->if_stack_array_size *= 2;
+      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
+                             p->if_stack_array_size);
+   }
+}
+
+/**
+ * Pop the innermost entry off the IF stack and return the instruction it
+ * refers to, looked up by index in the current p->store.
+ */
+static struct brw_instruction *
+pop_if_stack(struct brw_compile *p)
+{
+   return &p->store[p->if_stack[--p->if_stack_depth]];
+}
+
+/**
+ * Push \p inst (the instruction opening a loop) onto the loop stack and
+ * start a fresh IF-depth counter for the new loop level.
+ *
+ * The stack stores the instruction's index in p->store rather than the
+ * pointer.  loop_stack and if_depth_in_loop share one capacity,
+ * loop_stack_array_size, and are grown together.
+ *
+ * \param p     compile context owning the stacks
+ * \param inst  instruction inside p->store to record
+ */
+static void
+push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
+{
+   /* This push writes loop_stack[depth] and if_depth_in_loop[depth + 1],
+    * so index depth + 1 must be valid in both arrays.  Testing only
+    * "size < depth" let both writes run past the end of the arrays.
+    */
+   if (p->loop_stack_array_size <= p->loop_stack_depth + 1) {
+      p->loop_stack_array_size *= 2;
+      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
+                               p->loop_stack_array_size);
+      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
+                                     p->loop_stack_array_size);
+   }
+
+   p->loop_stack[p->loop_stack_depth] = inst - p->store;
+   p->loop_stack_depth++;
+   p->if_depth_in_loop[p->loop_stack_depth] = 0;
+}
+
+/**
+ * Return the instruction recorded at the top of the loop stack, i.e. the
+ * one pushed for the innermost open loop.  The stack is not modified.
+ */
+static struct brw_instruction *
+get_inner_do_insn(struct brw_compile *p)
+{
+   const int innermost = p->loop_stack[p->loop_stack_depth - 1];
+
+   return &p->store[innermost];
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * brw_IF() emits the IF with zeroed jump fields, pushes it on the IF
+ * stack and bumps the IF depth of the current loop level. It also resets
+ * the default predicate control to BRW_PREDICATE_NONE.
+ */
+struct brw_instruction *
+brw_IF(struct brw_compile *p, unsigned execute_size)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+
+ insn = next_insn(p, BRW_OPCODE_IF);
+
+ /* Override the defaults for this instruction:
+ */
+ /* The operand layout depends on the generation: gen < 6 uses the IP
+ * register with an immediate src1, gen6 keeps the jump count in bits1,
+ * later generations keep JIP/UIP in bits3.
+ */
+ if (intel->gen < 6) {
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ } else if (intel->gen == 6) {
+ brw_set_dest(p, insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = 0;
+ brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+ brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+ } else {
+ brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+ brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = 0;
+ insn->bits3.break_cont.uip = 0;
+ }
+
+ /* Set after the operands so the caller's execution size is the one kept. */
+ insn->header.execution_size = execute_size;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ if (!p->single_program_flow)
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ push_if_stack(p, insn);
+ p->if_depth_in_loop[p->loop_stack_depth]++;
+ return insn;
+}
+
+/* Emit a gen6-style IF whose comparison is embedded in the instruction
+ * through the conditional modifier.  Only used on gen6, not on gen7.
+ *
+ * The execution size follows p->compressed (16 when set, 8 otherwise) and
+ * the jump count is left at zero.  The IF is pushed on the IF stack.
+ */
+struct brw_instruction *
+gen6_IF(struct brw_compile *p, uint32_t conditional,
+        struct brw_reg src0, struct brw_reg src1)
+{
+   struct brw_instruction *inst = next_insn(p, BRW_OPCODE_IF);
+
+   brw_set_dest(p, inst, brw_imm_w(0));
+
+   /* Fix the execution size before the sources are encoded. */
+   inst->header.execution_size =
+      p->compressed ? BRW_EXECUTE_16 : BRW_EXECUTE_8;
+   inst->bits1.branch_gen6.jump_count = 0;
+
+   brw_set_src0(p, inst, src0);
+   brw_set_src1(p, inst, src1);
+
+   assert(inst->header.compression_control == BRW_COMPRESSION_NONE);
+   assert(inst->header.predicate_control == BRW_PREDICATE_NONE);
+   inst->header.destreg__conditionalmod = conditional;
+
+   if (!p->single_program_flow)
+      inst->header.thread_control = BRW_THREAD_SWITCH;
+
+   push_if_stack(p, inst);
+   return inst;
+}
+
+/**
+ * In single-program-flow (SPF) mode, rewrite an IF (and its optional
+ * ELSE) as ADD instructions.
+ *
+ * The immediate in bits3 of each rewritten instruction is a byte distance
+ * (16 bytes per instruction).  \p else_inst may be NULL.
+ */
+static void
+convert_IF_ELSE_to_ADD(struct brw_compile *p,
+                       struct brw_instruction *if_inst,
+                       struct brw_instruction *else_inst)
+{
+   /* Slot of the next instruction to be emitted, i.e. where the ENDIF
+    * would have gone.
+    */
+   struct brw_instruction *after_block = &p->store[p->nr_insn];
+
+   assert(p->single_program_flow);
+   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+   assert(if_inst->header.execution_size == BRW_EXECUTE_1);
+
+   /* The IF becomes an ADD with the predicate reversed, so the jump is
+    * taken when the IF body must be skipped.  No ENDIF is needed: there
+    * are no stack operations to undo, and a thread that is executing
+    * simply carries on.
+    */
+   if_inst->header.opcode = BRW_OPCODE_ADD;
+   if_inst->header.predicate_inverse = 1;
+
+   if (else_inst == NULL) {
+      /* No ELSE block: skip straight to where the ENDIF would be. */
+      if_inst->bits3.ud = (after_block - if_inst) * 16;
+      return;
+   }
+
+   /* The ELSE becomes an ADD that skips to where the ENDIF would be, and
+    * the IF jumps to the first instruction of the ELSE block.
+    */
+   else_inst->header.opcode = BRW_OPCODE_ADD;
+
+   if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
+   else_inst->bits3.ud = (after_block - else_inst) * 16;
+}
+
+/**
+ * Resolve the jump fields of an IF, its optional ELSE, and the ENDIF that
+ * closes them, now that all three positions are known.
+ *
+ * \param p           compile state
+ * \param if_inst     the IF instruction
+ * \param else_inst   the matching ELSE, or NULL when there is none
+ * \param endif_inst  the ENDIF instruction
+ */
+static void
+patch_IF_ELSE(struct brw_compile *p,
+              struct brw_instruction *if_inst,
+              struct brw_instruction *else_inst,
+              struct brw_instruction *endif_inst)
+{
+   struct intel_context *intel = &p->brw->intel;
+   unsigned br;
+
+   /* Single program flow on gen < 6 never gets here: on those platforms
+    * the flow control instructions are turned into conditional ADDs on IP
+    * instead (see brw_ENDIF).
+    *
+    * Gen6 cannot do that (SandyBridge PRM, Volume 4 part 2, p79: "When SPF
+    * is ON, IP may not be updated by non-flow control instructions."), and
+    * later platforms gain nothing significant from it, so on gen6+ IF and
+    * ELSE are patched here even in single program flow mode.
+    */
+   if (intel->gen < 6)
+      assert(!p->single_program_flow);
+
+   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+   assert(endif_inst != NULL);
+   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+
+   /* Jump counts are in 64-bit chunks from gen5 on, so a 128-bit
+    * instruction counts as 2.
+    */
+   br = intel->gen >= 5 ? 2 : 1;
+
+   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
+   endif_inst->header.execution_size = if_inst->header.execution_size;
+
+   if (else_inst == NULL) {
+      /* IF -> ENDIF only. */
+      if (intel->gen < 6) {
+         /* Use IFF: no mask stack operations when all channels are
+          * false, and the jump goes past the ENDIF.
+          */
+         if_inst->header.opcode = BRW_OPCODE_IFF;
+         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
+         if_inst->bits3.if_else.pop_count = 0;
+         if_inst->bits3.if_else.pad0 = 0;
+      } else if (intel->gen == 6) {
+         /* Gen6 has no IFF; the IF points at the ENDIF itself. */
+         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
+      } else {
+         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
+      }
+      return;
+   }
+
+   /* IF -> ELSE -> ENDIF. */
+   else_inst->header.execution_size = if_inst->header.execution_size;
+
+   if (intel->gen < 6) {
+      /* The IF points at the ELSE. */
+      if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
+      if_inst->bits3.if_else.pop_count = 0;
+      if_inst->bits3.if_else.pad0 = 0;
+
+      /* The pre-gen6 ELSE points just past the matching ENDIF. */
+      else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
+      else_inst->bits3.if_else.pop_count = 1;
+      else_inst->bits3.if_else.pad0 = 0;
+   } else if (intel->gen == 6) {
+      /* The IF points just past the ELSE, the ELSE at the ENDIF. */
+      if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
+      else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
+   } else {
+      /* The IF's JIP points just past the ELSE ... */
+      if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
+      /* ... while the IF's UIP and the ELSE's JIP point at the ENDIF. */
+      if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+      else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
+   }
+}
+/**
+ * Emit an ELSE instruction and push it onto the IF stack.
+ *
+ * No jump target is known yet: the gen6 jump_count and the gen7+ JIP/UIP
+ * are explicitly zeroed here.  brw_ENDIF() later pops this instruction and
+ * passes it to patch_IF_ELSE() or convert_IF_ELSE_to_ADD() to fill in the
+ * real offsets.
+ */
+void
+brw_ELSE(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+
+ insn = next_insn(p, BRW_OPCODE_ELSE);
+ /* The operands used for ELSE differ per hardware generation. */
+ if (intel->gen < 6) {
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ } else if (intel->gen == 6) {
+ brw_set_dest(p, insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = 0;
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ } else {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = 0;
+ insn->bits3.break_cont.uip = 0;
+ }
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ if (!p->single_program_flow)
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+ /* Recorded so that brw_ENDIF() can find and patch this instruction. */
+ push_if_stack(p, insn);
+}
+/**
+ * Close the innermost open IF block (with its optional ELSE).
+ *
+ * Pops the IF, and the ELSE if one is on top of it, from the IF stack.
+ * Normally an ENDIF instruction is emitted and patch_IF_ELSE() resolves the
+ * jump fields of all three instructions.  In single program flow mode on
+ * gen < 6 no ENDIF is emitted; the IF/ELSE are rewritten by
+ * convert_IF_ELSE_to_ADD() instead.
+ */
+void
+brw_ENDIF(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn = NULL;
+ struct brw_instruction *else_inst = NULL;
+ struct brw_instruction *if_inst = NULL;
+ struct brw_instruction *tmp;
+ bool emit_endif = true;
+
+ /* In single program flow mode, we can express IF and ELSE instructions
+ * equivalently as ADD instructions that operate on IP. On platforms prior
+ * to Gen6, flow control instructions cause an implied thread switch, so
+ * this is a significant savings.
+ *
+ * However, on Gen6, writing to IP doesn't work in single program flow mode
+ * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
+ * not be updated by non-flow control instructions."). And on later
+ * platforms, there is no significant benefit to converting control flow
+ * instructions to conditional ADDs. So we only do this trick on Gen4 and
+ * Gen5.
+ */
+ if (intel->gen < 6 && p->single_program_flow)
+ emit_endif = false;
+
+ /*
+ * A single next_insn() may change the base address of the instruction
+ * store memory (p->store), so call it first, before any instruction
+ * pointer is obtained from the IF stack.
+ */
+ if (emit_endif)
+ insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+ /* Pop the IF and (optional) ELSE instructions from the stack */
+ p->if_depth_in_loop[p->loop_stack_depth]--; /* value read as pop_count by brw_BREAK()/brw_CONT() */
+ tmp = pop_if_stack(p);
+ if (tmp->header.opcode == BRW_OPCODE_ELSE) {
+ else_inst = tmp;
+ tmp = pop_if_stack(p);
+ }
+ if_inst = tmp;
+
+ if (!emit_endif) {
+ /* ENDIF is useless; don't bother emitting it. */
+ convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
+ return;
+ }
+
+ if (intel->gen < 6) {
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ } else if (intel->gen == 6) {
+ brw_set_dest(p, insn, brw_imm_w(0));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ } else {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ }
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ /* Also pop item off the stack in the endif instruction: */
+ if (intel->gen < 6) {
+ insn->bits3.if_else.jump_count = 0;
+ insn->bits3.if_else.pop_count = 1;
+ insn->bits3.if_else.pad0 = 0;
+ } else if (intel->gen == 6) {
+ insn->bits1.branch_gen6.jump_count = 2;
+ } else {
+ insn->bits3.break_cont.jip = 2;
+ }
+ patch_IF_ELSE(p, if_inst, else_inst, insn); /* also copies the IF's execution size to the ENDIF */
+}
+
+/* Emit a BREAK instruction and return it.  No jump target is filled in
+ * here.
+ */
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_instruction *brk = next_insn(p, BRW_OPCODE_BREAK);
+
+   if (intel->gen < 6) {
+      /* Pre-gen6 BREAK operates on IP and records how many IFs are open
+       * inside the current loop as its pop count.
+       */
+      brw_set_dest(p, brk, brw_ip_reg());
+      brw_set_src0(p, brk, brw_ip_reg());
+      brw_set_src1(p, brk, brw_imm_d(0x0));
+      brk->bits3.if_else.pad0 = 0;
+      brk->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
+   } else {
+      /* Gen6+ uses null dest/src0 and a zero immediate. */
+      brw_set_dest(p, brk, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, brk, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src1(p, brk, brw_imm_d(0x0));
+   }
+
+   brk->header.compression_control = BRW_COMPRESSION_NONE;
+   brk->header.execution_size = BRW_EXECUTE_8;
+
+   return brk;
+}
+/**
+ * Emit a CONTINUE instruction with a zero immediate as src1 and return it.
+ * Compression is disabled and the execution size is forced to 8.  This
+ * function performs no generation check itself.
+ */
+struct brw_instruction *gen6_CONT(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ /* NOTE(review): dest and src0 are set twice below, first to the null
+ * register and then to the IP register.  The second pair presumably
+ * overwrites the first, which would make the first pair dead code --
+ * confirm against brw_set_dest()/brw_set_src0() before removing it.
+ */
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ return insn;
+}
+
+/* Emit a CONTINUE that operates on IP and records the number of IFs open
+ * in the current loop as its pop count.  The jump count is not written
+ * here.
+ */
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+   struct brw_instruction *cont = next_insn(p, BRW_OPCODE_CONTINUE);
+
+   brw_set_dest(p, cont, brw_ip_reg());
+   brw_set_src0(p, cont, brw_ip_reg());
+   brw_set_src1(p, cont, brw_imm_d(0x0));
+
+   cont->header.compression_control = BRW_COMPRESSION_NONE;
+   cont->header.execution_size = BRW_EXECUTE_8;
+   /* cont->header.mask_control = BRW_MASK_DISABLE; */
+
+   cont->bits3.if_else.pad0 = 0;
+   cont->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
+
+   return cont;
+}
+
+/* Emit a HALT instruction with null dest/src0 and return it. */
+struct brw_instruction *gen6_HALT(struct brw_compile *p)
+{
+   struct brw_instruction *halt = next_insn(p, BRW_OPCODE_HALT);
+
+   brw_set_dest(p, halt, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   brw_set_src0(p, halt, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   /* src1 carries UIP and JIP; both are updated later. */
+   brw_set_src1(p, halt, brw_imm_d(0x0));
+
+   if (!p->compressed) {
+      halt->header.compression_control = BRW_COMPRESSION_NONE;
+      halt->header.execution_size = BRW_EXECUTE_8;
+   } else {
+      /* Compressed mode: 16-wide, compression control left untouched. */
+      halt->header.execution_size = BRW_EXECUTE_16;
+   }
+
+   return halt;
+}
+
+/* DO/WHILE loop:
+ *
+ * The DO/WHILE is just an unterminated loop -- break or continue are
+ * used for control within the loop. We have a few ways they can be
+ * done.
+ *
+ * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
+ * jip and no DO instruction.
+ *
+ * For non-uniform control flow pre-gen6, there's a DO instruction to
+ * push the mask, and a WHILE to jump back, and BREAK to get out and
+ * pop the mask.
+ *
+ * For gen6, there's no more mask stack, so no need for DO. WHILE
+ * just points back to the first instruction of the loop.
+ *
+ * brw_DO() records the start of the loop on the loop stack and returns it.
+ * On gen6+ or in single program flow mode nothing is emitted: the slot at
+ * p->store[p->nr_insn] is pushed and returned.  Otherwise a DO instruction
+ * with null operands, no compression, no predicate and the requested
+ * execute_size is emitted, pushed and returned.
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
+{
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6 || p->single_program_flow) {
+ push_loop_stack(p, &p->store[p->nr_insn]);
+ return &p->store[p->nr_insn];
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+
+ push_loop_stack(p, insn);
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(p, insn, brw_null_reg());
+ brw_set_src0(p, insn, brw_null_reg());
+ brw_set_src1(p, insn, brw_null_reg());
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = execute_size;
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ /* insn->header.mask_control = BRW_MASK_ENABLE; */
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+
+ return insn;
+ }
+}
+
+/**
+ * Pre-gen6 only: point the still-unpatched BREAK and CONTINUE instructions
+ * of the innermost loop at its WHILE instruction.
+ *
+ * On gen6+ this job is done by brw_set_uip_jip(), which does not need to
+ * track loop nesting because it can always target the end of the
+ * block/current loop.
+ *
+ * \param p           compile state
+ * \param while_inst  the WHILE that closes the loop being patched
+ */
+static void
+brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
+{
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_instruction *do_inst = get_inner_do_insn(p);
+   struct brw_instruction *cur;
+   int br = (intel->gen == 5) ? 2 : 1;
+
+   /* Walk backwards from just before the WHILE up to (not including) the
+    * DO.
+    */
+   for (cur = while_inst - 1; cur != do_inst; cur--) {
+      /* A non-zero jump count means the instruction was already patched
+       * as part of a loop nested inside this one.
+       */
+      if (cur->bits3.if_else.jump_count != 0)
+         continue;
+
+      switch (cur->header.opcode) {
+      case BRW_OPCODE_BREAK:
+         /* BREAK lands one instruction past the WHILE. */
+         cur->bits3.if_else.jump_count = br * ((while_inst - cur) + 1);
+         break;
+      case BRW_OPCODE_CONTINUE:
+         /* CONTINUE lands on the WHILE itself. */
+         cur->bits3.if_else.jump_count = br * (while_inst - cur);
+         break;
+      default:
+         break;
+      }
+   }
+}
+/**
+ * Emit the instruction that closes the innermost loop and pop the loop
+ * stack (p->loop_stack_depth is decremented).
+ *
+ * - gen7+: a WHILE whose JIP is br * (do_insn - insn).
+ * - gen6: a WHILE whose jump_count is br * (do_insn - insn).
+ * - gen < 6, single program flow: an ADD to IP of (do_insn - insn) * 16
+ *   with execution size 1.
+ * - gen < 6 otherwise: a WHILE whose jump_count is
+ *   br * (do_insn - insn + 1), taking its execution size from the DO;
+ *   pending BREAK/CONT are then patched by brw_patch_break_cont().
+ *
+ * br is 2 on gen5+ and 1 before.  The predicate control of p->current is
+ * reset to BRW_PREDICATE_NONE before returning the emitted instruction.
+ */
+struct brw_instruction *brw_WHILE(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn, *do_insn;
+ unsigned br = 1;
+
+ if (intel->gen >= 5)
+ br = 2;
+
+ if (intel->gen >= 7) {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+ do_insn = get_inner_do_insn(p);
+
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = br * (do_insn - insn);
+
+ insn->header.execution_size = BRW_EXECUTE_8;
+ } else if (intel->gen == 6) {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+ do_insn = get_inner_do_insn(p);
+
+ brw_set_dest(p, insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+
+ insn->header.execution_size = BRW_EXECUTE_8;
+ } else {
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ do_insn = get_inner_do_insn(p);
+
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
+ insn->header.execution_size = BRW_EXECUTE_1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+ do_insn = get_inner_do_insn(p);
+
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ insn->header.execution_size = do_insn->header.execution_size;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+
+ brw_patch_break_cont(p, insn);
+ }
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ p->loop_stack_depth--;
+
+ return insn;
+}
+
+
+/* FORWARD JUMPS:
+ *
+ * Resolve a previously emitted JMPI so that it lands on the next
+ * instruction to be emitted.  jmp_insn_idx is the index of the JMPI in
+ * p->store; its src1 must be an immediate.
+ */
+void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
+{
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
+   /* The distance is scaled by 2 on gen5+ and by 1 before. */
+   const unsigned jmpi = (intel->gen >= 5) ? 2 : 1;
+
+   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+   /* Number of instructions between the JMPI and the landing point, not
+    * counting the JMPI itself.
+    */
+   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
+}
+
+
+
+/* Emit a CMP instruction with the given conditional modifier.
+ *
+ * To integrate with the flow control helpers above, it makes sense for the
+ * comparison to populate the flag register.  It might be simpler just to
+ * use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             unsigned conditional,
+             struct brw_reg src0,
+             struct brw_reg src1)
+{
+   struct brw_instruction *cmp = next_insn(p, BRW_OPCODE_CMP);
+
+   cmp->header.destreg__conditionalmod = conditional;
+   brw_set_dest(p, cmp, dest);
+   brw_set_src0(p, cmp, src0);
+   brw_set_src1(p, cmp, src1);
+
+   /* guess_execution_size(cmp, src0); */
+
+   /* Only a destination of architecture register number 0 changes the
+    * default instruction state.
+    */
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE || dest.nr != 0)
+      return;
+
+   /* From now on, future instructions use the computed flag value until
+    * brw_set_predicate_control_flag_value() is called again.
+    */
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   p->flag_value = 0xff;
+}
+
+/* Emit a 'wait' instruction on notification register n1.  The host can
+ * program MMIO to wake the thread up again.
+ */
+void brw_WAIT (struct brw_compile *p)
+{
+   struct brw_instruction *wait = next_insn(p, BRW_OPCODE_WAIT);
+   struct brw_reg n1 = brw_notification_1_reg();
+
+   /* n1 is both destination and first source; src1 is the null register. */
+   brw_set_dest(p, wait, n1);
+   brw_set_src0(p, wait, n1);
+   brw_set_src1(p, wait, brw_null_reg());
+
+   wait->header.execution_size = 0; /* must */
+   wait->header.predicate_control = 0;
+   wait->header.compression_control = 0;
+}
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/** Extended math function, float[8].
+ *
+ * On gen6+ this emits a MATH instruction with a null src1; msg_reg_nr,
+ * data_type and precision are not used on that path.  Before gen6 it emits
+ * a SEND to message register msg_reg_nr whose descriptor is filled in by
+ * brw_set_math_message().
+ *
+ * On gen6+, dest and src must be general registers, dest must have
+ * horizontal stride 1, and src must be non-float for the INT_DIV functions
+ * and float otherwise.  Gen6 itself additionally requires src to have
+ * stride 1 and no negate/abs modifier.  All of these are checked with
+ * assert() only.
+ */
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ unsigned msg_reg_nr,
+ struct brw_reg src,
+ unsigned data_type,
+ unsigned precision )
+{
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
+
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE);
+ assert(src.file == BRW_GENERAL_REGISTER_FILE);
+
+ assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
+ if (intel->gen == 6)
+ assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+
+ /* Source modifiers are ignored for extended math instructions on Gen6. */
+ if (intel->gen == 6) {
+ assert(!src.negate);
+ assert(!src.abs);
+ }
+
+ if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+ function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+ function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
+ assert(src.type != BRW_REGISTER_TYPE_F);
+ } else {
+ assert(src.type == BRW_REGISTER_TYPE_F);
+ }
+
+ /* Math is the same ISA format as other opcodes, except that CondModifier
+ * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+ */
+ insn->header.destreg__conditionalmod = function;
+
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
+ brw_set_src1(p, insn, brw_null_reg());
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditionalmod = msg_reg_nr; /* this header field holds the message register number here */
+
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
+ brw_set_math_message(p,
+ insn,
+ function,
+ src.type == BRW_REGISTER_TYPE_D,
+ precision,
+ data_type);
+ }
+}
+
+/** Extended math function, float[8].
+ *
+ * Two-source variant; gen6+ only (asserted).  Emits a MATH instruction with
+ * the math function code stored in the conditional-modifier header field.
+ *
+ * dest, src0 and src1 must be general registers and dest must have
+ * horizontal stride 1.  Both sources must be non-float for the INT_DIV
+ * functions and float otherwise.  Gen6 itself additionally requires both
+ * sources to have stride 1 and no negate/abs modifier.  All of these are
+ * checked with assert() only.
+ */
+void brw_math2(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned function,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
+
+ assert(intel->gen >= 6);
+ (void) intel; /* presumably silences an unused-variable warning when assert() is compiled out -- TODO confirm */
+
+
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE);
+ assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+ assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+
+ assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
+ if (intel->gen == 6) {
+ assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
+ assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
+ }
+
+ if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+ function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+ function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
+ assert(src0.type != BRW_REGISTER_TYPE_F);
+ assert(src1.type != BRW_REGISTER_TYPE_F);
+ } else {
+ assert(src0.type == BRW_REGISTER_TYPE_F);
+ assert(src1.type == BRW_REGISTER_TYPE_F);
+ }
+
+ /* Source modifiers are ignored for extended math instructions on Gen6. */
+ if (intel->gen == 6) {
+ assert(!src0.negate);
+ assert(!src0.abs);
+ assert(!src1.negate);
+ assert(!src1.abs);
+ }
+
+ /* Math is the same ISA format as other opcodes, except that CondModifier
+ * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+ */
+ insn->header.destreg__conditionalmod = function;
+
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, src1);
+}
+
+
+/**
+ * Write a block of OWORDs (half a GRF each) to the scratch buffer,
+ * using a constant offset per channel.
+ *
+ * The offset must be aligned to oword size (16 bytes). Used for
+ * register spilling.
+ *
+ * \param mrf       message register that receives the header; it is retyped
+ *                  to UD here
+ * \param num_regs  1 selects a 2-oword message (mlen 2); any other value
+ *                  selects a 4-oword message (mlen 3)
+ * \param offset    byte offset; divided by 16 on gen6+ before being placed
+ *                  in the header
+ */
+void brw_oword_block_write_scratch(struct brw_compile *p,
+ struct brw_reg mrf,
+ int num_regs,
+ unsigned offset)
+{
+ struct intel_context *intel = &p->brw->intel;
+ uint32_t msg_control, msg_type;
+ int mlen;
+
+ if (intel->gen >= 6)
+ offset /= 16;
+
+ mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+
+ if (num_regs == 1) {
+ msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
+ mlen = 2;
+ } else {
+ msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
+ mlen = 3;
+ }
+
+ /* Set up the message header. This is g0, with g0.2 filled with
+ * the offset. We don't want to leave our offset around in g0 or
+ * it'll screw up texture samples, so set it up inside the message
+ * reg.
+ */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ mrf.nr,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_reg dest;
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ int send_commit_msg;
+ struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
+ BRW_REGISTER_TYPE_UW);
+
+ if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ src_header = vec16(src_header);
+ }
+ assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
+ insn->header.destreg__conditionalmod = mrf.nr;
+
+ /* Until gen6, writes followed by reads from the same location
+ * are not guaranteed to be ordered unless write_commit is set.
+ * If set, then a no-op write is issued to the destination
+ * register to set a dependency, and a read from the destination
+ * can be used to ensure the ordering.
+ *
+ * For gen6, only writes between different threads need ordering
+ * protection. Our use of DP writes is all about register
+ * spilling within a thread.
+ */
+ if (intel->gen >= 6) {
+ dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ send_commit_msg = 0;
+ } else {
+ dest = src_header;
+ send_commit_msg = 1;
+ }
+
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
+ } else {
+ brw_set_src0(p, insn, brw_null_reg());
+ }
+
+ if (intel->gen >= 6)
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+ else
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+
+ brw_set_dp_write_message(p,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ msg_control,
+ msg_type,
+ mlen,
+ true, /* header_present */
+ 0, /* not a render target */
+ send_commit_msg, /* response_length */
+ 0, /* eot */
+ send_commit_msg);
+ }
+}
+
+
+/**
+ * Read a block of owords (half a GRF each) from the scratch buffer
+ * using a constant index per channel.
+ *
+ * Offset must be aligned to oword size (16 bytes). Used for register
+ * spilling.
+ *
+ * \param dest      destination register; retyped to UW here
+ * \param mrf       message register that receives the header; retyped to UD
+ * \param num_regs  1 selects a 2-oword read (response length 1); any other
+ *                  value selects a 4-oword read (response length 2)
+ * \param offset    byte offset; divided by 16 on gen6+ before being placed
+ *                  in the header
+ */
+void
+brw_oword_block_read_scratch(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ int num_regs,
+ unsigned offset)
+{
+ struct intel_context *intel = &p->brw->intel;
+ uint32_t msg_control;
+ int rlen;
+
+ if (intel->gen >= 6)
+ offset /= 16;
+
+ mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+ dest = retype(dest, BRW_REGISTER_TYPE_UW);
+
+ if (num_regs == 1) {
+ msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
+ rlen = 1;
+ } else {
+ msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
+ rlen = 2;
+ }
+
+ { /* build the message header: a copy of g0 with the offset in element 2 */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ mrf.nr,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ { /* emit the SEND that performs the read */
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(insn->header.predicate_control == 0);
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = mrf.nr;
+
+ brw_set_dest(p, insn, dest); /* UW? */
+ if (intel->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
+ } else {
+ brw_set_src0(p, insn, brw_null_reg());
+ }
+
+ brw_set_dp_read_message(p,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ msg_control,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+ 1, /* msg_length */
+ true, /* header_present */
+ rlen);
+ }
+}
+
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ *
+ * \param dest              destination; converted to a uword[8] vector here
+ * \param mrf               message register that receives the header
+ * \param offset            byte offset; divided by 16 on gen6+
+ * \param bind_table_index  binding table index passed to the read message
+ *
+ * The header MOVs and the SEND are all emitted with predication,
+ * compression and the execution mask disabled; the caller's instruction
+ * state is restored before returning.
+ */
+void brw_oword_block_read(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t offset,
+ uint32_t bind_table_index)
+{
+ struct intel_context *intel = &p->brw->intel;
+
+ /* On newer hardware, offset is in units of owords. */
+ if (intel->gen >= 6)
+ offset /= 16;
+
+ mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ mrf.nr,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(offset));
+
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditionalmod = mrf.nr;
+
+ /* cast dest to a uword[8] vector */
+ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
+ } else {
+ brw_set_src0(p, insn, brw_null_reg());
+ }
+
+ brw_set_dp_read_message(p,
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 1, /* msg_length */
+ true, /* header_present */
+ 1); /* response_length (1 reg, 2 owords!) */
+
+ brw_pop_insn_state(p);
+}
+
+/**
+ * Emit a render target write: a SENDC on gen6+, a SEND before that.
+ *
+ * The destination is always the null register, 16 wide when dispatch_width
+ * is 16 and 8 wide otherwise.  On gen6+ the src0 argument is ignored and
+ * replaced by message register msg_reg_nr; before gen6, msg_reg_nr is
+ * stored in the instruction header instead.  eot is passed to
+ * brw_set_dp_write_message() twice: as the "last render target write" flag
+ * and as end-of-thread.
+ */
+void brw_fb_WRITE(struct brw_compile *p,
+ int dispatch_width,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned msg_control,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ bool eot,
+ bool header_present)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+ unsigned msg_type;
+ struct brw_reg dest;
+
+ if (dispatch_width == 16)
+ dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ else
+ dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+
+ if (intel->gen >= 6) {
+ insn = next_insn(p, BRW_OPCODE_SENDC);
+ } else {
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ }
+ /* The execution mask is ignored for render target writes. */
+ insn->header.predicate_control = 0;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ if (intel->gen >= 6) {
+ /* headerless version, just submit color payload */
+ src0 = brw_message_reg(msg_reg_nr);
+
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+ } else {
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+ }
+
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_dp_write_message(p,
+ insn,
+ binding_table_index,
+ msg_control,
+ msg_type,
+ msg_length,
+ header_present,
+ eot, /* last render target write */
+ response_length,
+ eot,
+ 0 /* send_commit_msg */);
+}
+
+
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed. See volume 4, page 161 of docs.
+ *
+ * Nothing is emitted when writemask is 0.  For a partial writemask there
+ * are two cases:
+ *
+ * - The enabled channels form one contiguous run: a header is built in
+ *   message register msg_reg_nr (a copy of g0 with the inverted mask
+ *   shifted left by 12 in element 2), src0 is replaced by the null
+ *   register, dest is advanced with offset() by 2 per leading disabled
+ *   channel, and for 16-wide execution response_length becomes twice the
+ *   number of enabled channels.
+ * - Otherwise (need_stall): the SEND is emitted unchanged and followed by
+ *   a MOV of the last response register onto itself.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ unsigned sampler,
+ unsigned writemask,
+ unsigned msg_type,
+ unsigned response_length,
+ unsigned msg_length,
+ unsigned header_present,
+ unsigned simd_mode,
+ unsigned return_format)
+{
+ struct intel_context *intel = &p->brw->intel;
+ bool need_stall = 0;
+
+ if (writemask == 0) {
+ /*printf("%s: zero writemask??\n", __FUNCTION__); */
+ return;
+ }
+
+ /* Hardware doesn't do destination dependency checking on send
+ * instructions properly. Add a workaround which generates the
+ * dependency by other means. In practice it seems like this bug
+ * only crops up for texture samples, and only where registers are
+ * written by the send and then written again later without being
+ * read in between. Luckily for us, we already track that
+ * information and use it to modify the writemask for the
+ * instruction, so that is a guide for whether a workaround is
+ * needed.
+ */
+ if (writemask != BRW_WRITEMASK_XYZW) {
+ unsigned dst_offset = 0;
+ unsigned i, newmask = 0, len = 0;
+ /* Skip the leading disabled channels, adding 2 to dst_offset for each. */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2;
+ }
+ for (; i < 4; i++) { /* collect the first contiguous run of enabled channels */
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
+ }
+
+ if (newmask != writemask) {
+ need_stall = 1;
+ /* printf("need stall %x %x\n", newmask , writemask); */
+ }
+ else {
+ bool dispatch_16 = false;
+
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+ guess_execution_size(p, p->current, dest);
+ if (p->current->header.execution_size == BRW_EXECUTE_16)
+ dispatch_16 = true;
+
+ newmask = ~newmask & BRW_WRITEMASK_XYZW;
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+
+ brw_pop_insn_state(p);
+
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
+
+ /* For 16-wide dispatch, masked channels are skipped in the
+ * response. For 8-wide, masked channels still take up slots,
+ * and are just not written to.
+ */
+ if (dispatch_16)
+ response_length = len * 2;
+ }
+ }
+
+ {
+ struct brw_instruction *insn;
+
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ if (intel->gen < 6)
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_sampler_message(p, insn,
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ header_present,
+ simd_mode,
+ return_format);
+ }
+
+ if (need_stall) {
+ struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+ /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
+ retype(reg, BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+ }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style. Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ *
+ * Emits a SEND whose descriptor is filled in by brw_set_urb_message() from
+ * allocate, used, msg_length, response_length, eot, writes_complete, offset
+ * and swizzle.  gen6_resolve_implied_move() is called on src0 first.  On
+ * gen7 an extra OR is emitted beforehand that writes g0.5 | 0xff00 into
+ * element 5 of message register msg_reg_nr.  Before gen6, msg_reg_nr is
+ * also stored in the instruction header.  msg_length must be below
+ * BRW_MAX_MRF (asserted).
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ bool allocate,
+ bool used,
+ unsigned msg_length,
+ unsigned response_length,
+ bool eot,
+ bool writes_complete,
+ unsigned offset,
+ unsigned swizzle)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+
+ if (intel->gen == 7) {
+ /* Enable Channel Masks in the URB_WRITE_HWORD message header */
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
+ BRW_REGISTER_TYPE_UD),
+ retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xff00));
+ brw_pop_insn_state(p);
+ }
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < BRW_MAX_MRF);
+
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ if (intel->gen < 6)
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_urb_message(p,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
+
+/**
+ * Return the byte offset of the instruction that follows the one located
+ * \p ip bytes into the instruction store.
+ *
+ * An instruction with cmpt_control set occupies 8 bytes; any other
+ * instruction occupies 16.
+ */
+static int
+next_ip(struct brw_compile *p, int ip)
+{
+   /* Byte offsets are applied through char *: arithmetic on void * is a
+    * GNU extension and is not valid ISO C.
+    */
+   struct brw_instruction *insn = (void *)((char *)p->store + ip);
+
+   if (insn->header.cmpt_control)
+      return ip + 8;
+   else
+      return ip + 16;
+}
+
+/* Scan forward from the instruction after 'start' and return the byte
+ * offset of the first ENDIF, ELSE, WHILE or HALT found before
+ * p->next_insn_offset.  Returns 0 when there is none.
+ */
+static int
+brw_find_next_block_end(struct brw_compile *p, int start)
+{
+   void *base = p->store;
+   int pos = next_ip(p, start);
+
+   while (pos < p->next_insn_offset) {
+      struct brw_instruction *cand = base + pos;
+      unsigned op = cand->header.opcode;
+
+      if (op == BRW_OPCODE_ENDIF || op == BRW_OPCODE_ELSE ||
+          op == BRW_OPCODE_WHILE || op == BRW_OPCODE_HALT)
+         return pos;
+
+      pos = next_ip(p, pos);
+   }
+
+   return 0;
+}
+
+/* Gen6 has no DO instruction, so the end of the loop enclosing 'start' is
+ * found by looking for the first later WHILE whose jump target lies at or
+ * before 'start'.  Returns the byte offset of that WHILE; asserts (and
+ * returns 'start') if no such WHILE exists.
+ */
+static int
+brw_find_loop_end(struct brw_compile *p, int start)
+{
+   struct intel_context *intel = &p->brw->intel;
+   const int unit = 8;   /* jump counts are multiplied by 8 to get bytes */
+   void *base = p->store;
+   int pos = next_ip(p, start);
+
+   /* Begin with the instruction following the one being fixed up. */
+   while (pos < p->next_insn_offset) {
+      struct brw_instruction *cand = base + pos;
+
+      if (cand->header.opcode == BRW_OPCODE_WHILE) {
+         int jump = intel->gen == 6 ? cand->bits1.branch_gen6.jump_count
+                                    : cand->bits3.break_cont.jip;
+
+         if (pos + jump * unit <= start)
+            return pos;
+      }
+
+      pos = next_ip(p, pos);
+   }
+
+   assert(!"not reached");
+   return start;
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK, CONT, and HALT instructions to their correct locations.
+ *
+ * ENDIF instructions get their JIP filled in here as well.  Nothing is
+ * done before gen6.  All distances are byte offsets divided by 'scale'
+ * (8).  Compacted instructions are skipped; a compacted BREAK, CONTINUE
+ * or HALT trips an assertion.
+ */
+void
+brw_set_uip_jip(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int ip;
+ int scale = 8;
+ void *store = p->store;
+
+ if (intel->gen < 6)
+ return;
+
+ for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
+ struct brw_instruction *insn = store + ip;
+
+ if (insn->header.cmpt_control) {
+ /* Fixups for compacted BREAK/CONTINUE not supported yet. */
+ assert(insn->header.opcode != BRW_OPCODE_BREAK &&
+ insn->header.opcode != BRW_OPCODE_CONTINUE &&
+ insn->header.opcode != BRW_OPCODE_HALT);
+ continue;
+ }
+
+ /* Offset of the next ENDIF/ELSE/WHILE/HALT after this instruction,
+ * or 0 when there is none.
+ */
+ int block_end_ip = brw_find_next_block_end(p, ip);
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_BREAK:
+ assert(block_end_ip != 0);
+ insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
+ /* Gen7 UIP points to WHILE; Gen6 points just after it */
+ insn->bits3.break_cont.uip =
+ (brw_find_loop_end(p, ip) - ip +
+ (intel->gen == 6 ? 16 : 0)) / scale;
+ break;
+ case BRW_OPCODE_CONTINUE:
+ assert(block_end_ip != 0);
+ insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
+ /* UIP is the distance to the enclosing loop's WHILE. */
+ insn->bits3.break_cont.uip =
+ (brw_find_loop_end(p, ip) - ip) / scale;
+
+ assert(insn->bits3.break_cont.uip != 0);
+ assert(insn->bits3.break_cont.jip != 0);
+ break;
+
+ case BRW_OPCODE_ENDIF:
+ /* With no later block end, JIP is 2, i.e. 16 bytes at scale 8:
+ * the size of one uncompacted instruction.
+ */
+ if (block_end_ip == 0)
+ insn->bits3.break_cont.jip = 2;
+ else
+ insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
+ break;
+
+ case BRW_OPCODE_HALT:
+ /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
+ *
+ * "In case of the halt instruction not inside any conditional
+ * code block, the value of <JIP> and <UIP> should be the
+ * same. In case of the halt instruction inside conditional code
+ * block, the <UIP> should be the end of the program, and the
+ * <JIP> should be end of the most inner conditional code block."
+ *
+ * The uip will have already been set by whoever set up the
+ * instruction.
+ */
+ if (block_end_ip == 0) {
+ insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
+ } else {
+ insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
+ }
+ assert(insn->bits3.break_cont.uip != 0);
+ assert(insn->bits3.break_cont.jip != 0);
+ break;
+ }
+ }
+}
+
+/* Emit a SEND instruction carrying an FF_SYNC message.
+ *
+ * dest and src0 become the SEND's destination and first source, src1 is
+ * the immediate 0, and allocate/response_length/eot are forwarded
+ * unchanged to brw_set_ff_sync_message().
+ */
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ bool allocate,
+ unsigned response_length,
+ bool eot)
+{
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+
+ /* NOTE(review): may rewrite src0 (passed by address); presumably moves the
+ * payload into message register msg_reg_nr on gen6+ - confirm.
+ */
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ /* Before gen6 the message register number is stored in the header's
+ * destreg__conditionalmod field.
+ */
+ if (intel->gen < 6)
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_ff_sync_message(p,
+ insn,
+ allocate,
+ response_length,
+ eot);
+}
+
+/**
+ * Emit the SEND instruction necessary to generate stream output data on Gen6
+ * (for transform feedback).
+ *
+ * If send_commit_msg is true, this is the last piece of stream output data
+ * from this thread, so send the data as a committed write. According to the
+ * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
+ *
+ * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
+ * writes are complete by sending the final write as a committed write."
+ *
+ * The message is always one register long with a header present.
+ * send_commit_msg is passed both as the response length (so it is 1 for a
+ * committed write and 0 otherwise) and as the commit flag.
+ */
+void
+brw_svb_write(struct brw_compile *p,
+ struct brw_reg dest,
+ unsigned msg_reg_nr,
+ struct brw_reg src0,
+ unsigned binding_table_index,
+ bool send_commit_msg)
+{
+ struct brw_instruction *insn;
+
+ /* NOTE(review): may rewrite src0 (passed by address); presumably moves the
+ * payload into message register msg_reg_nr - confirm.
+ */
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, brw_imm_d(0));
+ brw_set_dp_write_message(p, insn,
+ binding_table_index,
+ 0, /* msg_control: ignored */
+ GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
+ 1, /* msg_length */
+ true, /* header_present */
+ 0, /* last_render_target: ignored */
+ send_commit_msg, /* response_length */
+ 0, /* end_of_thread */
+ send_commit_msg); /* send_commit_msg */
+}
+
+/**
+ * This instruction is generated as a single-channel align1 instruction by
+ * both the VS and FS stages when using INTEL_DEBUG=shader_time.
+ *
+ * We can't use the typed atomic op in the FS because that has the execution
+ * mask ANDed with the pixel mask, but we just want to write the one dword for
+ * all the pixels.
+ *
+ * We don't use the SIMD4x2 atomic ops in the VS because we want to just
+ * write one u32. So we use the same untyped atomic write message as the
+ * pixel shader.
+ *
+ * The untyped atomic operation requires a BUFFER surface type with RAW
+ * format, and is only accessible through the legacy DATA_CACHE dataport
+ * messages.
+ *
+ * Requires gen >= 7 (asserted).  The payload is two registers long
+ * (offset, value) starting at message register base_mrf; surf_index is
+ * ORed into the low bits of the message descriptor.
+ */
+void brw_shader_time_add(struct brw_compile *p,
+ int base_mrf,
+ uint32_t surf_index)
+{
+ struct intel_context *intel = &p->brw->intel;
+ assert(intel->gen >= 7);
+
+ /* The align1 / mask-disable settings are made between a push/pop pair
+ * that brackets only the creation of the SEND.
+ */
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_pop_insn_state(p);
+
+ /* We use brw_vec1_reg and unmasked because we want to increment the given
+ * offset only once.
+ */
+ brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NULL, 0));
+ brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ base_mrf, 0));
+
+ bool header_present = false;
+ bool eot = false;
+ uint32_t mlen = 2; /* offset, value */
+ uint32_t rlen = 0;
+ brw_set_message_descriptor(p, send,
+ GEN7_SFID_DATAPORT_DATA_CACHE,
+ mlen, rlen, header_present, eot);
+
+ /* Remaining descriptor fields are ORed in directly. */
+ send->bits3.ud |= 6 << 14; /* untyped atomic op */
+ send->bits3.ud |= 0 << 13; /* no return data */
+ send->bits3.ud |= 1 << 12; /* SIMD8 mode */
+ send->bits3.ud |= BRW_AOP_ADD << 8;
+ send->bits3.ud |= surf_index << 0;
+}
diff --git a/assembler/brw_eu_util.c b/assembler/brw_eu_util.c
new file mode 100644
index 0000000..f9126ab
--- /dev/null
+++ b/assembler/brw_eu_util.c
@@ -0,0 +1,125 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+/* Emit a brw_math() call using BRW_MATH_FUNCTION_INV on src, writing dst,
+ * with full precision and vector data type.
+ */
+void brw_math_invert( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src)
+{
+   const unsigned msg_reg_nr = 0;
+
+   brw_math(p, dst, BRW_MATH_FUNCTION_INV, msg_reg_nr, src,
+            BRW_MATH_PRECISION_FULL, BRW_MATH_DATA_VECTOR);
+}
+
+
+
+/* Copy 'count' 32-byte rows from src to dst, each row as two MOVs of a
+ * 4-wide region (byte offsets +0 and +16 within the row).
+ */
+void brw_copy4(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               unsigned count)
+{
+   struct brw_reg to = vec4(dst);
+   struct brw_reg from = vec4(src);
+   unsigned row, base;
+
+   for (row = 0, base = 0; row < count; row++, base += 32) {
+      brw_MOV(p, byte_offset(to, base), byte_offset(from, base));
+      brw_MOV(p, byte_offset(to, base + 16), byte_offset(from, base + 16));
+   }
+}
+
+
+/* Copy 'count' 32-byte rows from src to dst, one 8-wide MOV per row. */
+void brw_copy8(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               unsigned count)
+{
+   struct brw_reg to = vec8(dst);
+   struct brw_reg from = vec8(src);
+   unsigned row, base;
+
+   for (row = 0, base = 0; row < count; row++, base += 32)
+      brw_MOV(p, byte_offset(to, base), byte_offset(from, base));
+}
+
+
+/* Copy 'count' 32-byte rows between two indirectly addressed locations,
+ * each row as two 4-wide MOVs (offsets +0 and +16 within the row).
+ */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+                                   struct brw_indirect dst_ptr,
+                                   struct brw_indirect src_ptr,
+                                   unsigned count)
+{
+   unsigned row, base;
+
+   for (row = 0, base = 0; row < count; row++, base += 32) {
+      brw_MOV(p, deref_4f(dst_ptr, base), deref_4f(src_ptr, base));
+      brw_MOV(p, deref_4f(dst_ptr, base + 16), deref_4f(src_ptr, base + 16));
+   }
+}
+
+
+/* Copy 'count' 32-byte rows from an indirectly addressed source into dst,
+ * each row as two 4-wide MOVs (offsets +0 and +16 within the row).
+ */
+void brw_copy_from_indirect(struct brw_compile *p,
+                            struct brw_reg dst,
+                            struct brw_indirect ptr,
+                            unsigned count)
+{
+   struct brw_reg to = vec4(dst);
+   unsigned row, base;
+
+   for (row = 0, base = 0; row < count; row++, base += 32) {
+      brw_MOV(p, byte_offset(to, base), deref_4f(ptr, base));
+      brw_MOV(p, byte_offset(to, base + 16), deref_4f(ptr, base + 16));
+   }
+}
+
+
+
+
diff --git a/assembler/brw_reg.h b/assembler/brw_reg.h
new file mode 100644
index 0000000..f225915
--- /dev/null
+++ b/assembler/brw_reg.h
@@ -0,0 +1,808 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/** @file brw_reg.h
+ *
+ * This file defines struct brw_reg, which is our representation for EU
+ * registers. They're not a hardware specific format, just an abstraction
+ * that intends to capture the full flexibility of the hardware registers.
+ *
+ * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
+ * the abstract brw_reg type into the actual hardware instruction encoding.
+ */
+
+#ifndef BRW_REG_H
+#define BRW_REG_H
+
+#include <stdbool.h>
+#include <assert.h>
+#include "brw_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/**
+ * First GRF used for the MRF hack.
+ *
+ * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We
+ * haven't converted our compiler to be aware of this, so it asks for MRFs and
+ * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
+ * register allocators have to be careful of this to avoid corrupting the "MRF"s
+ * with actual GRF allocations.
+ */
+#define GEN7_MRF_HACK_START 112
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+/* Pack four 2-bit channel selectors into an 8-bit swizzle: a in bits 1:0,
+ * b in bits 3:2, c in bits 5:4, d in bits 7:6.
+ */
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+/* Extract the 2-bit selector at position idx (0..3) from a packed swizzle. */
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+/* Named swizzles; selector values 0,1,2,3 stand for X,Y,Z,W. */
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+/** True when swiz selects the same channel four times (XXXX/YYYY/ZZZZ/WWWW). */
+static inline bool
+brw_is_single_value_swizzle(int swiz)
+{
+   switch (swiz) {
+   case BRW_SWIZZLE_XXXX:
+   case BRW_SWIZZLE_YYYY:
+   case BRW_SWIZZLE_ZZZZ:
+   case BRW_SWIZZLE_WWWW:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/* Destination writemask bits, one per channel. */
+#define BRW_WRITEMASK_X 0x1
+#define BRW_WRITEMASK_Y 0x2
+#define BRW_WRITEMASK_Z 0x4
+#define BRW_WRITEMASK_W 0x8
+
+/* Common combinations of the per-channel bits above. */
+#define BRW_WRITEMASK_XY (BRW_WRITEMASK_X | BRW_WRITEMASK_Y)
+#define BRW_WRITEMASK_XZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Z)
+#define BRW_WRITEMASK_XW (BRW_WRITEMASK_X | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_YW (BRW_WRITEMASK_Y | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_ZW (BRW_WRITEMASK_Z | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_XYZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | BRW_WRITEMASK_Z)
+#define BRW_WRITEMASK_XYZW (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | \
+ BRW_WRITEMASK_Z | BRW_WRITEMASK_W)
+
+#define REG_SIZE (8*4)
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ *
+ * The bitfields before dw1 add up to 32 bits.  dw1 holds either the
+ * align16/indirect 'bits' or, for immediates, the value itself viewed as
+ * float, int or unsigned.
+ */
+struct brw_reg {
+ unsigned type:4;
+ unsigned file:2;
+ unsigned nr:8;
+ unsigned subnr:5; /* :1 in align16 */
+ unsigned negate:1; /* source only */
+ unsigned abs:1; /* source only */
+ unsigned vstride:4; /* source only */
+ unsigned width:3; /* src only, align1 only */
+ unsigned hstride:2; /* align1 only */
+ unsigned address_mode:1; /* relative addressing, hopefully! */
+ unsigned pad0:1;
+
+ union {
+ struct {
+ unsigned swizzle:8; /* src only, align16 only */
+ unsigned writemask:4; /* dest only, align16 only */
+ int indirect_offset:10; /* relative addressing offset */
+ unsigned pad1:10; /* two dwords total */
+ } bits;
+
+ /* Immediate payload, written by the brw_imm_*() constructors. */
+ float f;
+ int d;
+ unsigned ud;
+ } dw1;
+};
+
+
+/* Address-register based pointer: the address register sub-number plus a
+ * signed 10-bit offset.  The deref_*() helpers turn it into a brw_reg.
+ */
+struct brw_indirect {
+ unsigned addr_subnr:4;
+ int addr_offset:10;
+ unsigned pad:18;
+};
+
+
+/** Size in bytes of one element of the given BRW_REGISTER_TYPE_x, or 0 for
+ * a type not listed here.
+ */
+static inline int
+type_sz(unsigned type)
+{
+   if (type == BRW_REGISTER_TYPE_UD ||
+       type == BRW_REGISTER_TYPE_D ||
+       type == BRW_REGISTER_TYPE_F)
+      return 4;
+
+   if (type == BRW_REGISTER_TYPE_HF ||
+       type == BRW_REGISTER_TYPE_UW ||
+       type == BRW_REGISTER_TYPE_W)
+      return 2;
+
+   if (type == BRW_REGISTER_TYPE_UB ||
+       type == BRW_REGISTER_TYPE_B)
+      return 1;
+
+   return 0;
+}
+
+/**
+ * Build a brw_reg from its individual fields.
+ *
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr sub-register number, in elements of \p type (stored in bytes)
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield
+ *
+ * negate, abs, the indirect offset and the pad bits are cleared, and the
+ * address mode is BRW_ADDRESS_DIRECT.
+ */
+static inline struct brw_reg
+brw_reg(unsigned file,
+        unsigned nr,
+        unsigned subnr,
+        unsigned type,
+        unsigned vstride,
+        unsigned width,
+        unsigned hstride,
+        unsigned swizzle,
+        unsigned writemask)
+{
+   struct brw_reg r;
+
+   /* Range-check the register number for the files that have a limit. */
+   if (file == BRW_GENERAL_REGISTER_FILE) {
+      assert(nr < BRW_MAX_GRF);
+   } else if (file == BRW_MESSAGE_REGISTER_FILE) {
+      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+   } else if (file == BRW_ARCHITECTURE_REGISTER_FILE) {
+      assert(nr <= BRW_ARF_TIMESTAMP);
+   }
+
+   /* Location. */
+   r.file = file;
+   r.nr = nr;
+   r.subnr = subnr * type_sz(type);
+   r.type = type;
+   r.address_mode = BRW_ADDRESS_DIRECT;
+
+   /* Region description. */
+   r.vstride = vstride;
+   r.width = width;
+   r.hstride = hstride;
+
+   /* Source modifiers and padding start out cleared. */
+   r.negate = 0;
+   r.abs = 0;
+   r.pad0 = 0;
+
+   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+    * set swizzle and writemask to W, as the lower bits of subnr will
+    * be lost when converted to align16. This is probably too much to
+    * keep track of as you'd want it adjusted by suboffset(), etc.
+    * Perhaps fix up when converting to align16?
+    */
+   r.dw1.bits.swizzle = swizzle;
+   r.dw1.bits.writemask = writemask;
+   r.dw1.bits.indirect_offset = 0;
+   r.dw1.bits.pad1 = 0;
+
+   return r;
+}
+
+/** Float register covering 16 elements: vstride 16, width 16, hstride 1. */
+static inline struct brw_reg
+brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   const unsigned type = BRW_REGISTER_TYPE_F;
+
+   return brw_reg(file, nr, subnr, type,
+                  BRW_VERTICAL_STRIDE_16, BRW_WIDTH_16, BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+}
+
+/** Float register covering 8 elements: vstride 8, width 8, hstride 1. */
+static inline struct brw_reg
+brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   const unsigned type = BRW_REGISTER_TYPE_F;
+
+   return brw_reg(file, nr, subnr, type,
+                  BRW_VERTICAL_STRIDE_8, BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+}
+
+/** Float register covering 4 elements: vstride 4, width 4, hstride 1. */
+static inline struct brw_reg
+brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   const unsigned type = BRW_REGISTER_TYPE_F;
+
+   return brw_reg(file, nr, subnr, type,
+                  BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+}
+
+/** Float register covering 2 elements: vstride 2, width 2, hstride 1,
+ * with an XYXY swizzle and XY writemask.
+ */
+static inline struct brw_reg
+brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   const unsigned type = BRW_REGISTER_TYPE_F;
+
+   return brw_reg(file, nr, subnr, type,
+                  BRW_VERTICAL_STRIDE_2, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_1,
+                  BRW_SWIZZLE_XYXY, BRW_WRITEMASK_XY);
+}
+
+/** Scalar float register: vstride 0, width 1, hstride 0, with an XXXX
+ * swizzle and X writemask.
+ */
+static inline struct brw_reg
+brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   const unsigned type = BRW_REGISTER_TYPE_F;
+
+   return brw_reg(file, nr, subnr, type,
+                  BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XXXX, BRW_WRITEMASK_X);
+}
+
+
+/** Return a copy of reg with its type field replaced; nothing else changes
+ * (in particular subnr, which is kept in bytes, is not rescaled).
+ */
+static inline struct brw_reg
+retype(struct brw_reg reg, unsigned type)
+{
+   struct brw_reg out = reg;
+
+   out.type = type;
+   return out;
+}
+
+/** Return reg advanced by one register number, unless its vstride is 0, in
+ * which case it is returned unchanged.
+ */
+static inline struct brw_reg
+sechalf(struct brw_reg reg)
+{
+   if (!reg.vstride)
+      return reg;
+
+   reg.nr += 1;
+   return reg;
+}
+
+/** Advance reg's sub-register offset by 'delta' elements of reg's type. */
+static inline struct brw_reg
+suboffset(struct brw_reg reg, unsigned delta)
+{
+   unsigned bytes = delta * type_sz(reg.type);
+
+   reg.subnr = reg.subnr + bytes;
+   return reg;
+}
+
+
+/** Advance reg by 'delta' whole registers. */
+static inline struct brw_reg
+offset(struct brw_reg reg, unsigned delta)
+{
+   reg.nr = reg.nr + delta;
+   return reg;
+}
+
+
+/** Advance reg by 'bytes' bytes, carrying from subnr into nr at every
+ * REG_SIZE boundary.
+ */
+static inline struct brw_reg
+byte_offset(struct brw_reg reg, unsigned bytes)
+{
+   unsigned linear = reg.nr * REG_SIZE + reg.subnr + bytes;
+   struct brw_reg out = reg;
+
+   out.subnr = linear % REG_SIZE;
+   out.nr = linear / REG_SIZE;
+   return out;
+}
+
+
+/** 16-wide unsigned-word register; subnr counts UW elements. */
+static inline struct brw_reg
+brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   struct brw_reg base = brw_vec16_reg(file, nr, 0);
+   struct brw_reg words = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(words, subnr);
+}
+
+/** 8-wide unsigned-word register; subnr counts UW elements. */
+static inline struct brw_reg
+brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   struct brw_reg base = brw_vec8_reg(file, nr, 0);
+   struct brw_reg words = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(words, subnr);
+}
+
+/** Scalar unsigned-word register; subnr counts UW elements. */
+static inline struct brw_reg
+brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+   struct brw_reg base = brw_vec1_reg(file, nr, 0);
+   struct brw_reg word = retype(base, BRW_REGISTER_TYPE_UW);
+
+   return suboffset(word, subnr);
+}
+
+/** Build a scalar register in the BRW_IMMEDIATE_VALUE file with the given
+ * type.  dw1 is left with zero swizzle/writemask bits for the caller to
+ * overwrite with the immediate value.
+ */
+static inline struct brw_reg
+brw_imm_reg(unsigned type)
+{
+   const unsigned nr = 0, subnr = 0;
+   const unsigned swizzle = 0, writemask = 0;
+
+   return brw_reg(BRW_IMMEDIATE_VALUE, nr, subnr, type,
+                  BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0,
+                  swizzle, writemask);
+}
+
+/** Immediate of type F holding f. */
+static inline struct brw_reg
+brw_imm_f(float f)
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_F);
+
+   result.dw1.f = f;
+   return result;
+}
+
+/** Immediate of type D (signed dword) holding d. */
+static inline struct brw_reg
+brw_imm_d(int d)
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_D);
+
+   result.dw1.d = d;
+   return result;
+}
+
+/** Immediate of type UD (unsigned dword) holding ud. */
+static inline struct brw_reg
+brw_imm_ud(unsigned ud)
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+
+   result.dw1.ud = ud;
+   return result;
+}
+
+/** Construct ushort immediate register
+ *
+ * The 16-bit value is stored in both halves of the 32-bit immediate.
+ */
+static inline struct brw_reg
+brw_imm_uw(uint16_t uw)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+   /* Widen to unsigned before shifting: uw promotes to (signed) int, and
+    * shifting a value >= 0x8000 left by 16 would overflow it.
+    */
+   unsigned lo = uw;
+
+   imm.dw1.ud = lo | (lo << 16);
+   return imm;
+}
+
+/** Construct short immediate register
+ *
+ * The 16-bit pattern of w is stored in both halves of the 32-bit
+ * immediate, matching what brw_imm_uw() does for unsigned words.
+ */
+static inline struct brw_reg
+brw_imm_w(int16_t w)
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+   /* Take the raw 16 bits first.  Using w directly would sign-extend a
+    * negative value, leaving 0xffff in the upper half instead of a copy of
+    * the low half, and would left-shift a negative int.
+    */
+   unsigned lo = (uint16_t)w;
+
+   imm.dw1.ud = lo | (lo << 16);
+   return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Immediate of type V: eight signed half-byte values packed in v.
+ * Region is vstride 0, width 8, hstride 1.
+ */
+static inline struct brw_reg
+brw_imm_v(unsigned v)
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_V);
+
+   result.dw1.ud = v;
+   result.hstride = BRW_HORIZONTAL_STRIDE_1;
+   result.width = BRW_WIDTH_8;
+   result.vstride = BRW_VERTICAL_STRIDE_0;
+   return result;
+}
+
+/** Immediate of type VF: four 8-bit float values packed in v.
+ * Region is vstride 0, width 4, hstride 1.
+ */
+static inline struct brw_reg
+brw_imm_vf(unsigned v)
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+
+   result.dw1.ud = v;
+   result.hstride = BRW_HORIZONTAL_STRIDE_1;
+   result.width = BRW_WIDTH_4;
+   result.vstride = BRW_VERTICAL_STRIDE_0;
+   return result;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+/** Immediate of type VF built from four separate 8-bit values; v0 lands in
+ * the lowest byte and v3 in the highest.
+ */
+static inline struct brw_reg
+brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+{
+   struct brw_reg result = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   unsigned packed = (v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24);
+
+   result.dw1.ud = packed;
+   result.hstride = BRW_HORIZONTAL_STRIDE_1;
+   result.width = BRW_WIDTH_4;
+   result.vstride = BRW_VERTICAL_STRIDE_0;
+   return result;
+}
+
+
+/** UW immediate holding reg's byte address (nr * REG_SIZE + subnr). */
+static inline struct brw_reg
+brw_address(struct brw_reg reg)
+{
+   int byte_addr = reg.nr * REG_SIZE + reg.subnr;
+
+   return brw_imm_uw(byte_addr);
+}
+
+/** brw_vec1_reg() in the general register file. */
+static inline struct brw_reg
+brw_vec1_grf(unsigned nr, unsigned subnr)
+{
+   const unsigned file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec1_reg(file, nr, subnr);
+}
+
+/** brw_vec2_reg() in the general register file. */
+static inline struct brw_reg
+brw_vec2_grf(unsigned nr, unsigned subnr)
+{
+   const unsigned file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec2_reg(file, nr, subnr);
+}
+
+/** brw_vec4_reg() in the general register file. */
+static inline struct brw_reg
+brw_vec4_grf(unsigned nr, unsigned subnr)
+{
+   const unsigned file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec4_reg(file, nr, subnr);
+}
+
+/** brw_vec8_reg() in the general register file. */
+static inline struct brw_reg
+brw_vec8_grf(unsigned nr, unsigned subnr)
+{
+   const unsigned file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_vec8_reg(file, nr, subnr);
+}
+
+
+/** brw_uw8_reg() in the general register file. */
+static inline struct brw_reg
+brw_uw8_grf(unsigned nr, unsigned subnr)
+{
+   const unsigned file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_uw8_reg(file, nr, subnr);
+}
+
+/** brw_uw16_reg() in the general register file. */
+static inline struct brw_reg
+brw_uw16_grf(unsigned nr, unsigned subnr)
+{
+   const unsigned file = BRW_GENERAL_REGISTER_FILE;
+
+   return brw_uw16_reg(file, nr, subnr);
+}
+
+
+/** The architecture null register as an 8-wide float region (usually used
+ * for setting condition codes).
+ */
+static inline struct brw_reg
+brw_null_reg(void)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_vec8_reg(file, BRW_ARF_NULL, 0);
+}
+
+/** Scalar UW view of the architecture address register at element subnr. */
+static inline struct brw_reg
+brw_address_reg(unsigned subnr)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_uw1_reg(file, BRW_ARF_ADDRESS, subnr);
+}
+
+/* The instruction-pointer architecture register, typed UD.
+ *
+ * If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw. This goes against the convention for other scalar
+ * regs, so both are deliberately XYZW here.
+ */
+static inline struct brw_reg
+brw_ip_reg(void)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+   const unsigned subnr = 0;
+
+   return brw_reg(file, BRW_ARF_IP, subnr, BRW_REGISTER_TYPE_UD,
+                  BRW_VERTICAL_STRIDE_4, /* ? */
+                  BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XYZW,    /* NOTE! */
+                  BRW_WRITEMASK_XYZW); /* NOTE! */
+}
+
+/** The architecture accumulator register as an 8-wide float region. */
+static inline struct brw_reg
+brw_acc_reg(void)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_vec8_reg(file, BRW_ARF_ACCUMULATOR, 0);
+}
+
+/** Scalar UD view of element 1 of the notification-count architecture
+ * register.
+ */
+static inline struct brw_reg
+brw_notification_1_reg(void)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+   const unsigned subnr = 1;
+
+   return brw_reg(file, BRW_ARF_NOTIFICATION_COUNT, subnr,
+                  BRW_REGISTER_TYPE_UD,
+                  BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XXXX, BRW_WRITEMASK_X);
+}
+
+
+/** Scalar UW view of flag register number 'reg', element 'subreg'. */
+static inline struct brw_reg
+brw_flag_reg(int reg, int subreg)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_uw1_reg(file, BRW_ARF_FLAG + reg, subreg);
+}
+
+
+/** Scalar UW view of the architecture mask register at element subnr. */
+static inline struct brw_reg
+brw_mask_reg(unsigned subnr)
+{
+   const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_uw1_reg(file, BRW_ARF_MASK, subnr);
+}
+
+/** 8-wide float view of message register nr.  Ignoring bit 7, nr must be
+ * below BRW_MAX_MRF (asserted).
+ */
+static inline struct brw_reg
+brw_message_reg(unsigned nr)
+{
+   const unsigned file = BRW_MESSAGE_REGISTER_FILE;
+
+   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+   return brw_vec8_reg(file, nr, 0);
+}
+
+
+/* Map an element count (0, 1, 2, 4, 8, 16, 32) to the code 0..6 used by
+ * stride(); any other value maps to 0.
+ *
+ * This is almost always called with a numeric constant argument, so
+ * keep it a flat chain of comparisons that folds at compile time.
+ */
+static inline unsigned cvt(unsigned val)
+{
+   if (val == 1)
+      return 1;
+   if (val == 2)
+      return 2;
+   if (val == 4)
+      return 3;
+   if (val == 8)
+      return 4;
+   if (val == 16)
+      return 5;
+   if (val == 32)
+      return 6;
+
+   /* 0 and every unrecognised value. */
+   return 0;
+}
+
+/** Return reg with its region replaced.  Arguments are element counts
+ * (e.g. 8, 8, 1), converted with cvt(); the width field stores
+ * cvt(width) - 1.
+ */
+static inline struct brw_reg
+stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
+{
+   struct brw_reg out = reg;
+
+   out.hstride = cvt(hstride);
+   out.width = cvt(width) - 1;
+   out.vstride = cvt(vstride);
+   return out;
+}
+
+
+/** reg with a <16;16,1> region. */
+static inline struct brw_reg
+vec16(struct brw_reg reg)
+{
+   struct brw_reg out = stride(reg, 16, 16, 1);
+
+   return out;
+}
+
+/** reg with an <8;8,1> region. */
+static inline struct brw_reg
+vec8(struct brw_reg reg)
+{
+   struct brw_reg out = stride(reg, 8, 8, 1);
+
+   return out;
+}
+
+/** reg with a <4;4,1> region. */
+static inline struct brw_reg
+vec4(struct brw_reg reg)
+{
+   struct brw_reg out = stride(reg, 4, 4, 1);
+
+   return out;
+}
+
+/** reg with a <2;2,1> region. */
+static inline struct brw_reg
+vec2(struct brw_reg reg)
+{
+   struct brw_reg out = stride(reg, 2, 2, 1);
+
+   return out;
+}
+
+/** reg with a scalar <0;1,0> region. */
+static inline struct brw_reg
+vec1(struct brw_reg reg)
+{
+   struct brw_reg out = stride(reg, 0, 1, 0);
+
+   return out;
+}
+
+
+/** Scalar view of element 'elt' of reg, counted in reg's own type. */
+static inline struct brw_reg
+get_element(struct brw_reg reg, unsigned elt)
+{
+   struct brw_reg at = suboffset(reg, elt);
+
+   return vec1(at);
+}
+
+/** Scalar UD view of element 'elt' of reg, counted in UD elements. */
+static inline struct brw_reg
+get_element_ud(struct brw_reg reg, unsigned elt)
+{
+   struct brw_reg typed = retype(reg, BRW_REGISTER_TYPE_UD);
+   struct brw_reg at = suboffset(typed, elt);
+
+   return vec1(at);
+}
+
+/** Scalar D view of element 'elt' of reg, counted in D elements. */
+static inline struct brw_reg
+get_element_d(struct brw_reg reg, unsigned elt)
+{
+   struct brw_reg typed = retype(reg, BRW_REGISTER_TYPE_D);
+   struct brw_reg at = suboffset(typed, elt);
+
+   return vec1(at);
+}
+
+
+/** Compose a swizzle on top of reg's existing one: output channel i takes
+ * whatever reg's current swizzle selects at position x/y/z/w.
+ * Not valid for immediates (asserted).
+ */
+static inline struct brw_reg
+brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
+{
+   unsigned old, sx, sy, sz, sw;
+
+   assert(reg.file != BRW_IMMEDIATE_VALUE);
+
+   old = reg.dw1.bits.swizzle;
+   sx = BRW_GET_SWZ(old, x);
+   sy = BRW_GET_SWZ(old, y);
+   sz = BRW_GET_SWZ(old, z);
+   sw = BRW_GET_SWZ(old, w);
+
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
+   return reg;
+}
+
+
+/** brw_swizzle() with the same position x used for all four channels. */
+static inline struct brw_reg
+brw_swizzle1(struct brw_reg reg, unsigned x)
+{
+   struct brw_reg out = brw_swizzle(reg, x, x, x, x);
+
+   return out;
+}
+
+/** Narrow reg's writemask by ANDing it with mask.  Not valid for
+ * immediates (asserted).
+ */
+static inline struct brw_reg
+brw_writemask(struct brw_reg reg, unsigned mask)
+{
+   assert(reg.file != BRW_IMMEDIATE_VALUE);
+
+   reg.dw1.bits.writemask = reg.dw1.bits.writemask & mask;
+   return reg;
+}
+
+/** Replace reg's writemask with mask.  Not valid for immediates
+ * (asserted).
+ */
+static inline struct brw_reg
+brw_set_writemask(struct brw_reg reg, unsigned mask)
+{
+   struct brw_reg out;
+
+   assert(reg.file != BRW_IMMEDIATE_VALUE);
+
+   out = reg;
+   out.dw1.bits.writemask = mask;
+   return out;
+}
+
+/** Toggle reg's negate source modifier. */
+static inline struct brw_reg
+negate(struct brw_reg reg)
+{
+   reg.negate = !reg.negate;
+   return reg;
+}
+
+/** Set reg's abs source modifier and clear its negate modifier. */
+static inline struct brw_reg
+brw_abs(struct brw_reg reg)
+{
+   struct brw_reg out = reg;
+
+   out.negate = 0;
+   out.abs = 1;
+   return out;
+}
+
+/************************************************************************/
+
+/** 4-wide float register addressed register-indirectly: subnr is stored
+ * in the subnr field and offset in the indirect_offset field.
+ */
+static inline struct brw_reg
+brw_vec4_indirect(unsigned subnr, int offset)
+{
+   struct brw_reg ind = brw_vec4_grf(0, 0);
+
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.dw1.bits.indirect_offset = offset;
+   ind.subnr = subnr;
+   return ind;
+}
+
+/** Scalar float register addressed register-indirectly: subnr is stored
+ * in the subnr field and offset in the indirect_offset field.
+ */
+static inline struct brw_reg
+brw_vec1_indirect(unsigned subnr, int offset)
+{
+   struct brw_reg ind = brw_vec1_grf(0, 0);
+
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.dw1.bits.indirect_offset = offset;
+   ind.subnr = subnr;
+   return ind;
+}
+
+/** 4-wide float indirect register at ptr plus 'offset'. */
+static inline struct brw_reg
+deref_4f(struct brw_indirect ptr, int offset)
+{
+   int total = ptr.addr_offset + offset;
+
+   return brw_vec4_indirect(ptr.addr_subnr, total);
+}
+
+/** Scalar float indirect register at ptr plus 'offset'. */
+static inline struct brw_reg
+deref_1f(struct brw_indirect ptr, int offset)
+{
+   int total = ptr.addr_offset + offset;
+
+   return brw_vec1_indirect(ptr.addr_subnr, total);
+}
+
+/** deref_4f() retyped to B (signed byte). */
+static inline struct brw_reg
+deref_4b(struct brw_indirect ptr, int offset)
+{
+   struct brw_reg base = deref_4f(ptr, offset);
+
+   return retype(base, BRW_REGISTER_TYPE_B);
+}
+
+/** deref_1f() retyped to UW (unsigned word). */
+static inline struct brw_reg
+deref_1uw(struct brw_indirect ptr, int offset)
+{
+   struct brw_reg base = deref_1f(ptr, offset);
+
+   return retype(base, BRW_REGISTER_TYPE_UW);
+}
+
+/** deref_1f() retyped to D (signed dword). */
+static inline struct brw_reg
+deref_1d(struct brw_indirect ptr, int offset)
+{
+   struct brw_reg base = deref_1f(ptr, offset);
+
+   return retype(base, BRW_REGISTER_TYPE_D);
+}
+
+/** deref_1f() retyped to UD (unsigned dword). */
+static inline struct brw_reg
+deref_1ud(struct brw_indirect ptr, int offset)
+{
+   struct brw_reg base = deref_1f(ptr, offset);
+
+   return retype(base, BRW_REGISTER_TYPE_UD);
+}
+
+/** The address register element that ptr is based on. */
+static inline struct brw_reg
+get_addr_reg(struct brw_indirect ptr)
+{
+   unsigned subnr = ptr.addr_subnr;
+
+   return brw_address_reg(subnr);
+}
+
+/** Return ptr with 'offset' added to its addr_offset. */
+static inline struct brw_indirect
+brw_indirect_offset(struct brw_indirect ptr, int offset)
+{
+   ptr.addr_offset = ptr.addr_offset + offset;
+   return ptr;
+}
+
+/** Build a brw_indirect from an address sub-register number and an
+ * offset; the pad bits are cleared.
+ */
+static inline struct brw_indirect
+brw_indirect(unsigned addr_subnr, int offset)
+{
+   struct brw_indirect result;
+
+   result.pad = 0;
+   result.addr_offset = offset;
+   result.addr_subnr = addr_subnr;
+   return result;
+}
+
+/** Do two brw_regs refer to the same register?  Only the register file
+ * and register number are compared.
+ */
+static inline bool
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+   if (r1.file != r2.file)
+      return false;
+
+   return r1.nr == r2.nr;
+}
+
+/* Defined outside this header.
+ * NOTE(review): presumably prints a human-readable form of reg for
+ * debugging - confirm against the definition.
+ */
+void brw_print_reg(struct brw_reg reg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/assembler/brw_structs.h b/assembler/brw_structs.h
new file mode 100644
index 0000000..8c2d2b9
--- /dev/null
+++ b/assembler/brw_structs.h
@@ -0,0 +1,1493 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+#include <stdint.h>
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE 0x1
+#define BRW_FLUSH_STATE_CACHE 0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct brw_urb_fence /* Three bit-field groups (header, bits0, bits1); the widths in each group add up to 32. */
+{
+ struct
+ {
+ unsigned length:8;
+ unsigned vs_realloc:1;
+ unsigned gs_realloc:1;
+ unsigned clp_realloc:1;
+ unsigned sf_realloc:1;
+ unsigned vfe_realloc:1;
+ unsigned cs_realloc:1;
+ unsigned pad:2;
+ unsigned opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned vs_fence:10;
+ unsigned gs_fence:10;
+ unsigned clp_fence:10;
+ unsigned pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned sf_fence:10;
+ unsigned vf_fence:10;
+ unsigned cs_fence:11; /* one bit wider than the other five fence fields */
+ unsigned pad:1;
+ } bits1;
+};
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0 /* Widths add up to 32; embedded as the first member of the clip/sf/gs/vs/wm unit-state structs in this header. */
+{
+ unsigned pad0:1;
+ unsigned grf_reg_count:3;
+ unsigned pad1:2;
+ unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
+};
+
+struct thread1 /* Widths add up to 32; embedded by the sf/gs/vs/wm unit states (brw_clip_unit_state declares its own thread1 layout). */
+{
+ unsigned ext_halt_exception_enable:1;
+ unsigned sw_exception_enable:1;
+ unsigned mask_stack_exception_enable:1;
+ unsigned timeout_exception_enable:1;
+ unsigned illegal_op_exception_enable:1;
+ unsigned pad0:3;
+ unsigned depth_coef_urb_read_offset:6; /* WM only */
+ unsigned pad1:2;
+ unsigned floating_point_mode:1;
+ unsigned thread_priority:1;
+ unsigned binding_table_entry_count:8;
+ unsigned pad3:5; /* named pad3; this struct has no pad2 */
+ unsigned single_program_flow:1;
+};
+
+struct thread2 /* Scratch-space description; widths add up to 32. */
+{
+ unsigned per_thread_scratch_space:4;
+ unsigned pad0:6;
+ unsigned scratch_space_base_pointer:22;
+};
+
+
+struct thread3 /* Dispatch start register plus URB / constant-URB read offset and length; widths add up to 32. */
+{
+ unsigned dispatch_grf_start_reg:4;
+ unsigned urb_entry_read_offset:6;
+ unsigned pad0:1;
+ unsigned urb_entry_read_length:6;
+ unsigned pad1:1;
+ unsigned const_urb_entry_read_offset:6;
+ unsigned pad2:1;
+ unsigned const_urb_entry_read_length:6;
+ unsigned pad3:1;
+};
+
+
+
+struct brw_clip_unit_state /* Clip unit state: shared thread0/2/3 groups, clip-specific thread1/thread4/clip5/clip6 (32 bits each), then four viewport floats. */
+{
+ struct thread0 thread0;
+ struct /* clip-specific layout used instead of struct thread1: same trailing fields, different exception-enable positions */
+ {
+ unsigned pad0:7;
+ unsigned sw_exception_enable:1;
+ unsigned pad1:3;
+ unsigned mask_stack_exception_enable:1;
+ unsigned pad2:1;
+ unsigned illegal_op_exception_enable:1;
+ unsigned pad3:2;
+ unsigned floating_point_mode:1;
+ unsigned thread_priority:1;
+ unsigned binding_table_entry_count:8;
+ unsigned pad4:5;
+ unsigned single_program_flow:1;
+ } thread1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:9;
+ unsigned gs_output_stats:1; /* not always */
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:5; /* may be less */
+ unsigned pad3:2;
+ } thread4;
+
+ struct
+ {
+ unsigned pad0:13;
+ unsigned clip_mode:3;
+ unsigned userclip_enable_flags:8;
+ unsigned userclip_must_clip:1;
+ unsigned negative_w_clip_test:1;
+ unsigned guard_band_enable:1;
+ unsigned viewport_z_clip_enable:1;
+ unsigned viewport_xy_clip_enable:1;
+ unsigned vertex_position_space:1;
+ unsigned api_mode:1;
+ unsigned pad2:1; /* named pad2; this group has no pad1 */
+ } clip5;
+
+ struct
+ {
+ unsigned pad0:5;
+ unsigned clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+struct gen6_blend_state /* Two bit-field groups, blend0 and blend1; the widths in each add up to 32. */
+{
+ struct {
+ unsigned dest_blend_factor:5;
+ unsigned source_blend_factor:5;
+ unsigned pad3:1;
+ unsigned blend_func:3;
+ unsigned pad2:1;
+ unsigned ia_dest_blend_factor:5;
+ unsigned ia_source_blend_factor:5;
+ unsigned pad1:1;
+ unsigned ia_blend_func:3;
+ unsigned pad0:1;
+ unsigned ia_blend_enable:1;
+ unsigned blend_enable:1;
+ } blend0; /* pads in this group are numbered in descending order (pad3 .. pad0) */
+
+ struct {
+ unsigned post_blend_clamp_enable:1;
+ unsigned pre_blend_clamp_enable:1;
+ unsigned clamp_range:2;
+ unsigned pad0:4;
+ unsigned x_dither_offset:2;
+ unsigned y_dither_offset:2;
+ unsigned dither_enable:1;
+ unsigned alpha_test_func:3;
+ unsigned alpha_test_enable:1;
+ unsigned pad1:1;
+ unsigned logic_op_func:4;
+ unsigned logic_op_enable:1;
+ unsigned pad2:1;
+ unsigned write_disable_b:1;
+ unsigned write_disable_g:1;
+ unsigned write_disable_r:1;
+ unsigned write_disable_a:1;
+ unsigned pad3:1;
+ unsigned alpha_to_coverage_dither:1;
+ unsigned alpha_to_one:1;
+ unsigned alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state /* cc0 bit-fields (32 bits), an alpha-reference union, then four float constants (r, g, b, a). */
+{
+ struct {
+ unsigned alpha_test_format:1;
+ unsigned pad0:14;
+ unsigned round_disable:1;
+ unsigned bf_stencil_ref:8;
+ unsigned stencil_ref:8;
+ } cc0;
+
+ union { /* the alpha reference is reachable either as a float or as an 8-bit field plus 24 pad bits */
+ float alpha_ref_f;
+ struct {
+ unsigned ui:8;
+ unsigned pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state /* Three bit-field groups (ds0, ds1, ds2); the widths in each add up to 32. */
+{
+ struct { /* field list matches cc0 of struct brw_cc_unit_state in this header */
+ unsigned pad0:3;
+ unsigned bf_stencil_pass_depth_pass_op:3;
+ unsigned bf_stencil_pass_depth_fail_op:3;
+ unsigned bf_stencil_fail_op:3;
+ unsigned bf_stencil_func:3;
+ unsigned bf_stencil_enable:1;
+ unsigned pad1:2;
+ unsigned stencil_write_enable:1;
+ unsigned stencil_pass_depth_pass_op:3;
+ unsigned stencil_pass_depth_fail_op:3;
+ unsigned stencil_fail_op:3;
+ unsigned stencil_func:3;
+ unsigned stencil_enable:1;
+ } ds0;
+
+ struct {
+ unsigned bf_stencil_write_mask:8;
+ unsigned bf_stencil_test_mask:8;
+ unsigned stencil_write_mask:8;
+ unsigned stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ unsigned pad0:26;
+ unsigned depth_write_enable:1;
+ unsigned depth_test_func:3;
+ unsigned pad1:1;
+ unsigned depth_test_enable:1;
+ } ds2;
+};
+
+struct brw_cc_unit_state /* Bit-field groups cc0..cc6 (widths add up to 32 in each) followed by the cc7 alpha-reference union. */
+{
+ struct
+ {
+ unsigned pad0:3;
+ unsigned bf_stencil_pass_depth_pass_op:3;
+ unsigned bf_stencil_pass_depth_fail_op:3;
+ unsigned bf_stencil_fail_op:3;
+ unsigned bf_stencil_func:3;
+ unsigned bf_stencil_enable:1;
+ unsigned pad1:2;
+ unsigned stencil_write_enable:1;
+ unsigned stencil_pass_depth_pass_op:3;
+ unsigned stencil_pass_depth_fail_op:3;
+ unsigned stencil_fail_op:3;
+ unsigned stencil_func:3;
+ unsigned stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned bf_stencil_ref:8;
+ unsigned stencil_write_mask:8;
+ unsigned stencil_test_mask:8;
+ unsigned stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned logicop_enable:1;
+ unsigned pad0:10;
+ unsigned depth_write_enable:1;
+ unsigned depth_test_function:3;
+ unsigned depth_test:1;
+ unsigned bf_stencil_write_mask:8; /* the back-face masks are declared here in cc2, not next to the front-face masks in cc1 */
+ unsigned bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned pad0:8;
+ unsigned alpha_test_func:3;
+ unsigned alpha_test:1;
+ unsigned blend_enable:1;
+ unsigned ia_blend_enable:1;
+ unsigned pad1:1;
+ unsigned alpha_test_format:1;
+ unsigned pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned pad0:5;
+ unsigned cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } cc4;
+
+ struct
+ {
+ unsigned pad0:2;
+ unsigned ia_dest_blend_factor:5;
+ unsigned ia_src_blend_factor:5;
+ unsigned ia_blend_function:3;
+ unsigned statistics_enable:1;
+ unsigned logicop_func:4;
+ unsigned pad1:11;
+ unsigned dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned clamp_post_alpha_blend:1;
+ unsigned clamp_pre_alpha_blend:1;
+ unsigned clamp_range:2;
+ unsigned pad0:11;
+ unsigned y_dither_offset:2;
+ unsigned x_dither_offset:2;
+ unsigned dest_blend_factor:5;
+ unsigned src_blend_factor:5;
+ unsigned blend_function:3;
+ } cc6;
+
+ struct {
+ union { /* alpha reference, readable as one float or as four bytes */
+ float f;
+ uint8_t ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+struct brw_sf_unit_state /* Shared thread0..thread3 groups followed by sf-specific thread4, sf5, sf6 and sf7 (widths add up to 32 in each). */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:6;
+ unsigned pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned front_winding:1;
+ unsigned viewport_transform:1;
+ unsigned pad0:3;
+ unsigned sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } sf5;
+
+ struct
+ {
+ unsigned pad0:9;
+ unsigned dest_org_vbias:4;
+ unsigned dest_org_hbias:4;
+ unsigned scissor:1;
+ unsigned disable_2x2_trifilter:1;
+ unsigned disable_zero_pix_trifilter:1;
+ unsigned point_rast_rule:2;
+ unsigned line_endcap_aa_region_width:2;
+ unsigned line_width:4;
+ unsigned fast_scissor_disable:1;
+ unsigned cull_mode:2;
+ unsigned aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned point_size:11;
+ unsigned use_point_size_state:1;
+ unsigned subpixel_precision:1;
+ unsigned sprite_point:1;
+ unsigned pad0:10;
+ unsigned aa_line_distance_mode:1;
+ unsigned trifan_pv:2;
+ unsigned linestrip_pv:2;
+ unsigned tristrip_pv:2;
+ unsigned line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+struct gen6_scissor_rect /* Four unsigned 16-bit bit-fields (64 bits in total): min corner first, then max corner. */
+{
+ unsigned xmin:16;
+ unsigned ymin:16;
+ unsigned xmax:16;
+ unsigned ymax:16;
+};
+
+struct brw_gs_unit_state /* Shared thread0..thread3 groups followed by gs-specific thread4, gs5 and gs6 (widths add up to 32 in each). */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned pad0:8;
+ unsigned rendering_enable:1; /* for Ironlake */
+ unsigned pad4:1;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:5;
+ unsigned pad3:2;
+ } thread4;
+
+ struct
+ {
+ unsigned sampler_count:3;
+ unsigned pad0:2;
+ unsigned sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned max_vp_index:4;
+ unsigned pad0:12;
+ unsigned svbi_post_inc_value:10;
+ unsigned pad1:1;
+ unsigned svbi_post_inc_enable:1;
+ unsigned svbi_payload:1;
+ unsigned discard_adjaceny:1; /* (sic) presumably "adjacency"; spelling kept because renaming the field would break its users */
+ unsigned reorder_enable:1;
+ unsigned pad2:1;
+ } gs6;
+};
+
+
+struct brw_vs_unit_state /* Shared thread0..thread3 groups followed by vs-specific thread4, vs5 and vs6 (widths add up to 32 in each). */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct /* same field list as thread4 of struct brw_sf_unit_state */
+ {
+ unsigned pad0:10;
+ unsigned stats_enable:1;
+ unsigned nr_urb_entries:7;
+ unsigned pad1:1;
+ unsigned urb_entry_allocation_size:5;
+ unsigned pad2:1;
+ unsigned max_threads:6;
+ unsigned pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned sampler_count:3;
+ unsigned pad0:2;
+ unsigned sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned vs_enable:1;
+ unsigned vert_cache_disable:1;
+ unsigned pad0:30;
+ } vs6;
+};
+
+
+struct brw_wm_unit_state /* Shared thread0..thread3, then wm4 and wm5 (32 bits each), two depth-offset floats, and wm8..wm10. */
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned stats_enable:1;
+ unsigned depth_buffer_clear:1; /* a field with this name is also declared in wm5 below */
+ unsigned sampler_count:3;
+ unsigned sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned enable_8_pix:1;
+ unsigned enable_16_pix:1;
+ unsigned enable_32_pix:1;
+ unsigned enable_con_32_pix:1;
+ unsigned enable_con_64_pix:1;
+ unsigned pad0:1;
+
+ /* These next four bits are for Ironlake+ */
+ unsigned fast_span_coverage_enable:1;
+ unsigned depth_buffer_clear:1;
+ unsigned depth_buffer_resolve_enable:1;
+ unsigned hierarchical_depth_buffer_resolve_enable:1;
+
+ unsigned legacy_global_depth_bias:1;
+ unsigned line_stipple:1;
+ unsigned depth_offset:1;
+ unsigned polygon_stipple:1;
+ unsigned line_aa_region_width:2;
+ unsigned line_endcap_aa_region_width:2;
+ unsigned early_depth_test:1;
+ unsigned thread_dispatch_enable:1;
+ unsigned program_uses_depth:1;
+ unsigned program_computes_depth:1;
+ unsigned program_uses_killpixel:1;
+ unsigned legacy_line_rast: 1;
+ unsigned transposed_urb_read_enable:1;
+ unsigned max_threads:7;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+
+ /* for Ironlake only */
+ struct { /* wm8, wm9 and wm10 repeat the struct thread0 layout with _1/_2/_3 suffixed field names */
+ unsigned pad0:1;
+ unsigned grf_reg_count_1:3;
+ unsigned pad1:2;
+ unsigned kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ unsigned pad0:1;
+ unsigned grf_reg_count_2:3;
+ unsigned pad1:2;
+ unsigned kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ unsigned pad0:1;
+ unsigned grf_reg_count_3:3;
+ unsigned pad1:2;
+ unsigned kernel_start_pointer_3:26;
+ } wm10;
+};
+
+struct brw_sampler_default_color { /* default colour stored as four floats */
+ float color[4];
+};
+
+struct gen5_sampler_default_color { /* a struct, not a union: all six four-element arrays are stored one after another */
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
+struct brw_sampler_state /* Four bit-field groups, ss0..ss3; the widths in each add up to 32. */
+{
+
+ struct
+ {
+ unsigned shadow_function:3;
+ unsigned lod_bias:11;
+ unsigned min_filter:3;
+ unsigned mag_filter:3;
+ unsigned mip_filter:2;
+ unsigned base_level:5;
+ unsigned min_mag_neq:1;
+ unsigned lod_preclamp:1;
+ unsigned default_color_mode:1;
+ unsigned pad0:1;
+ unsigned disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned r_wrap_mode:3;
+ unsigned t_wrap_mode:3;
+ unsigned s_wrap_mode:3;
+ unsigned cube_control_mode:1;
+ unsigned pad:2;
+ unsigned max_lod:10;
+ unsigned min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned non_normalized_coord:1;
+ unsigned pad:12;
+ unsigned address_round:6;
+ unsigned max_aniso:3;
+ unsigned chroma_key_mode:1;
+ unsigned chroma_key_index:2;
+ unsigned chroma_key_enable:1;
+ unsigned monochrome_filter_width:3;
+ unsigned monochrome_filter_height:3;
+ } ss3;
+};
+
+struct gen7_sampler_state /* Four bit-field groups, ss0..ss3 (32 bits each); several fields sit in different groups than in struct brw_sampler_state. */
+{
+ struct
+ {
+ unsigned aniso_algorithm:1;
+ unsigned lod_bias:13; /* 13 bits here, 11 in struct brw_sampler_state */
+ unsigned min_filter:3;
+ unsigned mag_filter:3;
+ unsigned mip_filter:2;
+ unsigned base_level:5;
+ unsigned pad1:1;
+ unsigned lod_preclamp:1;
+ unsigned default_color_mode:1;
+ unsigned pad0:1;
+ unsigned disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned cube_control_mode:1;
+ unsigned shadow_function:3; /* declared in ss0 in struct brw_sampler_state */
+ unsigned pad:4;
+ unsigned max_lod:12; /* 12 bits here, 10 in struct brw_sampler_state */
+ unsigned min_lod:12;
+ } ss1;
+
+ struct
+ {
+ unsigned pad:5;
+ unsigned default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned r_wrap_mode:3; /* the wrap modes are declared in ss1 in struct brw_sampler_state */
+ unsigned t_wrap_mode:3;
+ unsigned s_wrap_mode:3;
+ unsigned pad:1;
+ unsigned non_normalized_coord:1;
+ unsigned trilinear_quality:2;
+ unsigned address_round:6;
+ unsigned max_aniso:3;
+ unsigned chroma_key_mode:1;
+ unsigned chroma_key_index:2;
+ unsigned chroma_key_enable:1;
+ unsigned pad0:6;
+ } ss3;
+};
+
+struct brw_clipper_viewport /* Four floats in x-min, x-max, y-min, y-max order. */
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+
+struct brw_cc_viewport /* Depth range as two floats: minimum first, then maximum. */
+{
+ float min_depth;
+ float max_depth;
+};
+
+struct brw_sf_viewport /* Six viewport floats followed by a scissor rectangle of four signed 16-bit values. */
+{
+ struct { /* same six members as struct gen6_sf_viewport; presumably viewport-matrix elements, judging by the mRC names */
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ /* scissor coordinates are inclusive */
+ struct {
+ int16_t xmin;
+ int16_t ymin;
+ int16_t xmax;
+ int16_t ymax;
+ } scissor;
+};
+
+struct gen6_sf_viewport { /* the six viewport floats of brw_sf_viewport.viewport, without the scissor rectangle */
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+};
+
+struct gen7_sf_clip_viewport { /* viewport floats + padding + guardband + padding: 16 members of float/unsigned type in total */
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ unsigned pad0[2]; /* two unused words between the viewport and guardband groups */
+
+ struct {
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+ } guardband;
+
+ float pad1[4]; /* trailing padding, declared as float although nothing visible here reads it */
+};
+
+struct brw_vertex_element_state /* Two bit-field groups, ve0 and ve1; the widths in each add up to 32. */
+{
+ struct
+ {
+ unsigned src_offset:11;
+ unsigned pad:5;
+ unsigned src_format:9;
+ unsigned pad0:1;
+ unsigned valid:1;
+ unsigned vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ unsigned dst_offset:8;
+ unsigned pad:8;
+ unsigned vfcomponent3:4; /* components are declared in descending order, 3 down to 0 */
+ unsigned vfcomponent2:4;
+ unsigned vfcomponent1:4;
+ unsigned vfcomponent0:4;
+ } ve1;
+};
+
+struct brw_urb_immediate { /* widths add up to 32; used as the 'urb' member of brw_instruction's bits3 union */
+ unsigned opcode:4;
+ unsigned offset:6;
+ unsigned swizzle_control:2;
+ unsigned pad:1;
+ unsigned allocate:1;
+ unsigned used:1;
+ unsigned complete:1;
+ unsigned response_length:4; /* the last five fields repeat the tail of bits3.generic in struct brw_instruction */
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct brw_instruction /* A header group plus three unions (bits1, bits2, bits3) whose members are alternative views of the same storage. */
+{
+ struct
+ {
+ unsigned opcode:7;
+ unsigned pad:1;
+ unsigned access_mode:1;
+ unsigned mask_control:1;
+ unsigned dependency_control:2;
+ unsigned compression_control:2; /* gen6: quarter control */
+ unsigned thread_control:2;
+ unsigned predicate_control:4;
+ unsigned predicate_inverse:1;
+ unsigned execution_size:3;
+ /**
+ * Conditional Modifier for most instructions. On Gen6+, this is also
+ * used for the SEND instruction's Message Target/SFID.
+ */
+ unsigned destreg__conditionalmod:4;
+ unsigned acc_wr_control:1;
+ unsigned cmpt_control:1;
+ unsigned debug_control:1;
+ unsigned saturate:1;
+ } header;
+
+ union { /* every bit-field member of this union declares exactly 32 bits; 'ud' exposes them as one uint32_t */
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2;
+ unsigned src1_reg_type:3;
+ unsigned pad:1;
+ unsigned dest_subreg_nr:5;
+ unsigned dest_reg_nr:8;
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2; /* 0x00000c00 */
+ unsigned src1_reg_type:3; /* 0x00007000 */
+ unsigned pad:1;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2;
+ unsigned src1_reg_type:3;
+ unsigned pad:1;
+ unsigned dest_writemask:4;
+ unsigned dest_subreg_nr:1;
+ unsigned dest_reg_nr:8;
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned pad0:6; /* covers the bits that da1/ia1/da16 name src1_reg_file, src1_reg_type and pad */
+ unsigned dest_writemask:4;
+ int dest_indirect_offset:6;
+ unsigned dest_subreg_nr:3;
+ unsigned dest_horiz_stride:2;
+ unsigned dest_address_mode:1;
+ } ia16;
+
+ struct {
+ unsigned dest_reg_file:2;
+ unsigned dest_reg_type:3;
+ unsigned src0_reg_file:2;
+ unsigned src0_reg_type:3;
+ unsigned src1_reg_file:2;
+ unsigned src1_reg_type:3;
+ unsigned pad:1;
+
+ int jump_count:16;
+ } branch_gen6;
+
+ struct {
+ unsigned dest_reg_file:1;
+ unsigned flag_subreg_nr:1;
+ unsigned flag_reg_nr:1;
+ unsigned pad0:1;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned src2_abs:1;
+ unsigned src2_negate:1;
+ unsigned src_reg_type:2;
+ unsigned dest_reg_type:2;
+ unsigned pad1:1;
+ unsigned nib_ctrl:1;
+ unsigned pad2:1;
+ unsigned dest_writemask:4;
+ unsigned dest_subreg_nr:3;
+ unsigned dest_reg_nr:8;
+ } da3src;
+
+ uint32_t ud;
+ } bits1;
+
+
+ union { /* every bit-field member of this union declares exactly 32 bits; 'ud' exposes them as one uint32_t */
+ struct
+ {
+ unsigned src0_subreg_nr:5;
+ unsigned src0_reg_nr:8;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_horiz_stride:2;
+ unsigned src0_width:3;
+ unsigned src0_vert_stride:4;
+ unsigned flag_subreg_nr:1;
+ unsigned flag_reg_nr:1;
+ unsigned pad:5;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ unsigned src0_subreg_nr:3;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_horiz_stride:2;
+ unsigned src0_width:3;
+ unsigned src0_vert_stride:4;
+ unsigned flag_subreg_nr:1;
+ unsigned flag_reg_nr:1;
+ unsigned pad:5;
+ } ia1;
+
+ struct
+ {
+ unsigned src0_swz_x:2;
+ unsigned src0_swz_y:2;
+ unsigned src0_subreg_nr:1;
+ unsigned src0_reg_nr:8;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_swz_z:2;
+ unsigned src0_swz_w:2;
+ unsigned pad0:1;
+ unsigned src0_vert_stride:4;
+ unsigned flag_subreg_nr:1;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:5;
+ } da16;
+
+ struct
+ {
+ unsigned src0_swz_x:2;
+ unsigned src0_swz_y:2;
+ int src0_indirect_offset:6;
+ unsigned src0_subreg_nr:3;
+ unsigned src0_abs:1;
+ unsigned src0_negate:1;
+ unsigned src0_address_mode:1;
+ unsigned src0_swz_z:2;
+ unsigned src0_swz_w:2;
+ unsigned pad0:1;
+ unsigned src0_vert_stride:4;
+ unsigned flag_subreg_nr:1;
+ unsigned flag_reg_nr:1;
+ unsigned pad1:5;
+ } ia16;
+
+ /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
+ *
+ * Does not apply to Gen6+. The SFID/message target moved to bits
+ * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
+ */
+ struct
+ {
+ unsigned pad:26;
+ unsigned end_of_thread:1;
+ unsigned pad1:1;
+ unsigned sfid:4;
+ } send_gen5; /* for Ironlake only */
+
+ struct {
+ unsigned src0_rep_ctrl:1;
+ unsigned src0_swizzle:8;
+ unsigned src0_subreg_nr:3;
+ unsigned src0_reg_nr:8;
+ unsigned pad0:1;
+ unsigned src1_rep_ctrl:1;
+ unsigned src1_swizzle:8;
+ unsigned src1_subreg_nr_low:2; /* remaining bit is src1_subreg_nr_high in bits3.da3src */
+ } da3src;
+
+ uint32_t ud;
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ unsigned src1_subreg_nr:5;
+ unsigned src1_reg_nr:8;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned src1_address_mode:1;
+ unsigned src1_horiz_stride:2;
+ unsigned src1_width:3;
+ unsigned src1_vert_stride:4;
+ unsigned pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned src1_swz_x:2;
+ unsigned src1_swz_y:2;
+ unsigned src1_subreg_nr:1;
+ unsigned src1_reg_nr:8;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned src1_address_mode:1;
+ unsigned src1_swz_z:2;
+ unsigned src1_swz_w:2;
+ unsigned pad1:1;
+ unsigned src1_vert_stride:4;
+ unsigned pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ unsigned src1_subreg_nr:3;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned src1_address_mode:1;
+ unsigned src1_horiz_stride:2;
+ unsigned src1_width:3;
+ unsigned src1_vert_stride:4;
+ unsigned pad1:7;
+ } ia1;
+
+ struct
+ {
+ unsigned src1_swz_x:2;
+ unsigned src1_swz_y:2;
+ int src1_indirect_offset:6;
+ unsigned src1_subreg_nr:3;
+ unsigned src1_abs:1;
+ unsigned src1_negate:1;
+ unsigned src1_address_mode:1;
+ unsigned src1_swz_z:2;
+ unsigned src1_swz_w:2;
+ unsigned pad1:1;
+ unsigned src1_vert_stride:4;
+ unsigned pad2:7;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ unsigned pop_count:4;
+ unsigned pad0:12;
+ } if_else;
+
+ /* This is also used for gen7 IF/ELSE instructions */
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
+ int JIP; /* used by Gen6 CALL instructions; Gen7 JMPI */
+
+ /**
+ * \defgroup SEND instructions / Message Descriptors
+ *
+ * @{
+ */
+
+ /**
+ * Generic Message Descriptor for Gen4 SEND instructions. The structs
+ * below expand function_control to something specific for their
+ * message. Due to struct packing issues, they duplicate these bits.
+ *
+ * See the G45 PRM, Volume 4, Table 14-15.
+ */
+ struct {
+ unsigned function_control:16;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } generic;
+
+ /**
+ * Generic Message Descriptor for Gen5-7 SEND instructions.
+ *
+ * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
+ * of the information on the SEND instruction is missing from the public
+ * Ironlake PRM.)
+ *
+ * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
+ * According to the SEND instruction description:
+ * "The MSb of the message description, the EOT field, always comes from
+ * bit 127 of the instruction word"...which is bit 31 of this field.
+ */
+ struct {
+ unsigned function_control:19;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } generic_gen5;
+
+ struct {
+ unsigned opcode:1;
+ unsigned requester_type:1;
+ unsigned pad:2;
+ unsigned resource_select:1;
+ unsigned pad1:11;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad2:3;
+ unsigned end_of_thread:1;
+ } thread_spawner;
+
+ struct {
+ unsigned opcode:1;
+ unsigned requester_type:1;
+ unsigned pad0:2;
+ unsigned resource_select:1;
+ unsigned pad1:14;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad2:2;
+ unsigned end_of_thread:1;
+ } thread_spawner_gen5;
+
+ /** G45 PRM, Volume 4, Section 6.1.1.1 */
+ struct {
+ unsigned function:4;
+ unsigned int_type:1;
+ unsigned precision:1;
+ unsigned saturate:1;
+ unsigned data_type:1;
+ unsigned pad0:8;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } math;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
+ struct {
+ unsigned function:4;
+ unsigned int_type:1;
+ unsigned precision:1;
+ unsigned saturate:1;
+ unsigned data_type:1;
+ unsigned snapshot:1;
+ unsigned pad0:10;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } math_gen5;
+
+ /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned sampler:4;
+ unsigned return_format:2;
+ unsigned msg_type:2;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } sampler;
+
+ /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned sampler:4;
+ unsigned msg_type:4;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } sampler_g4x;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned sampler:4;
+ unsigned msg_type:4;
+ unsigned simd_mode:2;
+ unsigned pad0:1;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } sampler_gen5;
+
+ struct {
+ unsigned binding_table_index:8;
+ unsigned sampler:4;
+ unsigned msg_type:5;
+ unsigned simd_mode:2;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } sampler_gen7;
+
+ struct brw_urb_immediate urb;
+
+ struct {
+ unsigned opcode:4;
+ unsigned offset:6;
+ unsigned swizzle_control:2;
+ unsigned pad:1;
+ unsigned allocate:1;
+ unsigned used:1;
+ unsigned complete:1;
+ unsigned pad0:3;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } urb_gen5;
+
+ struct {
+ unsigned opcode:3;
+ unsigned offset:11;
+ unsigned swizzle_control:1;
+ unsigned complete:1;
+ unsigned per_slot_offset:1;
+ unsigned pad0:2;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } urb_gen7;
+
+ struct { /* declares only 20 bits: stops after header_present, unlike the other descriptor views */
+ unsigned binding_table_index:8;
+ unsigned search_path_index:3;
+ unsigned lut_subindex:2;
+ unsigned message_type:2;
+ unsigned pad0:4;
+ unsigned header_present:1;
+ } vme_gen6;
+
+ struct { /* declares only 20 bits: stops after header_present, unlike the other descriptor views */
+ unsigned binding_table_index:8;
+ unsigned pad0:5;
+ unsigned message_type:2;
+ unsigned pad1:4;
+ unsigned header_present:1;
+ } cre_gen75;
+
+ /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:4;
+ unsigned msg_type:2;
+ unsigned target_cache:2;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } dp_read;
+
+ /** G45 PRM, Volume 4, Section 5.10.1.1.2 */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:3;
+ unsigned msg_type:3;
+ unsigned target_cache:2;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } dp_read_g4x;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:4;
+ unsigned msg_type:2;
+ unsigned target_cache:2;
+ unsigned pad0:3;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } dp_read_gen5;
+
+ /** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:3;
+ unsigned last_render_target:1;
+ unsigned msg_type:3;
+ unsigned send_commit_msg:1;
+ unsigned response_length:4;
+ unsigned msg_length:4;
+ unsigned msg_target:4;
+ unsigned pad1:3;
+ unsigned end_of_thread:1;
+ } dp_write;
+
+ /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:3;
+ unsigned last_render_target:1;
+ unsigned msg_type:3;
+ unsigned send_commit_msg:1;
+ unsigned pad0:3;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } dp_write_gen5;
+
+ /**
+ * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
+ *
+ * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
+ **/
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:5;
+ unsigned msg_type:3;
+ unsigned pad0:3;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } gen6_dp_sampler_const_cache;
+
+ /**
+ * Message for the Sandybridge Render Cache Data Port.
+ *
+ * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
+ * Section 3.9.2.1.1: Message Descriptor.
+ *
+ * "Slot Group Select" and "Last Render Target" are part of the
+ * 5-bit message control for Render Target Write messages. See
+ * Section 3.9.9.2.1 of the same volume.
+ */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:5;
+ unsigned msg_type:4;
+ unsigned send_commit_msg:1;
+ unsigned pad0:1;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad1:2;
+ unsigned end_of_thread:1;
+ } gen6_dp;
+
+ /**
+ * Message for any of the Gen7 Data Port caches.
+ *
+ * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
+ * Data Port Messages / Message Descriptor. Once again, "Slot Group
+ * Select" and "Last Render Target" are part of the 6-bit message
+ * control for Render Target Writes.
+ */
+ struct {
+ unsigned binding_table_index:8;
+ unsigned msg_control:6;
+ unsigned msg_type:4;
+ unsigned category:1;
+ unsigned header_present:1;
+ unsigned response_length:5;
+ unsigned msg_length:4;
+ unsigned pad2:2;
+ unsigned end_of_thread:1;
+ } gen7_dp;
+ /** @} */
+
+ struct {
+ unsigned src1_subreg_nr_high:1; /* counterpart of src1_subreg_nr_low in bits2.da3src */
+ unsigned src1_reg_nr:8;
+ unsigned pad0:1;
+ unsigned src2_rep_ctrl:1;
+ unsigned src2_swizzle:8;
+ unsigned src2_subreg_nr:3;
+ unsigned src2_reg_nr:8;
+ unsigned pad1:2;
+ } da3src;
+
+ int d; /* scalar views of the same storage: signed, unsigned and float */
+ unsigned ud;
+ float f;
+ } bits3;
+};
+
+struct brw_compact_instruction { /* two bit-field groups, dw0 and dw1; the widths in each add up to 32 */
+ struct {
+ unsigned opcode:7; /* 0- 6 */
+ unsigned debug_control:1; /* 7- 7 */
+ unsigned control_index:5; /* 8-12 */
+ unsigned data_type_index:5; /* 13-17 */
+ unsigned sub_reg_index:5; /* 18-22 */
+ unsigned acc_wr_control:1; /* 23-23 */
+ unsigned conditionalmod:4; /* 24-27 */
+ unsigned flag_subreg_nr:1; /* 28-28 */
+ unsigned cmpt_ctrl:1; /* 29-29 */
+ unsigned src0_index:2; /* 30-31 */
+ } dw0;
+
+ struct {
+ unsigned src0_index:3; /* 32-34 */
+ unsigned src1_index:5; /* 35-39 */
+ unsigned dst_reg_nr:8; /* 40-47 */
+ unsigned src0_reg_nr:8; /* 48-55 */
+ unsigned src1_reg_nr:8; /* 56-63 */
+ } dw1;
+};
+
+#endif
diff --git a/assembler/disasm-main.c b/assembler/disasm-main.c
new file mode 100644
index 0000000..5bc75af
--- /dev/null
+++ b/assembler/disasm-main.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+
+#include "gen4asm.h"
+#include "brw_eu.h"
+
+/*
+ * Long spellings of the command-line options.  Each entry maps onto the
+ * short option character handled by the getopt_long() switch in main(), so
+ * the long forms printed by usage() are actually accepted.
+ */
+static const struct option longopts[] = {
+	{ "output", required_argument, NULL, 'o' },
+	{ "binary", no_argument, NULL, 'b' },
+	{ "gen", required_argument, NULL, 'g' },
+	{ NULL, 0, NULL, 0 }
+};
+
+/*
+ * Read a textual dump made of 32-bit words written as "0x<hex>" and turn
+ * every four consecutive words into one instruction of the returned program.
+ *
+ * Any character that does not start such a literal is skipped, and a trailing
+ * group of fewer than four words is dropped.
+ *
+ * Returns the program (possibly without any instruction), or NULL when an
+ * allocation fails.
+ */
+static struct brw_program *
+read_program (FILE *input)
+{
+	uint32_t inst[4];
+	struct brw_program *program;
+	struct brw_program_instruction *entry, **prev;
+	unsigned int word;
+	int c;
+	int n = 0;
+
+	/* calloc so that both ->first and ->last start out as NULL */
+	program = calloc (1, sizeof (struct brw_program));
+	if (program == NULL)
+		return NULL;
+	prev = &program->first;
+
+	while ((c = getc (input)) != EOF) {
+		if (c != '0')
+			continue;
+		/* %x stores through an unsigned int *, which need not be uint32_t */
+		if (fscanf (input, "x%x", &word) != 1)
+			continue;
+		inst[n++] = word;
+		if (n < 4)
+			continue;
+
+		/* zeroed entry: type, reloc and next never hold garbage */
+		entry = calloc (1, sizeof (struct brw_program_instruction));
+		if (entry == NULL) {
+			/* out of memory: release the partial list and report it */
+			while (program->first != NULL) {
+				entry = program->first;
+				program->first = entry->next;
+				free (entry);
+			}
+			free (program);
+			return NULL;
+		}
+		entry->type = GEN4ASM_INSTRUCTION_GEN;
+		memcpy (&entry->insn, inst, sizeof (inst));
+		*prev = entry;
+		prev = &entry->next;
+		program->last = entry;
+		n = 0;
+	}
+	return program;
+}
+
+/*
+ * Read a C style byte array dump: bytes written as "0x<hh>" (at most two hex
+ * digits each).  Every sixteen consecutive bytes become one instruction of
+ * the returned program.
+ *
+ * Any character that does not start such a literal is skipped, and a trailing
+ * group of fewer than sixteen bytes is dropped.
+ *
+ * Returns the program (possibly without any instruction), or NULL when an
+ * allocation fails.
+ */
+static struct brw_program *
+read_program_binary (FILE *input)
+{
+	unsigned int temp;
+	uint8_t inst[16];
+	struct brw_program *program;
+	struct brw_program_instruction *entry, **prev;
+	int c;
+	int n = 0;
+
+	/* calloc so that both ->first and ->last start out as NULL */
+	program = calloc (1, sizeof (struct brw_program));
+	if (program == NULL)
+		return NULL;
+	prev = &program->first;
+
+	while ((c = getc (input)) != EOF) {
+		if (c != '0')
+			continue;
+		/* %2x stores through an unsigned int *, which need not be uint32_t */
+		if (fscanf (input, "x%2x", &temp) != 1)
+			continue;
+		inst[n++] = (uint8_t)temp;
+		if (n < 16)
+			continue;
+
+		/* zeroed entry: type, reloc and next never hold garbage */
+		entry = calloc (1, sizeof (struct brw_program_instruction));
+		if (entry == NULL) {
+			/* out of memory: release the partial list and report it */
+			while (program->first != NULL) {
+				entry = program->first;
+				program->first = entry->next;
+				free (entry);
+			}
+			free (program);
+			return NULL;
+		}
+		entry->type = GEN4ASM_INSTRUCTION_GEN;
+		memcpy (&entry->insn, inst, sizeof (inst));
+		*prev = entry;
+		prev = &entry->next;
+		program->last = entry;
+		n = 0;
+	}
+	return program;
+}
+
+/*
+ * Print the command-line synopsis on stderr.
+ *
+ * -b switches the *input* parser to the C style byte array reader; it has no
+ * effect on the output, so the help text says "input".
+ */
+static void usage(void)
+{
+	fprintf(stderr, "usage: intel-gen4disasm [options] inputfile\n");
+	fprintf(stderr, "\t-b, --binary C style binary (byte array) input\n");
+	fprintf(stderr, "\t-o, --output {outputfile} Specify output file\n");
+	fprintf(stderr, "\t-g, --gen <4|5|6|7> Specify GPU generation\n");
+}
+
+/*
+ * Disassembler entry point.
+ *
+ * Parses the options, reads the program from the single positional argument
+ * ("-" keeps stdin), then prints the disassembly of every instruction to the
+ * file named by -o (stdout when -o is absent or is "-").  Any usage or I/O
+ * problem ends the process with status 1.
+ */
+int main(int argc, char **argv)
+{
+	FILE *in = stdin;
+	FILE *out = stdout;
+	const char *out_path = NULL;
+	struct brw_program *program;
+	struct brw_program_instruction *cur;
+	int bytes_mode = 0;
+	int gen = 4;
+	int opt;
+
+	for (;;) {
+		opt = getopt_long(argc, argv, "o:bg:", longopts, NULL);
+		if (opt == -1)
+			break;
+
+		if (opt == 'o') {
+			/* "-o -" means: keep writing to stdout */
+			if (strcmp(optarg, "-") != 0)
+				out_path = optarg;
+		} else if (opt == 'b') {
+			bytes_mode = 1;
+		} else if (opt == 'g') {
+			gen = strtol(optarg, NULL, 10);
+			if (gen < 4 || gen > 7) {
+				usage();
+				exit(1);
+			}
+		} else {
+			usage();
+			exit(1);
+		}
+	}
+
+	/* exactly one positional argument, the input file, must remain */
+	argv += optind;
+	argc -= optind;
+	if (argc != 1) {
+		usage();
+		exit(1);
+	}
+
+	if (strcmp(argv[0], "-") != 0) {
+		in = fopen(argv[0], "r");
+		if (in == NULL) {
+			perror("Couldn't open input file");
+			exit(1);
+		}
+	}
+
+	program = bytes_mode ? read_program_binary (in) : read_program (in);
+	if (program == NULL)
+		exit (1);
+
+	/* the output file is only created once the input has been read */
+	if (out_path != NULL) {
+		out = fopen (out_path, "w");
+		if (out == NULL) {
+			perror("Couldn't open output file");
+			exit(1);
+		}
+	}
+
+	cur = program->first;
+	while (cur != NULL) {
+		brw_disasm (out, &cur->insn.gen, gen);
+		cur = cur->next;
+	}
+	exit (0);
+}
diff --git a/assembler/doc/Makefile.am b/assembler/doc/Makefile.am
new file mode 100644
index 0000000..257fc38
--- /dev/null
+++ b/assembler/doc/Makefile.am
@@ -0,0 +1,3 @@
+EXTRA_DIST = \
+ examples/packed_yuv_sf.g4a \
+ examples/packed_yuv_wm.g4a
diff --git a/assembler/doc/examples/packed_yuv_sf.g4a b/assembler/doc/examples/packed_yuv_sf.g4a
new file mode 100644
index 0000000..8c1398f
--- /dev/null
+++ b/assembler/doc/examples/packed_yuv_sf.g4a
@@ -0,0 +1,17 @@
+/* Two scalar math "inv" messages: g6 from g1.12 and g6.4 from g1.20. */
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+/* g7 = g4 - g3 on all 8 channels. */
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+/* Scale g7 and g7.4 by the matching math results in g6 and g6.4. */
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+/* Fill m1..m3: m1 and m2 take the scalars g7 and g7.4, m3 copies g3. */
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+/* URB message of length 4, flagged EOT (presumably ends the thread). */
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+/* Trailing nops (presumably padding -- TODO confirm). */
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/assembler/doc/examples/packed_yuv_wm.g4a b/assembler/doc/examples/packed_yuv_wm.g4a
new file mode 100644
index 0000000..d312d17
--- /dev/null
+++ b/assembler/doc/examples/packed_yuv_wm.g4a
@@ -0,0 +1,161 @@
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each
+ * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each
+ * subspan are given in GRF register 1.2 through 1.5 (which, with the word
+ * addressing below, are 1.4 through 1.11).
+ *
+ * The result is WM_X*_R and WM_Y*_R being:
+ *
+ * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
+ * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
+ * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
+ * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
+ */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture Y offset */
+add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
+ /* sampler */
+mov (8) m1<1>F g4<8,8,1>F { align1 };
+mov (8) m2<1>F g5<8,8,1>F { align1 };
+mov (8) m3<1>F g6<8,8,1>F { align1 };
+mov (8) m4<1>F g7<8,8,1>F { align1 };
+
+ /*
+ * g0 holds the PS thread payload, which (oddly) contains
+ * precisely what the sampler wants to see in m0
+ */
+send (16) 0 g12<1>UW g0<8,8,1>UW sampler (1,0,F) mlen 5 rlen 8 { align1 };
+mov (8) g19<1>UW g19<8,8,1>UW { align1 };
+
+ /* color space conversion function:
+ * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
+ * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
+ * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
+ *
+ * Y is g14, g15.
+ * Cr is g12, g13.
+ * Cb is g16, g17.
+ *
+ * R is m2, m6.
+ * G is m3, m7.
+ * B is m4, m8.
+ */
+ /* Y = Y - 16/255 */
+add (8) g14<1>F g14<8,8,1>F -0.0627451F { align1 };
+ /* Cr = Cr - 128/255 */
+add (8) g12<1>F g12<8,8,1>F -0.501961F { align1 };
+ /* Cb = Cb - 128 / 255 */
+add (8) g16<1>F g16<8,8,1>F -0.501961F { align1 };
+ /* Y = Y * 1.164 */
+mul (8) g14<1>F g14<8,8,1>F 1.164F { align1 };
+ /* acc = 1.596 * Cr */
+mul (8) null g12<8,8,1>F 1.596F { align1 };
+ /* R = acc + Y */
+mac.sat (8) m2<1>F g14<8,8,1>F 1F { align1 };
+ /* acc = Cr * -0.813 */
+mul (8) null g12<8,8,1>F -0.813F { align1 };
+ /* acc += Cb * -0.392 */
+mac (8) null g16<8,8,1>F -0.392F { align1 };
+ /* G = acc + Y */
+mac.sat (8) m3<1>F g14<8,8,1>F 1F { align1 };
+ /* acc = Cb * 2.017 */
+mul (8) null g16<8,8,1>F 2.017F { align1 };
+ /* B = acc + Y */
+mac.sat (8) m4<1>F g14<8,8,1>F 1F { align1 };
+ /* and do it again */
+add (8) g15<1>F g15<8,8,1>F -0.0627451F { align1 };
+add (8) g13<1>F g13<8,8,1>F -0.501961F { align1 };
+add (8) g17<1>F g17<8,8,1>F -0.501961F { align1 };
+mul (8) g15<1>F g15<8,8,1>F 1.164F { align1 };
+mul (8) null g13<8,8,1>F 1.596F { align1 };
+mac.sat (8) m6<1>F g15<8,8,1>F 1F { align1 };
+mul (8) null g13<8,8,1>F -0.813F { align1 };
+mac (8) null g17<8,8,1>F -0.392F { align1 };
+mac.sat (8) m7<1>F g15<8,8,1>F 1F { align1 };
+mul (8) null g17<8,8,1>F 2.017F { align1 };
+mac.sat (8) m8<1>F g15<8,8,1>F 1F { align1 };
+
+ /* Pass through control information:
+ */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+ /* Send framebuffer write message: XXX: acc0? */
+send (16) 0 null g0<8,8,1>UW write (
+ 0, /* binding table index 0 */
+ 8, /* pixel scoreboard clear */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ ) mlen 10 rlen 0 { align1 EOT };
+ /* padding */
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/assembler/gen4asm.h b/assembler/gen4asm.h
new file mode 100644
index 0000000..dca7f0f
--- /dev/null
+++ b/assembler/gen4asm.h
@@ -0,0 +1,227 @@
+/* -*- c-basic-offset: 8 -*- */
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#ifndef __GEN4ASM_H__
+#define __GEN4ASM_H__
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "brw_reg.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+
+extern long int gen_level;
+extern int advanced_flag;
+extern int errors;
+
+/*
+ * Bits of warning_flags.  The constants are unsigned, like warning_flags
+ * itself: shifting a signed 1 into bit 31 overflows int.
+ */
+#define WARN_ALWAYS (1U << 0)
+#define WARN_ALL (1U << 31)
+extern unsigned int warning_flags;
+
+extern char *input_filename;
+
+extern struct brw_context genasm_context;
+extern struct brw_compile genasm_compile;
+
+/*
+ * gen_level stores the generation multiplied by ten (75 is Haswell, see
+ * IS_HASWELL), so Gen X spans the values X*10 up to, but excluding, (X+1)*10.
+ */
+
+/* Predicate for Gen X and above */
+#define IS_GENp(x) (gen_level >= (x)*10)
+
+/* Predicate for Gen X exactly */
+#define IS_GENx(x) (gen_level >= (x)*10 && gen_level < ((x)+1)*10)
+
+/* Predicate to match Haswell processors (the argument is not used) */
+#define IS_HASWELL(x) (gen_level == 75)
+
+void yyerror (char *msg);
+
+#define STRUCT_SIZE_ASSERT(TYPE, SIZE) \
+typedef struct { \
+ char compile_time_assert_ ## TYPE ## _size[ \
+ (sizeof (struct TYPE) == (SIZE)) ? 1 : -1]; \
+ } _ ## TYPE ## SizeCheck
+
+/* ensure nobody changes the size of struct brw_instruction */
+STRUCT_SIZE_ASSERT(brw_instruction, 16);
+
+/*
+ * Number of elements of the array x (x must be a real array, not a pointer).
+ * The argument is parenthesized before being indexed so that an expression
+ * argument keeps its meaning.
+ */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+struct condition {
+ int cond;
+ int flag_reg_nr;
+ int flag_subreg_nr;
+};
+
+struct predicate {
+ unsigned pred_control:4;
+ unsigned pred_inverse:1;
+ unsigned flag_reg_nr:1;
+ unsigned flag_subreg_nr:1;
+};
+
+/* Instruction option bits collected by the parser, one bit-field per option. */
+struct options {
+ unsigned access_mode:1;
+ unsigned compression_control:2; /* gen6: quarter control */
+ unsigned thread_control:2;
+ unsigned dependency_control:2;
+ unsigned mask_control:1;
+ unsigned debug_control:1;
+ unsigned acc_wr_control:1;
+
+ unsigned end_of_thread:1;
+};
+
+struct region {
+ int vert_stride, width, horiz_stride;
+ int is_default;
+};
+struct regtype {
+ int type;
+ int is_default;
+};
+
+/**
+ * This structure is the internal representation of source operands in the
+ * parser.
+ *
+ * Only the type is declared here.  This header is included by several source
+ * files, so also defining an object named src_operand in it would give every
+ * one of them its own definition of the same global.
+ */
+struct src_operand {
+	struct brw_reg reg;
+	int default_region;
+	uint32_t imm32; /* set if src_operand is expressing a branch offset */
+	char *reloc_target; /* bspec: branching instructions JIP and UIP are source operands */
+};
+
+typedef struct {
+ enum {
+ imm32_d, imm32_f
+ } r;
+ union {
+ uint32_t d;
+ float f;
+ int32_t signed_d;
+ } u;
+} imm32_t;
+
+enum assembler_instruction_type {
+ GEN4ASM_INSTRUCTION_GEN,
+ GEN4ASM_INSTRUCTION_GEN_RELOCATABLE,
+ GEN4ASM_INSTRUCTION_LABEL,
+};
+
+struct label_instruction {
+ char *name;
+};
+
+struct relocation {
+ char *first_reloc_target, *second_reloc_target; // JIP and UIP respectively
+ int first_reloc_offset, second_reloc_offset; // in number of instructions
+};
+
+/**
+ * This structure is just the list container for instructions accumulated by
+ * the parser and labels.
+ */
+struct brw_program_instruction {
+ enum assembler_instruction_type type;
+ unsigned inst_offset;
+ union {
+ struct brw_instruction gen;
+ struct label_instruction label;
+ } insn;
+ struct relocation reloc;
+ struct brw_program_instruction *next;
+};
+
+/* Tell whether a program entry is a label rather than an instruction. */
+static inline bool is_label(struct brw_program_instruction *instruction)
+{
+	const bool label = (instruction->type == GEN4ASM_INSTRUCTION_LABEL);
+
+	return label;
+}
+
+/* Name carried by a label entry; asserts that the entry really is a label. */
+static inline char *label_name(struct brw_program_instruction *i)
+{
+	char *name;
+
+	assert(is_label(i));
+	name = i->insn.label.name;
+
+	return name;
+}
+
+/* Tell whether a program entry is an instruction carrying relocations. */
+static inline bool is_relocatable(struct brw_program_instruction *instruction)
+{
+	if (instruction->type == GEN4ASM_INSTRUCTION_GEN_RELOCATABLE)
+		return true;
+
+	return false;
+}
+
+/**
+ * This structure is a list of instructions. It is the final output of the
+ * parser.
+ */
+struct brw_program {
+ struct brw_program_instruction *first;
+ struct brw_program_instruction *last;
+};
+
+extern struct brw_program compiled_program;
+
+#define TYPE_B_INDEX 0
+#define TYPE_UB_INDEX 1
+#define TYPE_W_INDEX 2
+#define TYPE_UW_INDEX 3
+#define TYPE_D_INDEX 4
+#define TYPE_UD_INDEX 5
+#define TYPE_F_INDEX 6
+
+#define TOTAL_TYPES 7
+
+struct program_defaults {
+ int execute_size;
+ int execute_type[TOTAL_TYPES];
+ int register_type;
+ int register_type_regfile;
+ struct region source_region;
+ struct region source_region_type[TOTAL_TYPES];
+ struct region dest_region;
+ struct region dest_region_type[TOTAL_TYPES];
+};
+extern struct program_defaults program_defaults;
+
+struct declared_register {
+ char *name;
+ struct brw_reg reg;
+ int element_size;
+ struct region src_region;
+ int dst_region;
+};
+struct declared_register *find_register(char *name);
+void insert_register(struct declared_register *reg);
+
+int yyparse(void);
+int yylex(void);
+int yylex_destroy(void);
+
+char *
+lex_text(void);
+
+#endif /* __GEN4ASM_H__ */
diff --git a/assembler/gram.y b/assembler/gram.y
new file mode 100644
index 0000000..50d71d1
--- /dev/null
+++ b/assembler/gram.y
@@ -0,0 +1,3035 @@
+%{
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <assert.h>
+#include "gen4asm.h"
+#include "brw_eu.h"
+
+#define DEFAULT_EXECSIZE (ffs(program_defaults.execute_size) - 1)
+#define DEFAULT_DSTREGION -1
+
+#define SWIZZLE(reg) (reg.dw1.bits.swizzle)
+
+#define GEN(i) (&(i)->insn.gen)
+
+#define YYLTYPE YYLTYPE
+typedef struct YYLTYPE
+{
+ int first_line;
+ int first_column;
+ int last_line;
+ int last_column;
+} YYLTYPE;
+
+extern int need_export;
+static struct src_operand src_null_reg =
+{
+ .reg.file = BRW_ARCHITECTURE_REGISTER_FILE,
+ .reg.nr = BRW_ARF_NULL,
+ .reg.type = BRW_REGISTER_TYPE_UD,
+};
+static struct brw_reg dst_null_reg =
+{
+ .file = BRW_ARCHITECTURE_REGISTER_FILE,
+ .nr = BRW_ARF_NULL,
+};
+static struct brw_reg ip_dst =
+{
+ .file = BRW_ARCHITECTURE_REGISTER_FILE,
+ .nr = BRW_ARF_IP,
+ .type = BRW_REGISTER_TYPE_UD,
+ .address_mode = BRW_ADDRESS_DIRECT,
+ .hstride = 1,
+ .dw1.bits.writemask = BRW_WRITEMASK_XYZW,
+};
+static struct src_operand ip_src =
+{
+ .reg.file = BRW_ARCHITECTURE_REGISTER_FILE,
+ .reg.nr = BRW_ARF_IP,
+ .reg.type = BRW_REGISTER_TYPE_UD,
+ .reg.address_mode = BRW_ADDRESS_DIRECT,
+ .reg.dw1.bits.swizzle = BRW_SWIZZLE_NOOP,
+};
+
+static int get_type_size(unsigned type);
+static void set_instruction_opcode(struct brw_program_instruction *instr,
+ unsigned opcode);
+static int set_instruction_dest(struct brw_program_instruction *instr,
+ struct brw_reg *dest);
+static int set_instruction_src0(struct brw_program_instruction *instr,
+ struct src_operand *src,
+ YYLTYPE *location);
+static int set_instruction_src1(struct brw_program_instruction *instr,
+ struct src_operand *src,
+ YYLTYPE *location);
+static int set_instruction_dest_three_src(struct brw_program_instruction *instr,
+ struct brw_reg *dest);
+static int set_instruction_src0_three_src(struct brw_program_instruction *instr,
+ struct src_operand *src);
+static int set_instruction_src1_three_src(struct brw_program_instruction *instr,
+ struct src_operand *src);
+static int set_instruction_src2_three_src(struct brw_program_instruction *instr,
+ struct src_operand *src);
+static void set_instruction_saturate(struct brw_program_instruction *instr,
+ int saturate);
+static void set_instruction_options(struct brw_program_instruction *instr,
+ struct options options);
+static void set_instruction_predicate(struct brw_program_instruction *instr,
+ struct predicate *p);
+static void set_instruction_pred_cond(struct brw_program_instruction *instr,
+ struct predicate *p,
+ struct condition *c,
+ YYLTYPE *location);
+static void set_direct_dst_operand(struct brw_reg *dst, struct brw_reg *reg,
+ int type);
+static void set_direct_src_operand(struct src_operand *src, struct brw_reg *reg,
+ int type);
+
+enum message_level {
+ WARN,
+ ERROR,
+};
+
+/*
+ * Print a diagnostic on stderr.
+ *
+ * The message is prefixed with "<file>:<line>:<column>: <level>: " when a
+ * location is supplied and with "<file>:<level>: " otherwise; the rest is
+ * the printf-style format and its arguments.
+ */
+static void message(enum message_level level, YYLTYPE *location,
+		    const char *fmt, ...)
+{
+	static const char *level_str[] = { "warning", "error" };
+	const char *tag = level_str[level];
+	va_list ap;
+
+	if (!location) {
+		fprintf(stderr, "%s:%s: ", input_filename, tag);
+	} else {
+		fprintf(stderr, "%s:%d:%d: %s: ", input_filename,
+			location->first_line, location->first_column, tag);
+	}
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+}
+
+#define warn(flag, l, fmt, ...) \
+ do { \
+ if (warning_flags & WARN_ ## flag) \
+ message(WARN, l, fmt, ## __VA_ARGS__); \
+ } while(0)
+
+#define error(l, fmt, ...) \
+ do { \
+ message(ERROR, l, fmt, ## __VA_ARGS__); \
+ } while(0)
+
+/*
+ * Like strcmp(), but handles NULL pointers: two NULLs compare equal and a
+ * NULL string orders before any non-NULL string.
+ *
+ * The result is an int, as for strcmp(); a bool would collapse the negative
+ * "less than" result into true.
+ */
+static int strcmp0(const char *s1, const char* s2)
+{
+	if (!s1)
+		return s2 ? -1 : 0;
+	if (!s2)
+		return 1;
+	return strcmp (s1, s2);
+}
+
+/* Byte-wise comparison of two regions; true when they are identical. */
+static bool region_equal(struct region *r1, struct region *r2)
+{
+	const int diff = memcmp(r1, r2, sizeof(*r1));
+
+	return !diff;
+}
+
+/* Byte-wise comparison of two registers; true when they are identical. */
+static bool reg_equal(struct brw_reg *r1, struct brw_reg *r2)
+{
+	const int diff = memcmp(r1, r2, sizeof(*r1));
+
+	return !diff;
+}
+
+/*
+ * Two declared registers are equal when their names, registers, source
+ * regions, element sizes and destination regions all match.
+ */
+static bool declared_register_equal(struct declared_register *r1,
+				    struct declared_register *r2)
+{
+	return strcmp0(r1->name, r2->name) == 0 &&
+	       reg_equal(&r1->reg, &r2->reg) &&
+	       region_equal(&r1->src_region, &r2->src_region) &&
+	       r1->element_size == r2->element_size &&
+	       r1->dst_region == r2->dst_region;
+}
+
+/* Reset a program to the empty list (every byte of the structure zeroed). */
+static void brw_program_init(struct brw_program *p)
+{
+	memset(p, 0, sizeof(*p));
+}
+
+/* Link an entry at the tail of the program; it becomes the new last entry. */
+static void brw_program_append_entry(struct brw_program *p,
+				     struct brw_program_instruction *entry)
+{
+	entry->next = NULL;
+
+	if (!p->last) {
+		/* empty list: the entry is also the head */
+		p->first = entry;
+	} else {
+		p->last->next = entry;
+	}
+
+	p->last = entry;
+}
+
+/*
+ * Append a copy of the instruction's encoding to the program as a plain
+ * (non relocatable) entry.
+ */
+static void
+brw_program_add_instruction(struct brw_program *p,
+			    struct brw_program_instruction *instruction)
+{
+	struct brw_program_instruction *node = calloc(1, sizeof(*node));
+
+	node->insn.gen = instruction->insn.gen;
+	node->type = GEN4ASM_INSTRUCTION_GEN;
+	brw_program_append_entry(p, node);
+}
+
+/*
+ * Append a copy of the instruction's encoding and of its relocation data to
+ * the program as a relocatable entry.
+ */
+static void
+brw_program_add_relocatable(struct brw_program *p,
+			    struct brw_program_instruction *instruction)
+{
+	struct brw_program_instruction *node = calloc(1, sizeof(*node));
+
+	node->reloc = instruction->reloc;
+	node->insn.gen = instruction->insn.gen;
+	node->type = GEN4ASM_INSTRUCTION_GEN_RELOCATABLE;
+	brw_program_append_entry(p, node);
+}
+
+/* Append a label entry holding its own copy of the label text. */
+static void brw_program_add_label(struct brw_program *p, const char *label)
+{
+	struct brw_program_instruction *node = calloc(1, sizeof(*node));
+
+	node->type = GEN4ASM_INSTRUCTION_LABEL;
+	node->insn.label.name = strdup(label);
+	brw_program_append_entry(p, node);
+}
+
+/*
+ * Replace the DEFAULT_DSTREGION placeholder by the destination region of the
+ * declared register, or by 1 when there is no declaration.  Any other value
+ * is passed through.  The result is asserted to be 1, 2 or 3.
+ */
+static int resolve_dst_region(struct declared_register *reference, int region)
+{
+	int resolved;
+
+	if (region != DEFAULT_DSTREGION)
+		resolved = region;
+	else
+		resolved = reference ? reference->dst_region : 1;
+
+	assert(resolved == 1 || resolved == 2 || resolved == 3);
+	return resolved;
+}
+
+/*
+ * Reject a directly addressed destination of an align1 instruction whose
+ * write mask is neither empty nor XYZW; everything else is accepted.
+ */
+static bool validate_dst_reg(struct brw_instruction *insn, struct brw_reg *reg)
+{
+	/* the check only concerns direct addressing in align1 mode */
+	if (reg->address_mode != BRW_ADDRESS_DIRECT)
+		return true;
+	if (insn->header.access_mode != BRW_ALIGN_1)
+		return true;
+
+	if (reg->dw1.bits.writemask == 0 ||
+	    reg->dw1.bits.writemask == BRW_WRITEMASK_XYZW)
+		return true;
+
+	fprintf(stderr, "error: write mask set in align1 instruction\n");
+	return false;
+}
+
+/*
+ * Check a source operand against the register region rules.
+ *
+ * Immediate operands are always accepted.  A swizzle other than the no-op
+ * one in an align1 instruction is an error and makes the function return
+ * false.  The region restrictions B, D and E below only emit warnings, so
+ * the function returns true for every other operand.
+ */
+static bool validate_src_reg(struct brw_instruction *insn,
+			     struct brw_reg reg,
+			     YYLTYPE *location)
+{
+	/* decoding tables, indexed by the encoded field value */
+	static const int hstride_for_reg[] = {0, 1, 2, 4};
+	static const int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
+	static const int width_for_reg[] = {1, 2, 4, 8, 16};
+	static const int execsize_for_reg[] = {1, 2, 4, 8, 16, 32};
+	int width, hstride, vstride, execsize;
+
+	if (reg.file == BRW_IMMEDIATE_VALUE)
+		return true;
+
+	if (insn->header.access_mode == BRW_ALIGN_1 &&
+	    SWIZZLE(reg) && SWIZZLE(reg) != BRW_SWIZZLE_NOOP)
+	{
+		error(location, "swizzle bits set in align1 instruction\n");
+		return false;
+	}
+
+	assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
+	hstride = hstride_for_reg[reg.hstride];
+
+	/* the 0xf encoding has no table entry; it is tracked as -1 */
+	if (reg.vstride == 0xf) {
+		vstride = -1;
+	} else {
+		assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
+		vstride = vstride_for_reg[reg.vstride];
+	}
+
+	assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
+	width = width_for_reg[reg.width];
+
+	assert(insn->header.execution_size >= 0 &&
+	       insn->header.execution_size < ARRAY_SIZE(execsize_for_reg));
+	execsize = execsize_for_reg[insn->header.execution_size];
+
+	/* Register Region Restrictions */
+
+	/* B. If ExecSize = Width and HorzStride ≠ 0, VertStride must be set to
+	 * Width * HorzStride. */
+	if (execsize == width && hstride != 0) {
+		/* no ';' after the condition: warn only when the rule is broken */
+		if (vstride != -1 && vstride != width * hstride)
+			warn(ALL, location, "execution size == width and hstride != 0 but "
+			     "vstride is not width * hstride\n");
+	}
+
+	/* D. If Width = 1, HorzStride must be 0 regardless of the values of
+	 * ExecSize and VertStride.
+	 *
+	 * FIXME: In "advanced mode" hstride is set to 1, this is probably a bug
+	 * to fix, but it changes the generated opcodes and thus needs validation.
+	 */
+	if (width == 1 && hstride != 0)
+		warn(ALL, location, "region width is 1 but horizontal stride is %d "
+		     " (should be 0)\n", hstride);
+
+	/* E. If ExecSize = Width = 1, both VertStride and HorzStride must be 0.
+	 * This defines a scalar. */
+	if (execsize == 1 && width == 1) {
+		if (hstride != 0)
+			warn(ALL, location, "execution size and region width are 1 but "
+			     "horizontal stride is %d (should be 0)\n", hstride);
+		if (vstride != 0)
+			warn(ALL, location, "execution size and region width are 1 but "
+			     "vertical stride is %d (should be 0)\n", vstride);
+	}
+
+	return true;
+}
+
+/*
+ * Sub-register number to encode for a directly addressed register.
+ *
+ * Without advanced_flag the number is used as is; with it, the number is
+ * multiplied by the size of the register type.  Only direct addressing of a
+ * non-immediate register file is allowed (asserted).
+ */
+static int get_subreg_address(unsigned regfile, unsigned type, unsigned subreg, unsigned address_mode)
+{
+	assert(address_mode == BRW_ADDRESS_DIRECT);
+	assert(regfile != BRW_IMMEDIATE_VALUE);
+
+	if (!advanced_flag)
+		return subreg;
+
+	return subreg * get_type_size(type);
+}
+
+/* only used in indirect address mode.
+ * input: sub-register number of an address register
+ * output: the value of AddrSubRegNum in the instruction binary code
+ *
+ * input output(advanced_flag==0) output(advanced_flag==1)
+ * a0.0 0 0
+ * a0.1 invalid input 1
+ * a0.2 1 2
+ * a0.3 invalid input 3
+ * a0.4 2 4
+ * a0.5 invalid input 5
+ * a0.6 3 6
+ * a0.7 invalid input 7
+ * a0.8 4 invalid input
+ * a0.10 5 invalid input
+ * a0.12 6 invalid input
+ * a0.14 7 invalid input
+ */
+static int get_indirect_subreg_address(unsigned subreg)
+{
+	/* without advanced_flag the input number is halved */
+	if (advanced_flag == 0)
+		return subreg / 2;
+
+	return subreg;
+}
+
+/*
+ * Rewrite reg->subnr in place with the value to encode, using the direct or
+ * the indirect conversion according to the address mode.  Immediates are
+ * left untouched.
+ */
+static void resolve_subnr(struct brw_reg *reg)
+{
+	if (reg->file == BRW_IMMEDIATE_VALUE)
+		return;
+
+	if (reg->address_mode != BRW_ADDRESS_DIRECT) {
+		reg->subnr = get_indirect_subreg_address(reg->subnr);
+		return;
+	}
+
+	reg->subnr = get_subreg_address(reg->file, reg->type, reg->subnr,
+					reg->address_mode);
+}
+
+
+%}
+%locations
+
+%start ROOT
+
+%union {
+ char *string;
+ int integer;
+ double number;
+ struct brw_program_instruction instruction;
+ struct brw_program program;
+ struct region region;
+ struct regtype regtype;
+ struct brw_reg reg;
+ struct condition condition;
+ struct predicate predicate;
+ struct options options;
+ struct declared_register symbol_reg;
+ imm32_t imm32;
+
+ struct src_operand src_operand;
+}
+
+%token COLON
+%token SEMICOLON
+%token LPAREN RPAREN
+%token LANGLE RANGLE
+%token LCURLY RCURLY
+%token LSQUARE RSQUARE
+%token COMMA EQ
+%token ABS DOT
+%token PLUS MINUS MULTIPLY DIVIDE
+
+%token <integer> TYPE_UD TYPE_D TYPE_UW TYPE_W TYPE_UB TYPE_B
+%token <integer> TYPE_VF TYPE_HF TYPE_V TYPE_F
+
+%token ALIGN1 ALIGN16 SECHALF COMPR SWITCH ATOMIC NODDCHK NODDCLR
+%token MASK_DISABLE BREAKPOINT ACCWRCTRL EOT
+
+%token SEQ ANY2H ALL2H ANY4H ALL4H ANY8H ALL8H ANY16H ALL16H ANYV ALLV
+%token <integer> ZERO EQUAL NOT_ZERO NOT_EQUAL GREATER GREATER_EQUAL LESS LESS_EQUAL
+%token <integer> ROUND_INCREMENT OVERFLOW UNORDERED
+%token <integer> GENREG MSGREG ADDRESSREG ACCREG FLAGREG
+%token <integer> MASKREG AMASK IMASK LMASK CMASK
+%token <integer> MASKSTACKREG LMS IMS MASKSTACKDEPTHREG IMSD LMSD
+%token <integer> NOTIFYREG STATEREG CONTROLREG IPREG
+%token GENREGFILE MSGREGFILE
+
+%token <integer> MOV FRC RNDU RNDD RNDE RNDZ NOT LZD
+%token <integer> MUL MAC MACH LINE SAD2 SADA2 DP4 DPH DP3 DP2
+%token <integer> AVG ADD SEL AND OR XOR SHR SHL ASR CMP CMPN PLN
+%token <integer> ADDC BFI1 BFREV CBIT F16TO32 F32TO16 FBH FBL
+%token <integer> SEND NOP JMPI IF IFF WHILE ELSE BREAK CONT HALT MSAVE
+%token <integer> PUSH MREST POP WAIT DO ENDIF ILLEGAL
+%token <integer> MATH_INST
+%token <integer> MAD LRP BFE BFI2 SUBB
+%token <integer> CALL RET
+%token <integer> BRD BRC
+
+%token NULL_TOKEN MATH SAMPLER GATEWAY READ WRITE URB THREAD_SPAWNER VME DATA_PORT CRE
+
+%token MSGLEN RETURNLEN
+%token <integer> ALLOCATE USED COMPLETE TRANSPOSE INTERLEAVE
+%token SATURATE
+
+%token <integer> INTEGER
+%token <string> STRING
+%token <number> NUMBER
+
+%token <integer> INV LOG EXP SQRT RSQ POW SIN COS SINCOS INTDIV INTMOD
+%token <integer> INTDIVMOD
+%token SIGNED SCALAR
+
+%token <integer> X Y Z W
+
+%token <integer> KERNEL_PRAGMA END_KERNEL_PRAGMA CODE_PRAGMA END_CODE_PRAGMA
+%token <integer> REG_COUNT_PAYLOAD_PRAGMA REG_COUNT_TOTAL_PRAGMA DECLARE_PRAGMA
+%token <integer> BASE ELEMENTSIZE SRCREGION DSTREGION TYPE
+
+%token <integer> DEFAULT_EXEC_SIZE_PRAGMA DEFAULT_REG_TYPE_PRAGMA
+%nonassoc SUBREGNUM
+%nonassoc SNDOPR
+%left PLUS MINUS
+%left MULTIPLY DIVIDE
+%right UMINUS
+%nonassoc DOT
+%nonassoc STR_SYMBOL_REG
+%nonassoc EMPTEXECSIZE
+%nonassoc LPAREN
+
+%type <integer> exp sndopr
+%type <integer> simple_int
+%type <instruction> instruction unaryinstruction binaryinstruction
+%type <instruction> binaryaccinstruction trinaryinstruction sendinstruction
+%type <instruction> syncinstruction
+%type <instruction> msgtarget
+%type <instruction> mathinstruction
+%type <instruction> nopinstruction
+%type <instruction> relocatableinstruction breakinstruction
+%type <instruction> ifelseinstruction loopinstruction haltinstruction
+%type <instruction> multibranchinstruction subroutineinstruction jumpinstruction
+%type <string> label
+%type <program> instrseq
+%type <integer> instoption
+%type <integer> unaryop binaryop binaryaccop breakop
+%type <integer> trinaryop
+%type <condition> conditionalmodifier
+%type <predicate> predicate
+%type <options> instoptions instoption_list
+%type <integer> condition saturate negate abs chansel
+%type <integer> writemask_x writemask_y writemask_z writemask_w
+%type <integer> srcimmtype execsize dstregion immaddroffset
+%type <integer> subregnum sampler_datatype
+%type <integer> urb_swizzle urb_allocate urb_used urb_complete
+%type <integer> math_function math_signed math_scalar
+%type <integer> predctrl predstate
+%type <region> region region_wh indirectregion declare_srcregion;
+%type <regtype> regtype
+%type <reg> directgenreg directmsgreg addrreg accreg flagreg maskreg
+%type <reg> maskstackreg notifyreg
+/* %type <reg> maskstackdepthreg */
+%type <reg> statereg controlreg ipreg nullreg
+%type <reg> dstoperandex_typed srcarchoperandex_typed
+%type <reg> sendleadreg
+%type <reg> indirectgenreg indirectmsgreg addrparam
+%type <integer> mask_subreg maskstack_subreg
+%type <integer> declare_elementsize declare_dstregion declare_type
+/* %type <integer> maskstackdepth_subreg */
+%type <symbol_reg> symbol_reg symbol_reg_p;
+%type <imm32> imm32
+%type <reg> dst dstoperand dstoperandex dstreg post_dst writemask
+%type <reg> declare_base
+%type <src_operand> directsrcoperand srcarchoperandex directsrcaccoperand
+%type <src_operand> indirectsrcoperand
+%type <src_operand> src srcimm imm32reg payload srcacc srcaccimm swizzle
+%type <src_operand> relativelocation relativelocation2
+
+%code {
+
+/*
+ * Drop any earlier definition of error() and replace it with a parser-action
+ * helper: the problem is reported through message() with severity ERROR for
+ * location l, and YYERROR is then expanded in place to raise a parse error.
+ * Statements following an error() call in an action are therefore skipped.
+ */
+#undef error
+#define error(l, fmt, ...) \
+ do { \
+ message(ERROR, l, fmt, ## __VA_ARGS__); \
+ YYERROR; \
+ } while(0)
+
+/*
+ * Record one instruction-option token in *options.
+ *
+ * options: accumulator that is updated in place.
+ * option:  token value identifying the option; values that match none of
+ *          the tokens tested below leave *options untouched.
+ *
+ * ALIGN1/ALIGN16, MASK_DISABLE, BREAKPOINT, ACCWRCTRL and EOT overwrite their
+ * field; SECHALF, COMPR, SWITCH, ATOMIC, NODDCHK and NODDCLR OR a bit into it.
+ */
+static void add_option(struct options *options, int option)
+{
+	if (option == ALIGN1) {
+		options->access_mode = BRW_ALIGN_1;
+	} else if (option == ALIGN16) {
+		options->access_mode = BRW_ALIGN_16;
+	} else if (option == SECHALF) {
+		options->compression_control |= BRW_COMPRESSION_2NDHALF;
+	} else if (option == COMPR) {
+		/* The compressed bit is only recorded when IS_GENp(6) is false. */
+		if (!IS_GENp(6))
+			options->compression_control |= BRW_COMPRESSION_COMPRESSED;
+	} else if (option == SWITCH) {
+		options->thread_control |= BRW_THREAD_SWITCH;
+	} else if (option == ATOMIC) {
+		options->thread_control |= BRW_THREAD_ATOMIC;
+	} else if (option == NODDCHK) {
+		options->dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
+	} else if (option == NODDCLR) {
+		options->dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
+	} else if (option == MASK_DISABLE) {
+		options->mask_control = BRW_MASK_DISABLE;
+	} else if (option == BREAKPOINT) {
+		options->debug_control = BRW_DEBUG_BREAKPOINT;
+	} else if (option == ACCWRCTRL) {
+		options->acc_wr_control = BRW_ACCUMULATOR_WRITE_ENABLE;
+	} else if (option == EOT) {
+		options->end_of_thread = 1;
+	}
+}
+
+}
+
+%%
+/* Integer literal with an optional leading MINUS; yields the signed value. */
+simple_int: INTEGER { $$ = $1; }
+ | MINUS INTEGER { $$ = -$2;}
+;
+
+/*
+ * Integer expression evaluated while parsing: literals, the four binary
+ * operators, unary minus (bound with the UMINUS precedence) and parentheses.
+ * The rule's value is the computed integer.
+ */
+exp:	INTEGER
+	{
+		$$ = $1;
+	}
+	| exp PLUS exp
+	{
+		$$ = $1 + $3;
+	}
+	| exp MINUS exp
+	{
+		$$ = $1 - $3;
+	}
+	| exp MULTIPLY exp
+	{
+		$$ = $1 * $3;
+	}
+	| exp DIVIDE exp
+	{
+		/* A zero divisor is turned into a parse error. */
+		if ($3 == 0)
+			YYERROR;
+		$$ = $1 / $3;
+	}
+	| MINUS exp %prec UMINUS
+	{
+		$$ = -$2;
+	}
+	| LPAREN exp RPAREN
+	{
+		$$ = $2;
+	}
+	;
+
+/* Publishes the fully parsed instruction sequence in compiled_program. */
+ROOT: instrseq
+ {
+ compiled_program = $1;
+ }
+;
+
+
+/*
+ * A label is a STRING followed by COLON. The rule has no explicit action, so
+ * it relies on the parser generator's default action to pass the STRING value
+ * through as the value of the label.
+ */
+label: STRING COLON
+;
+
+/* "BASE = dstreg" clause of a declaration; yields the register unchanged. */
+declare_base: BASE EQ dstreg
+ {
+ $$ = $3;
+ }
+;
+/* "ELEMENTSIZE = exp" clause of a declaration; yields the evaluated integer. */
+declare_elementsize: ELEMENTSIZE EQ exp
+ {
+ $$ = $3;
+ }
+;
+/*
+ * Optional "SRCREGION = region" clause of a declaration. When the clause is
+ * absent the region is zero-filled, its width is set to BRW_WIDTH_1 and both
+ * strides are set to ffs(0) (POSIX defines ffs(0) as 0).
+ */
+declare_srcregion: /* empty */
+ {
+ /* XXX is this default correct?*/
+ memset (&$$, '\0', sizeof ($$));
+ $$.vert_stride = ffs(0);
+ $$.width = BRW_WIDTH_1;
+ $$.horiz_stride = ffs(0);
+ }
+ | SRCREGION EQ region
+ {
+ $$ = $3;
+ }
+;
+/*
+ * Optional "DSTREGION = dstregion" clause of a declaration; the value is 1
+ * when the clause is omitted.
+ */
+declare_dstregion: /* empty */
+ {
+ $$ = 1;
+ }
+ | DSTREGION EQ dstregion
+ {
+ $$ = $3;
+ }
+;
+/* "TYPE = regtype" clause of a declaration; yields the regtype's type member. */
+declare_type: TYPE EQ regtype
+ {
+ $$ = $3.type;
+ }
+;
+/*
+ * DECLARE_PRAGMA: associate the name STRING with a register description made
+ * of base register, element size, source region, destination region and type.
+ *
+ * If the name is already known (find_register), the new description must
+ * compare equal to the stored one (declared_register_equal), otherwise an
+ * error is reported. In both of those cases the freshly duplicated name string
+ * is released. If the name is new, a heap copy of the description, which
+ * takes over the name string, is handed to insert_register().
+ */
+declare_pragma: DECLARE_PRAGMA STRING declare_base declare_elementsize declare_srcregion declare_dstregion declare_type
+	{
+		struct declared_register reg, *found, *new_reg;
+
+		/* Zero first so members not assigned below are never stack garbage
+		 * when the struct is compared or copied. */
+		memset(&reg, 0, sizeof(reg));
+		reg.name = $2;
+		reg.reg = $3;
+		reg.element_size = $4;
+		reg.src_region = $5;
+		reg.dst_region = $6;
+		reg.reg.type = $7;
+
+		found = find_register($2);
+		if (found) {
+			if (!declared_register_equal(&reg, found)) {
+				/* Report first (the message prints the name), then
+				 * release the name before leaving through YYERROR;
+				 * the error() macro would skip the free(). */
+				message(ERROR, &@1, "%s already defined and definitions "
+					"don't agree\n", $2);
+				free($2);
+				YYERROR;
+			}
+			free($2); // $2 has been malloc'ed by strdup
+		} else {
+			new_reg = malloc(sizeof(struct declared_register));
+			if (!new_reg) {
+				/* Nothing took ownership of the name yet. */
+				free($2);
+				error(&@1, "out of memory while declaring register\n");
+			}
+			*new_reg = reg;
+			insert_register(new_reg);
+		}
+	}
+;
+
+/* REG_COUNT_TOTAL_PRAGMA exp: accepted by the grammar; no action is attached. */
+reg_count_total_pragma: REG_COUNT_TOTAL_PRAGMA exp
+;
+/* REG_COUNT_PAYLOAD_PRAGMA exp: accepted by the grammar; no action is attached. */
+reg_count_payload_pragma: REG_COUNT_PAYLOAD_PRAGMA exp
+;
+
+/* Stores the evaluated expression in program_defaults.execute_size. */
+default_exec_size_pragma: DEFAULT_EXEC_SIZE_PRAGMA exp
+ {
+ program_defaults.execute_size = $2;
+ }
+;
+/* Stores the regtype's type member in program_defaults.register_type. */
+default_reg_type_pragma: DEFAULT_REG_TYPE_PRAGMA regtype
+ {
+ program_defaults.register_type = $2.type;
+ }
+;
+/* Any one of the pragma forms; none of them contributes an instruction. */
+pragma: reg_count_total_pragma
+ |reg_count_payload_pragma
+ |default_exec_size_pragma
+ |default_reg_type_pragma
+ |declare_pragma
+;
+
+/*
+ * Program body, built left-recursively. Each alternative either extends the
+ * program accumulated so far or starts a new one when it is the first item.
+ * Instructions and relocatable instructions are terminated by SEMICOLON and
+ * appended with brw_program_add_instruction() / brw_program_add_relocatable();
+ * labels are appended with brw_program_add_label(); pragmas and stray
+ * semicolons leave the program unchanged.
+ */
+instrseq: instrseq pragma
+ {
+ $$ = $1;
+ }
+ | instrseq instruction SEMICOLON
+ {
+ brw_program_add_instruction(&$1, &$2);
+ $$ = $1;
+ }
+ | instruction SEMICOLON
+ {
+ brw_program_init(&$$);
+ brw_program_add_instruction(&$$, &$1);
+ }
+ | instrseq relocatableinstruction SEMICOLON
+ {
+ brw_program_add_relocatable(&$1, &$2);
+ $$ = $1;
+ }
+ | relocatableinstruction SEMICOLON
+ {
+ brw_program_init(&$$);
+ brw_program_add_relocatable(&$$, &$1);
+ }
+ | instrseq SEMICOLON
+ {
+ $$ = $1;
+ }
+ | instrseq label
+ {
+ brw_program_add_label(&$1, $2);
+ $$ = $1;
+ }
+ | label
+ {
+ brw_program_init(&$$);
+ brw_program_add_label(&$$, $1);
+ }
+ | pragma
+ {
+ /* A leading pragma starts an empty program; unlike the other
+ * "first item" alternatives this sets the two list pointers
+ * directly instead of calling brw_program_init(). */
+ $$.first = NULL;
+ $$.last = NULL;
+ }
+ | instrseq error SEMICOLON {
+ /* Error recovery: resynchronise at the next SEMICOLON and keep
+ * the program parsed so far. */
+ $$ = $1;
+ }
+;
+
+/* 1.4.1: Instruction groups */
+// binaryinstruction: Source operands cannot be accumulators
+// binaryaccinstruction: Source operands can be accumulators
+/* An instruction is any one of the forms below; instrseq adds these with
+ * brw_program_add_instruction(). */
+instruction: unaryinstruction
+ | binaryinstruction
+ | binaryaccinstruction
+ | trinaryinstruction
+ | sendinstruction
+ | syncinstruction
+ | mathinstruction
+ | nopinstruction
+;
+
+/* A relocatableinstruction is an instruction that needs a relocation pass.
+ * The actions of these forms fill in the reloc member of the result (the DO
+ * and Gen4 ENDIF forms leave it zeroed), and instrseq adds them with
+ * brw_program_add_relocatable(). */
+relocatableinstruction: ifelseinstruction
+ | loopinstruction
+ | haltinstruction
+ | multibranchinstruction
+ | subroutineinstruction
+ | jumpinstruction
+ | breakinstruction
+;
+
+/*
+ * IF / ELSE / ENDIF. The accepted syntax depends on the target generation as
+ * tested by IS_GENp(): each alternative rejects the generations it does not
+ * apply to with error(). Branch targets are recorded in the reloc member of
+ * the result.
+ * NOTE(review): the parsed instoptions value is not referenced by any of the
+ * actions below - confirm that is intended.
+ */
+ifelseinstruction: ENDIF
+ {
+ // for Gen4
+ if(IS_GENp(6)) // For gen6+.
+ error(&@1, "should be 'ENDIF execsize relativelocation'\n");
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $1);
+ GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH;
+ GEN(&$$)->bits1.da1.dest_horiz_stride = 1;
+ GEN(&$$)->bits1.da1.src1_reg_file = BRW_ARCHITECTURE_REGISTER_FILE;
+ GEN(&$$)->bits1.da1.src1_reg_type = BRW_REGISTER_TYPE_UD;
+ }
+ | ENDIF execsize relativelocation instoptions
+ {
+ // for Gen6+
+ /* Gen6, Gen7 bspec: predication is prohibited */
+ if(!IS_GENp(6)) // for gen6-
+ error(&@1, "ENDIF Syntax error: should be 'ENDIF'\n");
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $1);
+ GEN(&$$)->header.execution_size = $2;
+ $$.reloc.first_reloc_target = $3.reloc_target;
+ $$.reloc.first_reloc_offset = $3.imm32;
+ }
+ | ELSE execsize relativelocation instoptions
+ {
+ if(!IS_GENp(6)) {
+ // for Gen4, Gen5. gen_level < 60
+ /* Set the istack pop count, which must always be 1. */
+ $3.imm32 |= (1 << 16);
+
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $1);
+ GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH;
+ ip_dst.width = $2;
+ set_instruction_dest(&$$, &ip_dst);
+ set_instruction_src0(&$$, &ip_src, NULL);
+ set_instruction_src1(&$$, &$3, NULL);
+ $$.reloc.first_reloc_target = $3.reloc_target;
+ $$.reloc.first_reloc_offset = $3.imm32;
+ } else if(IS_GENp(6)) {
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $1);
+ GEN(&$$)->header.execution_size = $2;
+ $$.reloc.first_reloc_target = $3.reloc_target;
+ $$.reloc.first_reloc_offset = $3.imm32;
+ } else {
+ /* Only reachable if the two IS_GENp(6) tests above disagree. */
+ error(&@1, "'ELSE' instruction is not implemented.\n");
+ }
+ }
+ | predicate IF execsize relativelocation
+ {
+ /* The branch instructions require that the IP register
+ * be the destination and first source operand, while the
+ * offset is the second source operand. The offset is added
+ * to the pre-incremented IP.
+ */
+ if(IS_GENp(7)) /* Error in Gen7+. */
+ error(&@2, "IF should be 'IF execsize JIP UIP'\n");
+
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+ if(!IS_GENp(6)) {
+ GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH;
+ ip_dst.width = $3;
+ set_instruction_dest(&$$, &ip_dst);
+ set_instruction_src0(&$$, &ip_src, NULL);
+ set_instruction_src1(&$$, &$4, NULL);
+ }
+ $$.reloc.first_reloc_target = $4.reloc_target;
+ $$.reloc.first_reloc_offset = $4.imm32;
+ }
+ | predicate IF execsize relativelocation relativelocation
+ {
+ /* for Gen7+ */
+ if(!IS_GENp(7))
+ error(&@2, "IF should be 'IF execsize relativelocation'\n");
+
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.execution_size = $3;
+ /* Two targets: both relocation slots are filled. */
+ $$.reloc.first_reloc_target = $4.reloc_target;
+ $$.reloc.first_reloc_offset = $4.imm32;
+ $$.reloc.second_reloc_target = $5.reloc_target;
+ $$.reloc.second_reloc_offset = $5.imm32;
+ }
+;
+
+/*
+ * Loop instructions: WHILE with a relocatable target, and the deprecated DO.
+ *
+ * For WHILE the result is zeroed first and only then filled in. The pre-Gen6
+ * path previously programmed the destination before the memset, which wiped
+ * it again; the order now matches the IF rule (zero, predicate, opcode,
+ * thread control, dest, src0, src1). The former third branch could never be
+ * taken because its two guards were IS_GENp(6) and its negation, so it has
+ * been dropped.
+ * NOTE(review): the parsed instoptions value is not referenced - confirm that
+ * is intended.
+ */
+loopinstruction: predicate WHILE execsize relativelocation instoptions
+	{
+		memset(&$$, 0, sizeof($$));
+		set_instruction_predicate(&$$, &$1);
+		set_instruction_opcode(&$$, $2);
+		if(!IS_GENp(6)) {
+			/* The branch instructions require that the IP register
+			 * be the destination and first source operand, while the
+			 * offset is the second source operand. The offset is added
+			 * to the pre-incremented IP.
+			 */
+			GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH;
+			ip_dst.width = $3;
+			set_instruction_dest(&$$, &ip_dst);
+			set_instruction_src0(&$$, &ip_src, NULL);
+			set_instruction_src1(&$$, &$4, NULL);
+		} else {
+			/* Gen6 spec:
+			     dest must have the same element size as src0.
+			     dest horizontal stride must be 1. */
+			GEN(&$$)->header.execution_size = $3;
+		}
+		/* Both paths record the single branch target for relocation. */
+		$$.reloc.first_reloc_target = $4.reloc_target;
+		$$.reloc.first_reloc_offset = $4.imm32;
+	}
+	| DO
+	{
+		// deprecated
+		memset(&$$, 0, sizeof($$));
+		set_instruction_opcode(&$$, $1);
+	};
+
+/*
+ * HALT with two relocatable targets. The destination is dst_null_reg with its
+ * width set from execsize, and src0 is src_null_reg.
+ * NOTE(review): the parsed instoptions value is not referenced - confirm that
+ * is intended.
+ */
+haltinstruction: predicate HALT execsize relativelocation relativelocation instoptions
+ {
+ // for Gen6, Gen7
+ /* Gen6, Gen7 bspec: dst and src0 must be the null reg. */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+ $$.reloc.first_reloc_target = $4.reloc_target;
+ $$.reloc.first_reloc_offset = $4.imm32;
+ $$.reloc.second_reloc_target = $5.reloc_target;
+ $$.reloc.second_reloc_offset = $5.imm32;
+ dst_null_reg.width = $3;
+ set_instruction_dest(&$$, &dst_null_reg);
+ set_instruction_src0(&$$, &src_null_reg, NULL);
+ };
+
+/*
+ * BRD (one target) and BRC (two targets). Both set BRW_THREAD_SWITCH and use
+ * dst_null_reg, with its width taken from execsize, as destination; BRC also
+ * sets src0 to src_null_reg.
+ * NOTE(review): the parsed instoptions value is not referenced - confirm that
+ * is intended.
+ */
+multibranchinstruction:
+ predicate BRD execsize relativelocation instoptions
+ {
+ /* Gen7 bspec: dest must be null. use Switch option */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH;
+ $$.reloc.first_reloc_target = $4.reloc_target;
+ $$.reloc.first_reloc_offset = $4.imm32;
+ dst_null_reg.width = $3;
+ set_instruction_dest(&$$, &dst_null_reg);
+ }
+ | predicate BRC execsize relativelocation relativelocation instoptions
+ {
+ /* Gen7 bspec: dest must be null. src0 must be null. use Switch option */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.thread_control |= BRW_THREAD_SWITCH;
+ $$.reloc.first_reloc_target = $4.reloc_target;
+ $$.reloc.first_reloc_offset = $4.imm32;
+ $$.reloc.second_reloc_target = $5.reloc_target;
+ $$.reloc.second_reloc_offset = $5.imm32;
+ dst_null_reg.width = $3;
+ set_instruction_dest(&$$, &dst_null_reg);
+ set_instruction_src0(&$$, &src_null_reg, NULL);
+ }
+;
+
+/*
+ * CALL and RET. In both actions the parsed execsize is not used: the width is
+ * forced to BRW_WIDTH_2 and the operand types to BRW_REGISTER_TYPE_D. CALL
+ * records one relocation target; RET records none.
+ * NOTE(review): the parsed instoptions value is not referenced, and for RET
+ * the parsed dstoperandex is replaced by dst_null_reg - confirm that is
+ * intended.
+ */
+subroutineinstruction:
+ predicate CALL execsize dst relativelocation instoptions
+ {
+ /*
+ Gen6 bspec:
+ source, dest type should be DWORD.
+ dest must be QWord aligned.
+ source0 region control must be <2,2,1>.
+ execution size must be 2.
+ QtrCtrl is prohibited.
+ JIP is an immediate operand, must be of type W.
+ Gen7 bspec:
+ source, dest type should be DWORD.
+ dest must be QWord aligned.
+ source0 region control must be <2,2,1>.
+ execution size must be 2.
+ */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+
+ $4.type = BRW_REGISTER_TYPE_D; /* dest type should be DWORD */
+ $4.width = BRW_WIDTH_2; /* execution size must be 2. */
+ set_instruction_dest(&$$, &$4);
+
+ /* src0 is synthesised here rather than parsed from the input. */
+ struct src_operand src0;
+ memset(&src0, 0, sizeof(src0));
+ src0.reg.type = BRW_REGISTER_TYPE_D; /* source type should be DWORD */
+ /* source0 region control must be <2,2,1>. */
+ src0.reg.hstride = 1; /*encoded 1*/
+ src0.reg.width = BRW_WIDTH_2;
+ src0.reg.vstride = 2; /*encoded 2*/
+ set_instruction_src0(&$$, &src0, NULL);
+
+ $$.reloc.first_reloc_target = $5.reloc_target;
+ $$.reloc.first_reloc_offset = $5.imm32;
+ }
+ | predicate RET execsize dstoperandex src instoptions
+ {
+ /*
+ Gen6, 7:
+ source cannot be accumulator.
+ dest must be null.
+ src0 region control must be <2,2,1> (not specified clearly. should be same as CALL)
+ */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+ dst_null_reg.width = BRW_WIDTH_2; /* execution size of RET should be 2 */
+ set_instruction_dest(&$$, &dst_null_reg);
+ /* The parsed source keeps its register but has its type and region
+ * overridden. */
+ $5.reg.type = BRW_REGISTER_TYPE_D;
+ $5.reg.hstride = 1; /*encoded 1*/
+ $5.reg.width = BRW_WIDTH_2;
+ $5.reg.vstride = 2; /*encoded 2*/
+ set_instruction_src0(&$$, &$5, NULL);
+ }
+;
+
+/*
+ * One-source instruction: opcode, saturate flag, options and the combined
+ * predicate/conditional modifier are applied to a zeroed instruction, then the
+ * destination (width taken from execsize) and src0. A non-zero result from
+ * set_instruction_dest() or set_instruction_src0() aborts the rule with
+ * YYERROR.
+ */
+unaryinstruction:
+ predicate unaryop conditionalmodifier saturate execsize
+ dst srcaccimm instoptions
+ {
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ set_instruction_saturate(&$$, $4);
+ $6.width = $5;
+ set_instruction_options(&$$, $8);
+ set_instruction_pred_cond(&$$, &$1, &$3, &@3);
+ if (set_instruction_dest(&$$, &$6) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$7, &@7) != 0)
+ YYERROR;
+
+ /* When IS_GENp(6) is false and the destination type size times
+ * (1 << width) equals 64, force compressed mode. */
+ if (!IS_GENp(6) &&
+ get_type_size(GEN(&$$)->bits1.da1.dest_reg_type) * (1 << $6.width) == 64)
+ GEN(&$$)->header.compression_control = BRW_COMPRESSION_COMPRESSED;
+ }
+;
+
+/* Opcode tokens accepted by unaryinstruction. */
+unaryop: MOV | FRC | RNDU | RNDD | RNDE | RNDZ | NOT | LZD | BFREV | CBIT
+ | F16TO32 | F32TO16 | FBH | FBL
+;
+
+// Source operands cannot be accumulators
+/*
+ * Two-source instruction (src0 is a src, src1 a srcimm). Opcode, saturate
+ * flag, options and predicate/conditional modifier are applied to a zeroed
+ * instruction, then destination (width taken from execsize), src0 and src1.
+ * A non-zero result from any of the three operand setters aborts the rule
+ * with YYERROR.
+ */
+binaryinstruction:
+ predicate binaryop conditionalmodifier saturate execsize
+ dst src srcimm instoptions
+ {
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ set_instruction_saturate(&$$, $4);
+ set_instruction_options(&$$, $9);
+ set_instruction_pred_cond(&$$, &$1, &$3, &@3);
+ $6.width = $5;
+ if (set_instruction_dest(&$$, &$6) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$7, &@7) != 0)
+ YYERROR;
+ if (set_instruction_src1(&$$, &$8, &@8) != 0)
+ YYERROR;
+
+ /* When IS_GENp(6) is false and the destination type size times
+ * (1 << width) equals 64, force compressed mode. */
+ if (!IS_GENp(6) &&
+ get_type_size(GEN(&$$)->bits1.da1.dest_reg_type) * (1 << $6.width) == 64)
+ GEN(&$$)->header.compression_control = BRW_COMPRESSION_COMPRESSED;
+ }
+;
+
+/* bspec: BFI1 should not access accumulator. */
+/* Opcode tokens accepted by binaryinstruction. */
+binaryop: MUL | MAC | MACH | LINE | SAD2 | SADA2 | DP4 | DPH | DP3 | DP2 | PLN | BFI1
+;
+
+// Source operands can be accumulators
+/*
+ * Two-source instruction whose first source is a srcacc. The action mirrors
+ * binaryinstruction: opcode, saturate flag, options and predicate/conditional
+ * modifier first, then destination (width taken from execsize), src0 and
+ * src1, with YYERROR on a non-zero result from an operand setter.
+ */
+binaryaccinstruction:
+ predicate binaryaccop conditionalmodifier saturate execsize
+ dst srcacc srcimm instoptions
+ {
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ set_instruction_saturate(&$$, $4);
+ $6.width = $5;
+ set_instruction_options(&$$, $9);
+ set_instruction_pred_cond(&$$, &$1, &$3, &@3);
+ if (set_instruction_dest(&$$, &$6) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$7, &@7) != 0)
+ YYERROR;
+ if (set_instruction_src1(&$$, &$8, &@8) != 0)
+ YYERROR;
+
+ /* When IS_GENp(6) is false and the destination type size times
+ * (1 << width) equals 64, force compressed mode. */
+ if (!IS_GENp(6) &&
+ get_type_size(GEN(&$$)->bits1.da1.dest_reg_type) * (1 << $6.width) == 64)
+ GEN(&$$)->header.compression_control = BRW_COMPRESSION_COMPRESSED;
+ }
+;
+
+/* TODO: bspec says ADDC/SUBB/CMP/CMPN/SHL/BFI1 cannot use accumulator as dest. */
+/* Opcode tokens accepted by binaryaccinstruction. */
+binaryaccop: AVG | ADD | SEL | AND | OR | XOR | SHR | SHL | ASR | CMP | CMPN | ADDC | SUBB
+;
+
+/* Opcode tokens accepted by trinaryinstruction. */
+trinaryop: MAD | LRP | BFE | BFI2
+;
+
+/*
+ * Three-source instruction. Operands are installed with the *_three_src
+ * setters; a non-zero result from any of them aborts the rule with YYERROR.
+ * Unlike the one- and two-source rules, options are applied last.
+ */
+trinaryinstruction:
+ predicate trinaryop conditionalmodifier saturate execsize
+ dst src src src instoptions
+{
+ memset(&$$, 0, sizeof($$));
+
+ set_instruction_pred_cond(&$$, &$1, &$3, &@3);
+
+ set_instruction_opcode(&$$, $2);
+ set_instruction_saturate(&$$, $4);
+
+ $6.width = $5;
+ if (set_instruction_dest_three_src(&$$, &$6))
+ YYERROR;
+ if (set_instruction_src0_three_src(&$$, &$7))
+ YYERROR;
+ if (set_instruction_src1_three_src(&$$, &$8))
+ YYERROR;
+ if (set_instruction_src2_three_src(&$$, &$9))
+ YYERROR;
+ set_instruction_options(&$$, $10);
+}
+;
+
+/*
+ * SEND in its various spellings. Every alternative zeroes the result, sets
+ * the opcode and predicate, and installs the destination with its width taken
+ * from execsize. They differ in where src0, src1 and the header field
+ * destreg__conditionalmod come from.
+ * NOTE(review): apart from end_of_thread in the first alternative, the parsed
+ * instoptions value is not referenced by these actions - confirm that is
+ * intended.
+ */
+sendinstruction: predicate SEND execsize exp post_dst payload msgtarget
+ MSGLEN exp RETURNLEN exp instoptions
+ {
+ /* Send instructions are messy. The first argument is the
+ * post destination -- the grf register that the response
+ * starts from. The second argument is the current
+ * destination, which is the start of the message arguments
+ * to the shared function, and where src0 payload is loaded
+ * to if not null. The payload is typically based on the
+ * grf 0 thread payload of your current thread, and is
+ * implicitly loaded if non-null.
+ */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ $5.width = $3;
+ GEN(&$$)->header.destreg__conditionalmod = $4; /* msg reg index */
+ set_instruction_predicate(&$$, &$1);
+ if (set_instruction_dest(&$$, &$5) != 0)
+ YYERROR;
+
+ if (IS_GENp(6)) {
+ /* Here src0 is built from the message register index and the
+ * parsed payload operand is not used. */
+ struct src_operand src0;
+
+ memset(&src0, 0, sizeof(src0));
+ src0.reg.address_mode = BRW_ADDRESS_DIRECT;
+
+ if (IS_GENp(7))
+ src0.reg.file = BRW_GENERAL_REGISTER_FILE;
+ else
+ src0.reg.file = BRW_MESSAGE_REGISTER_FILE;
+
+ src0.reg.type = BRW_REGISTER_TYPE_D;
+ src0.reg.nr = $4;
+ src0.reg.subnr = 0;
+ set_instruction_src0(&$$, &src0, NULL);
+ } else {
+ if (set_instruction_src0(&$$, &$6, &@6) != 0)
+ YYERROR;
+ }
+
+ GEN(&$$)->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE;
+ GEN(&$$)->bits1.da1.src1_reg_type = BRW_REGISTER_TYPE_D;
+
+ /* The descriptor bits come from msgtarget; message length,
+ * response length and end-of-thread are then overwritten. */
+ if (IS_GENp(5)) {
+ if (IS_GENp(6)) {
+ GEN(&$$)->header.destreg__conditionalmod = GEN(&$7)->bits2.send_gen5.sfid;
+ } else {
+ GEN(&$$)->header.destreg__conditionalmod = $4; /* msg reg index */
+ GEN(&$$)->bits2.send_gen5.sfid = GEN(&$7)->bits2.send_gen5.sfid;
+ GEN(&$$)->bits2.send_gen5.end_of_thread = $12.end_of_thread;
+ }
+
+ GEN(&$$)->bits3.generic_gen5 = GEN(&$7)->bits3.generic_gen5;
+ GEN(&$$)->bits3.generic_gen5.msg_length = $9;
+ GEN(&$$)->bits3.generic_gen5.response_length = $11;
+ GEN(&$$)->bits3.generic_gen5.end_of_thread = $12.end_of_thread;
+ } else {
+ GEN(&$$)->header.destreg__conditionalmod = $4; /* msg reg index */
+ GEN(&$$)->bits3.generic = GEN(&$7)->bits3.generic;
+ GEN(&$$)->bits3.generic.msg_length = $9;
+ GEN(&$$)->bits3.generic.response_length = $11;
+ GEN(&$$)->bits3.generic.end_of_thread = $12.end_of_thread;
+ }
+ }
+ | predicate SEND execsize dst sendleadreg payload directsrcoperand instoptions
+ {
+ /* src0 is the payload, src1 a direct register operand. */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */
+
+ set_instruction_predicate(&$$, &$1);
+
+ $4.width = $3;
+ if (set_instruction_dest(&$$, &$4) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$6, &@6) != 0)
+ YYERROR;
+ /* XXX is this correct? */
+ if (set_instruction_src1(&$$, &$7, &@7) != 0)
+ YYERROR;
+
+ }
+ | predicate SEND execsize dst sendleadreg payload imm32reg instoptions
+ {
+ /* src0 is the payload, src1 an immediate whose type must be
+ * UD, D or V. */
+ if ($7.reg.type != BRW_REGISTER_TYPE_UD &&
+ $7.reg.type != BRW_REGISTER_TYPE_D &&
+ $7.reg.type != BRW_REGISTER_TYPE_V) {
+ error (&@7, "non-int D/UD/V representation: %d,"
+ "type=%d\n", $7.reg.dw1.ud, $7.reg.type);
+ }
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */
+
+ set_instruction_predicate(&$$, &$1);
+ $4.width = $3;
+ if (set_instruction_dest(&$$, &$4) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$6, &@6) != 0)
+ YYERROR;
+ if (set_instruction_src1(&$$, &$7, &@7) != 0)
+ YYERROR;
+ }
+ | predicate SEND execsize dst sendleadreg sndopr imm32reg instoptions
+ {
+ /* Form rejected unless IS_GENp(6): sndopr carries the SFID and
+ * EOT bits, src0 is built from sendleadreg, and src1 is an
+ * immediate whose type must be UD, D or V. */
+ struct src_operand src0;
+
+ if (!IS_GENp(6))
+ error(&@2, "the syntax of send instruction\n");
+
+ if ($7.reg.type != BRW_REGISTER_TYPE_UD &&
+ $7.reg.type != BRW_REGISTER_TYPE_D &&
+ $7.reg.type != BRW_REGISTER_TYPE_V) {
+ error(&@7,"non-int D/UD/V representation: %d,"
+ "type=%d\n", $7.reg.dw1.ud, $7.reg.type);
+ }
+
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.destreg__conditionalmod = ($6 & EX_DESC_SFID_MASK); /* SFID */
+ set_instruction_predicate(&$$, &$1);
+
+ $4.width = $3;
+ if (set_instruction_dest(&$$, &$4) != 0)
+ YYERROR;
+
+ memset(&src0, 0, sizeof(src0));
+ src0.reg.address_mode = BRW_ADDRESS_DIRECT;
+
+ if (IS_GENp(7)) {
+ src0.reg.file = BRW_GENERAL_REGISTER_FILE;
+ src0.reg.type = BRW_REGISTER_TYPE_UB;
+ } else {
+ src0.reg.file = BRW_MESSAGE_REGISTER_FILE;
+ src0.reg.type = BRW_REGISTER_TYPE_D;
+ }
+
+ src0.reg.nr = $5.nr;
+ src0.reg.subnr = 0;
+ /* The results of these two setters are not checked here. */
+ set_instruction_src0(&$$, &src0, NULL);
+ set_instruction_src1(&$$, &$7, NULL);
+
+ GEN(&$$)->bits3.generic_gen5.end_of_thread = !!($6 & EX_DESC_EOT_MASK);
+ }
+ | predicate SEND execsize dst sendleadreg sndopr directsrcoperand instoptions
+ {
+ /* As the previous form, but src1 is a register that must pass
+ * the address-register check below. */
+ struct src_operand src0;
+
+ if (!IS_GENp(6))
+ error(&@2, "the syntax of send instruction\n");
+
+ if ($7.reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ ($7.reg.nr & 0xF0) != BRW_ARF_ADDRESS ||
+ ($7.reg.nr & 0x0F) != 0 ||
+ $7.reg.subnr != 0) {
+ error (&@7, "scalar register must be a0.0<0;1,0>:ud\n");
+ }
+
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.destreg__conditionalmod = ($6 & EX_DESC_SFID_MASK); /* SFID */
+ set_instruction_predicate(&$$, &$1);
+
+ $4.width = $3;
+ if (set_instruction_dest(&$$, &$4) != 0)
+ YYERROR;
+
+ memset(&src0, 0, sizeof(src0));
+ src0.reg.address_mode = BRW_ADDRESS_DIRECT;
+
+ if (IS_GENp(7)) {
+ src0.reg.file = BRW_GENERAL_REGISTER_FILE;
+ src0.reg.type = BRW_REGISTER_TYPE_UB;
+ } else {
+ src0.reg.file = BRW_MESSAGE_REGISTER_FILE;
+ src0.reg.type = BRW_REGISTER_TYPE_D;
+ }
+
+ src0.reg.nr = $5.nr;
+ src0.reg.subnr = 0;
+ set_instruction_src0(&$$, &src0, NULL);
+
+ set_instruction_src1(&$$, &$7, &@7);
+ GEN(&$$)->bits3.generic_gen5.end_of_thread = !!($6 & EX_DESC_EOT_MASK);
+ }
+ | predicate SEND execsize dst sendleadreg payload sndopr imm32reg instoptions
+ {
+ /* Payload plus sndopr plus immediate: the SFID and EOT bits of
+ * sndopr are only applied when IS_GENx(5) holds. */
+ if ($8.reg.type != BRW_REGISTER_TYPE_UD &&
+ $8.reg.type != BRW_REGISTER_TYPE_D &&
+ $8.reg.type != BRW_REGISTER_TYPE_V) {
+ error(&@8, "non-int D/UD/V representation: %d,"
+ "type=%d\n", $8.reg.dw1.ud, $8.reg.type);
+ }
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */
+
+ set_instruction_predicate(&$$, &$1);
+ $4.width = $3;
+ if (set_instruction_dest(&$$, &$4) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$6, &@6) != 0)
+ YYERROR;
+ if (set_instruction_src1(&$$, &$8, &@8) != 0)
+ YYERROR;
+
+ if (IS_GENx(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid = ($7 & EX_DESC_SFID_MASK);
+ GEN(&$$)->bits3.generic_gen5.end_of_thread = !!($7 & EX_DESC_EOT_MASK);
+ }
+ }
+ | predicate SEND execsize dst sendleadreg payload exp directsrcoperand instoptions
+ {
+ /* Payload plus expression plus register: the expression is
+ * stored unmasked as the SFID, and only when IS_GENx(5) holds. */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.destreg__conditionalmod = $5.nr; /* msg reg index */
+
+ set_instruction_predicate(&$$, &$1);
+
+ $4.width = $3;
+ if (set_instruction_dest(&$$, &$4) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$6, &@6) != 0)
+ YYERROR;
+ /* XXX is this correct? */
+ if (set_instruction_src1(&$$, &$8, &@8) != 0)
+ YYERROR;
+ if (IS_GENx(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid = $7;
+ }
+ }
+
+;
+
+/*
+ * An exp used as a SEND operand; the reduction is given the SNDOPR
+ * precedence. The value is passed through unchanged.
+ */
+sndopr: exp %prec SNDOPR
+ {
+ $$ = $1;
+ }
+;
+
+/*
+ * JMPI with a relocatable target. The parsed execsize is not used: the
+ * destination width is fixed to BRW_WIDTH_1. The mask is disabled when
+ * advanced_flag is set.
+ */
+jumpinstruction: predicate JMPI execsize relativelocation2
+ {
+ /* The jump instruction requires that the IP register
+ * be the destination and first source operand, while the
+ * offset is the second source operand. The next instruction
+ * is the post-incremented IP plus the offset.
+ */
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ if(advanced_flag)
+ GEN(&$$)->header.mask_control = BRW_MASK_DISABLE;
+ set_instruction_predicate(&$$, &$1);
+ ip_dst.width = BRW_WIDTH_1;
+ set_instruction_dest(&$$, &ip_dst);
+ set_instruction_src0(&$$, &ip_src, NULL);
+ set_instruction_src1(&$$, &$4, NULL);
+ $$.reloc.first_reloc_target = $4.reloc_target;
+ $$.reloc.first_reloc_offset = $4.imm32;
+ }
+;
+
+/*
+ * Math instruction: the math_function value is stored in the header field
+ * destreg__conditionalmod; options, predicate, destination (width taken from
+ * execsize) and both sources are then applied. A non-zero result from an
+ * operand setter aborts the rule with YYERROR.
+ */
+mathinstruction: predicate MATH_INST execsize dst src srcimm math_function instoptions
+ {
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.destreg__conditionalmod = $7;
+ set_instruction_options(&$$, $8);
+ set_instruction_predicate(&$$, &$1);
+ $4.width = $3;
+ if (set_instruction_dest(&$$, &$4) != 0)
+ YYERROR;
+ if (set_instruction_src0(&$$, &$5, &@5) != 0)
+ YYERROR;
+ if (set_instruction_src1(&$$, &$6, &@6) != 0)
+ YYERROR;
+ }
+;
+
+/*
+ * BREAK / CONT with two relocatable targets; execsize is written straight
+ * into the header.
+ * NOTE(review): the parsed instoptions value is not referenced - confirm that
+ * is intended.
+ */
+breakinstruction: predicate breakop execsize relativelocation relativelocation instoptions
+ {
+ // for Gen6, Gen7
+ memset(&$$, 0, sizeof($$));
+ set_instruction_predicate(&$$, &$1);
+ set_instruction_opcode(&$$, $2);
+ GEN(&$$)->header.execution_size = $3;
+ $$.reloc.first_reloc_target = $4.reloc_target;
+ $$.reloc.first_reloc_offset = $4.imm32;
+ $$.reloc.second_reloc_target = $5.reloc_target;
+ $$.reloc.second_reloc_offset = $5.imm32;
+ }
+;
+
+/* Opcode tokens accepted by breakinstruction. */
+breakop: BREAK | CONT
+;
+
+/*
+maskpushop: MSAVE | PUSH
+;
+ */
+
+/*
+ * WAIT on a notify register: the same register is used, typed as
+ * BRW_REGISTER_TYPE_D, for the destination (width BRW_WIDTH_1) and for src0;
+ * src1 is src_null_reg.
+ * NOTE(review): the parsed predicate is not applied to the instruction -
+ * confirm that is intended.
+ */
+syncinstruction: predicate WAIT notifyreg
+ {
+ struct brw_reg notify_dst;
+ struct src_operand notify_src;
+
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $2);
+ set_direct_dst_operand(&notify_dst, &$3, BRW_REGISTER_TYPE_D);
+ notify_dst.width = BRW_WIDTH_1;
+ set_instruction_dest(&$$, &notify_dst);
+ set_direct_src_operand(&notify_src, &$3, BRW_REGISTER_TYPE_D);
+ set_instruction_src0(&$$, &notify_src, NULL);
+ set_instruction_src1(&$$, &src_null_reg, NULL);
+ }
+
+;
+
+/* NOP: a zeroed instruction with only the opcode set. */
+nopinstruction: NOP
+ {
+ memset(&$$, 0, sizeof($$));
+ set_instruction_opcode(&$$, $1);
+ };
+
+/* XXX! */
+/* A SEND payload is written as a directsrcoperand; there is no explicit
+ * action, so the operand value is expected to pass through via the parser
+ * generator's default action. */
+payload: directsrcoperand
+;
+
+/* The post destination of a SEND is written as a dst; there is no explicit
+ * action, so the dst value is expected to pass through via the parser
+ * generator's default action. */
+post_dst: dst
+;
+
+msgtarget: NULL_TOKEN
+ {
+ if (IS_GENp(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid= BRW_SFID_NULL;
+ GEN(&$$)->bits3.generic_gen5.header_present = 0; /* ??? */
+ } else {
+ GEN(&$$)->bits3.generic.msg_target = BRW_SFID_NULL;
+ }
+ }
+ | SAMPLER LPAREN INTEGER COMMA INTEGER COMMA
+ sampler_datatype RPAREN
+ {
+ if (IS_GENp(7)) {
+ GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_SAMPLER;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1; /* ??? */
+ GEN(&$$)->bits3.sampler_gen7.binding_table_index = $3;
+ GEN(&$$)->bits3.sampler_gen7.sampler = $5;
+ GEN(&$$)->bits3.sampler_gen7.simd_mode = 2; /* SIMD16, maybe we should add a new parameter */
+ } else if (IS_GENp(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_SAMPLER;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1; /* ??? */
+ GEN(&$$)->bits3.sampler_gen5.binding_table_index = $3;
+ GEN(&$$)->bits3.sampler_gen5.sampler = $5;
+ GEN(&$$)->bits3.sampler_gen5.simd_mode = 2; /* SIMD16, maybe we should add a new parameter */
+ } else {
+ GEN(&$$)->bits3.generic.msg_target = BRW_SFID_SAMPLER;
+ GEN(&$$)->bits3.sampler.binding_table_index = $3;
+ GEN(&$$)->bits3.sampler.sampler = $5;
+ switch ($7) {
+ case TYPE_F:
+ GEN(&$$)->bits3.sampler.return_format =
+ BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+ break;
+ case TYPE_UD:
+ GEN(&$$)->bits3.sampler.return_format =
+ BRW_SAMPLER_RETURN_FORMAT_UINT32;
+ break;
+ case TYPE_D:
+ GEN(&$$)->bits3.sampler.return_format =
+ BRW_SAMPLER_RETURN_FORMAT_SINT32;
+ break;
+ }
+ }
+ }
+ | MATH math_function saturate math_signed math_scalar
+ {
+ if (IS_GENp(6)) {
+ error (&@1, "Gen6+ doesn't have math function\n");
+ } else if (IS_GENx(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_MATH;
+ GEN(&$$)->bits3.generic_gen5.header_present = 0;
+ GEN(&$$)->bits3.math_gen5.function = $2;
+ set_instruction_saturate(&$$, $3);
+ GEN(&$$)->bits3.math_gen5.int_type = $4;
+ GEN(&$$)->bits3.math_gen5.precision = BRW_MATH_PRECISION_FULL;
+ GEN(&$$)->bits3.math_gen5.data_type = $5;
+ } else {
+ GEN(&$$)->bits3.generic.msg_target = BRW_SFID_MATH;
+ GEN(&$$)->bits3.math.function = $2;
+ set_instruction_saturate(&$$, $3);
+ GEN(&$$)->bits3.math.int_type = $4;
+ GEN(&$$)->bits3.math.precision = BRW_MATH_PRECISION_FULL;
+ GEN(&$$)->bits3.math.data_type = $5;
+ }
+ }
+ | GATEWAY
+ {
+ if (IS_GENp(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_MESSAGE_GATEWAY;
+ GEN(&$$)->bits3.generic_gen5.header_present = 0; /* ??? */
+ } else {
+ GEN(&$$)->bits3.generic.msg_target = BRW_SFID_MESSAGE_GATEWAY;
+ }
+ }
+ | READ LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA
+ INTEGER RPAREN
+ {
+ if (IS_GENx(7)) {
+ GEN(&$$)->bits2.send_gen5.sfid =
+ GEN6_SFID_DATAPORT_SAMPLER_CACHE;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ GEN(&$$)->bits3.gen7_dp.binding_table_index = $3;
+ GEN(&$$)->bits3.gen7_dp.msg_control = $7;
+ GEN(&$$)->bits3.gen7_dp.msg_type = $9;
+ } else if (IS_GENx(6)) {
+ GEN(&$$)->bits2.send_gen5.sfid =
+ GEN6_SFID_DATAPORT_SAMPLER_CACHE;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ GEN(&$$)->bits3.gen6_dp_sampler_const_cache.binding_table_index = $3;
+ GEN(&$$)->bits3.gen6_dp_sampler_const_cache.msg_control = $7;
+ GEN(&$$)->bits3.gen6_dp_sampler_const_cache.msg_type = $9;
+ } else if (IS_GENx(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid =
+ BRW_SFID_DATAPORT_READ;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ GEN(&$$)->bits3.dp_read_gen5.binding_table_index = $3;
+ GEN(&$$)->bits3.dp_read_gen5.target_cache = $5;
+ GEN(&$$)->bits3.dp_read_gen5.msg_control = $7;
+ GEN(&$$)->bits3.dp_read_gen5.msg_type = $9;
+ } else {
+ GEN(&$$)->bits3.generic.msg_target =
+ BRW_SFID_DATAPORT_READ;
+ GEN(&$$)->bits3.dp_read.binding_table_index = $3;
+ GEN(&$$)->bits3.dp_read.target_cache = $5;
+ GEN(&$$)->bits3.dp_read.msg_control = $7;
+ GEN(&$$)->bits3.dp_read.msg_type = $9;
+ }
+ }
+ | WRITE LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA
+ INTEGER RPAREN
+ {
+ if (IS_GENx(7)) {
+ GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ GEN(&$$)->bits3.gen7_dp.binding_table_index = $3;
+ GEN(&$$)->bits3.gen7_dp.msg_control = $5;
+ GEN(&$$)->bits3.gen7_dp.msg_type = $7;
+ } else if (IS_GENx(6)) {
+ GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+ /* Sandybridge supports headerlesss message for render target write.
+ * Currently the GFX assembler doesn't support it. so the program must provide
+ * message header
+ */
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ GEN(&$$)->bits3.gen6_dp.binding_table_index = $3;
+ GEN(&$$)->bits3.gen6_dp.msg_control = $5;
+ GEN(&$$)->bits3.gen6_dp.msg_type = $7;
+ GEN(&$$)->bits3.gen6_dp.send_commit_msg = $9;
+ } else if (IS_GENx(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid =
+ BRW_SFID_DATAPORT_WRITE;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ GEN(&$$)->bits3.dp_write_gen5.binding_table_index = $3;
+ GEN(&$$)->bits3.dp_write_gen5.last_render_target = ($5 & 0x8) >> 3;
+ GEN(&$$)->bits3.dp_write_gen5.msg_control = $5 & 0x7;
+ GEN(&$$)->bits3.dp_write_gen5.msg_type = $7;
+ GEN(&$$)->bits3.dp_write_gen5.send_commit_msg = $9;
+ } else {
+ GEN(&$$)->bits3.generic.msg_target =
+ BRW_SFID_DATAPORT_WRITE;
+ GEN(&$$)->bits3.dp_write.binding_table_index = $3;
+ /* The msg control field of brw_struct.h is split into
+ * msg control and last_render_target, even though
+ * last_render_target isn't common to all write messages.
+ */
+ GEN(&$$)->bits3.dp_write.last_render_target = ($5 & 0x8) >> 3;
+ GEN(&$$)->bits3.dp_write.msg_control = $5 & 0x7;
+ GEN(&$$)->bits3.dp_write.msg_type = $7;
+ GEN(&$$)->bits3.dp_write.send_commit_msg = $9;
+ }
+ }
+ | WRITE LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA
+ INTEGER COMMA INTEGER RPAREN
+ {
+ if (IS_GENx(7)) {
+ GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+ GEN(&$$)->bits3.generic_gen5.header_present = ($11 != 0);
+ GEN(&$$)->bits3.gen7_dp.binding_table_index = $3;
+ GEN(&$$)->bits3.gen7_dp.msg_control = $5;
+ GEN(&$$)->bits3.gen7_dp.msg_type = $7;
+ } else if (IS_GENx(6)) {
+ GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+ GEN(&$$)->bits3.generic_gen5.header_present = ($11 != 0);
+ GEN(&$$)->bits3.gen6_dp.binding_table_index = $3;
+ GEN(&$$)->bits3.gen6_dp.msg_control = $5;
+ GEN(&$$)->bits3.gen6_dp.msg_type = $7;
+ GEN(&$$)->bits3.gen6_dp.send_commit_msg = $9;
+ } else if (IS_GENx(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid =
+ BRW_SFID_DATAPORT_WRITE;
+ GEN(&$$)->bits3.generic_gen5.header_present = ($11 != 0);
+ GEN(&$$)->bits3.dp_write_gen5.binding_table_index = $3;
+ GEN(&$$)->bits3.dp_write_gen5.last_render_target = ($5 & 0x8) >> 3;
+ GEN(&$$)->bits3.dp_write_gen5.msg_control = $5 & 0x7;
+ GEN(&$$)->bits3.dp_write_gen5.msg_type = $7;
+ GEN(&$$)->bits3.dp_write_gen5.send_commit_msg = $9;
+ } else {
+ GEN(&$$)->bits3.generic.msg_target =
+ BRW_SFID_DATAPORT_WRITE;
+ GEN(&$$)->bits3.dp_write.binding_table_index = $3;
+ /* The msg control field of brw_struct.h is split into
+ * msg control and last_render_target, even though
+ * last_render_target isn't common to all write messages.
+ */
+ GEN(&$$)->bits3.dp_write.last_render_target = ($5 & 0x8) >> 3;
+ GEN(&$$)->bits3.dp_write.msg_control = $5 & 0x7;
+ GEN(&$$)->bits3.dp_write.msg_type = $7;
+ GEN(&$$)->bits3.dp_write.send_commit_msg = $9;
+ }
+ }
+ | URB INTEGER urb_swizzle urb_allocate urb_used urb_complete
+ {
+ GEN(&$$)->bits3.generic.msg_target = BRW_SFID_URB;
+ if (IS_GENp(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid = BRW_SFID_URB;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ set_instruction_opcode(&$$, BRW_URB_OPCODE_WRITE);
+ GEN(&$$)->bits3.urb_gen5.offset = $2;
+ GEN(&$$)->bits3.urb_gen5.swizzle_control = $3;
+ GEN(&$$)->bits3.urb_gen5.pad = 0;
+ GEN(&$$)->bits3.urb_gen5.allocate = $4;
+ GEN(&$$)->bits3.urb_gen5.used = $5;
+ GEN(&$$)->bits3.urb_gen5.complete = $6;
+ } else {
+ GEN(&$$)->bits3.generic.msg_target = BRW_SFID_URB;
+ set_instruction_opcode(&$$, BRW_URB_OPCODE_WRITE);
+ GEN(&$$)->bits3.urb.offset = $2;
+ GEN(&$$)->bits3.urb.swizzle_control = $3;
+ GEN(&$$)->bits3.urb.pad = 0;
+ GEN(&$$)->bits3.urb.allocate = $4;
+ GEN(&$$)->bits3.urb.used = $5;
+ GEN(&$$)->bits3.urb.complete = $6;
+ }
+ }
+ | THREAD_SPAWNER LPAREN INTEGER COMMA INTEGER COMMA
+ INTEGER RPAREN
+ {
+ GEN(&$$)->bits3.generic.msg_target =
+ BRW_SFID_THREAD_SPAWNER;
+ if (IS_GENp(5)) {
+ GEN(&$$)->bits2.send_gen5.sfid =
+ BRW_SFID_THREAD_SPAWNER;
+ GEN(&$$)->bits3.generic_gen5.header_present = 0;
+ GEN(&$$)->bits3.thread_spawner_gen5.opcode = $3;
+ GEN(&$$)->bits3.thread_spawner_gen5.requester_type = $5;
+ GEN(&$$)->bits3.thread_spawner_gen5.resource_select = $7;
+ } else {
+ GEN(&$$)->bits3.generic.msg_target =
+ BRW_SFID_THREAD_SPAWNER;
+ GEN(&$$)->bits3.thread_spawner.opcode = $3;
+ GEN(&$$)->bits3.thread_spawner.requester_type = $5;
+ GEN(&$$)->bits3.thread_spawner.resource_select = $7;
+ }
+ }
+ | VME LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA INTEGER RPAREN
+ {
+ GEN(&$$)->bits3.generic.msg_target = GEN6_SFID_VME;
+
+ if (IS_GENp(6)) {
+ GEN(&$$)->bits2.send_gen5.sfid = GEN6_SFID_VME;
+ GEN(&$$)->bits3.vme_gen6.binding_table_index = $3;
+ GEN(&$$)->bits3.vme_gen6.search_path_index = $5;
+ GEN(&$$)->bits3.vme_gen6.lut_subindex = $7;
+ GEN(&$$)->bits3.vme_gen6.message_type = $9;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ } else {
+ error (&@1, "Gen6- doesn't have vme function\n");
+ }
+ }
+ | CRE LPAREN INTEGER COMMA INTEGER RPAREN
+ {
+ if (gen_level < 75)
+ error (&@1, "Below Gen7.5 doesn't have CRE function\n");
+
+ GEN(&$$)->bits3.generic.msg_target = HSW_SFID_CRE;
+
+ GEN(&$$)->bits2.send_gen5.sfid = HSW_SFID_CRE;
+ GEN(&$$)->bits3.cre_gen75.binding_table_index = $3;
+ GEN(&$$)->bits3.cre_gen75.message_type = $5;
+ GEN(&$$)->bits3.generic_gen5.header_present = 1;
+ }
+
+ | DATA_PORT LPAREN INTEGER COMMA INTEGER COMMA INTEGER COMMA
+ INTEGER COMMA INTEGER COMMA INTEGER RPAREN
+ {
+ GEN(&$$)->bits2.send_gen5.sfid = $3;
+ GEN(&$$)->bits3.generic_gen5.header_present = ($13 != 0);
+
+ if (IS_GENp(7)) {
+ if ($3 != GEN6_SFID_DATAPORT_SAMPLER_CACHE &&
+ $3 != GEN6_SFID_DATAPORT_RENDER_CACHE &&
+ $3 != GEN6_SFID_DATAPORT_CONSTANT_CACHE &&
+ $3 != GEN7_SFID_DATAPORT_DATA_CACHE) {
+ error (&@3, "error: wrong cache type\n");
+ }
+
+ GEN(&$$)->bits3.gen7_dp.category = $11;
+ GEN(&$$)->bits3.gen7_dp.binding_table_index = $9;
+ GEN(&$$)->bits3.gen7_dp.msg_control = $7;
+ GEN(&$$)->bits3.gen7_dp.msg_type = $5;
+ } else if (IS_GENx(6)) {
+ if ($3 != GEN6_SFID_DATAPORT_SAMPLER_CACHE &&
+ $3 != GEN6_SFID_DATAPORT_RENDER_CACHE &&
+ $3 != GEN6_SFID_DATAPORT_CONSTANT_CACHE) {
+ error (&@3, "error: wrong cache type\n");
+ }
+
+ GEN(&$$)->bits3.gen6_dp.send_commit_msg = $11;
+ GEN(&$$)->bits3.gen6_dp.binding_table_index = $9;
+ GEN(&$$)->bits3.gen6_dp.msg_control = $7;
+ GEN(&$$)->bits3.gen6_dp.msg_type = $5;
+ } else if (!IS_GENp(5)) {
+ error (&@1, "Gen6- doesn't support data port for sampler/render/constant/data cache\n");
+ }
+ }
+;
+
+ /* Optional ALLOCATE keyword of a URB message: 1 when present, 0 when omitted. */
+ urb_allocate: ALLOCATE { $$ = 1; }
+ | /* empty */ { $$ = 0; }
+ ;
+
+ /* Optional USED keyword of a URB message: 1 when present, 0 when omitted. */
+ urb_used: USED { $$ = 1; }
+ | /* empty */ { $$ = 0; }
+ ;
+
+ /* Optional COMPLETE keyword of a URB message: 1 when present, 0 when omitted. */
+ urb_complete: COMPLETE { $$ = 1; }
+ | /* empty */ { $$ = 0; }
+ ;
+
+ /* Optional URB swizzle keyword, mapped to the matching BRW_URB_SWIZZLE_*
+ * value; BRW_URB_SWIZZLE_NONE when omitted.
+ */
+ urb_swizzle: TRANSPOSE { $$ = BRW_URB_SWIZZLE_TRANSPOSE; }
+ | INTERLEAVE { $$ = BRW_URB_SWIZZLE_INTERLEAVE; }
+ | /* empty */ { $$ = BRW_URB_SWIZZLE_NONE; }
+ ;
+
+ /* Data type accepted for a sampler message: one of the F, UD or D type
+ * tokens.  There is no explicit action, so the value is the token's own
+ * semantic value (bison's default $$ = $1).
+ */
+ sampler_datatype:
+ TYPE_F
+ | TYPE_UD
+ | TYPE_D
+ ;
+
+ /* Any one of the math function keyword tokens.  No explicit action: the
+ * value is the token's own semantic value (bison's default $$ = $1).
+ */
+ math_function: INV | LOG | EXP | SQRT | POW | SIN | COS | SINCOS | INTDIV
+ | INTMOD | INTDIVMOD
+ ;
+
+ /* Optional SIGNED keyword: 1 when present, 0 when omitted. */
+ math_signed: /* empty */ { $$ = 0; }
+ | SIGNED { $$ = 1; }
+ ;
+
+ /* Optional SCALAR keyword: 1 when present, 0 when omitted. */
+ math_scalar: /* empty */ { $$ = 0; }
+ | SCALAR { $$ = 1; }
+ ;
+
+/* 1.4.2: Destination register */
+
+ /* A destination is either a regular operand (dstoperand) or one of the
+ * special-register forms (dstoperandex).
+ */
+ dst: dstoperand | dstoperandex
+ ;
+
+ /* Destination operand in one of two forms: a declared register (symbol_reg)
+ * with an optional region, or an explicit dstreg followed by an optional
+ * region, writemask and type.  In both forms hstride comes from
+ * resolve_dst_region().
+ */
+ dstoperand: symbol_reg dstregion
+ {
+ $$ = $1.reg;
+ $$.hstride = resolve_dst_region(&$1, $2);
+ }
+ | dstreg dstregion writemask regtype
+ {
+ /* Returns an instruction with just the destination register
+ * filled in.
+ */
+ $$ = $1;
+ $$.hstride = resolve_dst_region(NULL, $2);
+ $$.dw1.bits.writemask = $3.dw1.bits.writemask;
+ $$.type = $4.type;
+ }
+ ;
+
+ /* The dstoperandex returns an instruction with just the destination register
+ * filled in.
+ *
+ * Forms handled here: accumulator/flag/address/mask registers and the null
+ * register take an optional region and type; mask stack, control and ip
+ * registers get a fixed hstride of 1 and a fixed type (UW for the mask stack,
+ * UD for control and ip).
+ */
+ dstoperandex: dstoperandex_typed dstregion regtype
+ {
+ $$ = $1;
+ $$.hstride = resolve_dst_region(NULL, $2);
+ $$.type = $3.type;
+ }
+ | maskstackreg
+ {
+ $$ = $1;
+ $$.hstride = 1;
+ $$.type = BRW_REGISTER_TYPE_UW;
+ }
+ | controlreg
+ {
+ $$ = $1;
+ $$.hstride = 1;
+ $$.type = BRW_REGISTER_TYPE_UD;
+ }
+ | ipreg
+ {
+ $$ = $1;
+ $$.hstride = 1;
+ $$.type = BRW_REGISTER_TYPE_UD;
+ }
+ | nullreg dstregion regtype
+ {
+ $$ = $1;
+ $$.hstride = resolve_dst_region(NULL, $2);
+ $$.type = $3.type;
+ }
+ ;
+
+ /* Special registers that dstoperandex accepts with an explicit region and
+ * type.
+ */
+ dstoperandex_typed: accreg | flagreg | addrreg | maskreg
+ ;
+
+ /* A register referenced by its declared name, either bare or with an offset
+  * (symbol_reg_p).  The value is a copy of the declaration returned by
+  * find_register().
+  */
+ symbol_reg: STRING %prec STR_SYMBOL_REG
+     {
+         struct declared_register *dcl_reg = find_register($1);
+
+         if (dcl_reg == NULL) {
+             error(&@1, "can't find register %s\n", $1);
+             /* Other actions in this grammar keep running after error(),
+              * so it is assumed to return.  Give up on this construct
+              * instead of copying from a NULL declaration below.
+              */
+             free($1);
+             YYERROR;
+         }
+
+         memcpy(&$$, dcl_reg, sizeof(*dcl_reg));
+         free($1); // $1 has been malloc'ed by strdup
+     }
+     | symbol_reg_p
+     {
+         $$=$1;
+     }
+ ;
+
+ /* A declared register with an offset: NAME(reg) or NAME(reg, subreg).
+  * The value starts as a copy of the declaration; the first expression is
+  * added to the register number.  In the two-argument form the second
+  * expression is added to the subregister number and any overflow is carried
+  * into the register number: in units of 32 / element-size when advanced_flag
+  * is set, otherwise in units of 32.
+  */
+ symbol_reg_p: STRING LPAREN exp RPAREN
+     {
+         struct declared_register *dcl_reg = find_register($1);
+
+         if (dcl_reg == NULL) {
+             error(&@1, "can't find register %s\n", $1);
+             /* error() is assumed to return (other actions continue
+              * after calling it); do not dereference the NULL
+              * declaration below.
+              */
+             free($1);
+             YYERROR;
+         }
+
+         memcpy(&$$, dcl_reg, sizeof(*dcl_reg));
+         $$.reg.nr += $3;
+         free($1);
+     }
+     | STRING LPAREN exp COMMA exp RPAREN
+     {
+         struct declared_register *dcl_reg = find_register($1);
+
+         if (dcl_reg == NULL) {
+             error(&@1, "can't find register %s\n", $1);
+             /* Same reasoning as above: never touch a NULL declaration. */
+             free($1);
+             YYERROR;
+         }
+
+         memcpy(&$$, dcl_reg, sizeof(*dcl_reg));
+         $$.reg.nr += $3;
+         if(advanced_flag) {
+             int size = get_type_size(dcl_reg->reg.type);
+             $$.reg.nr += ($$.reg.subnr + $5) / (32 / size);
+             $$.reg.subnr = ($$.reg.subnr + $5) % (32 / size);
+         } else {
+             $$.reg.nr += ($$.reg.subnr + $5) / 32;
+             $$.reg.subnr = ($$.reg.subnr + $5) % 32;
+         }
+         free($1);
+     }
+ ;
+ /* Returns a partially complete destination register consisting of the
+ * direct or indirect register addressing fields, but not stride or writemask.
+ * The only field set here is address_mode: BRW_ADDRESS_DIRECT for the direct
+ * forms and BRW_ADDRESS_REGISTER_INDIRECT_REGISTER for the indirect ones.
+ */
+ dstreg: directgenreg
+ {
+ $$ = $1;
+ $$.address_mode = BRW_ADDRESS_DIRECT;
+ }
+ | directmsgreg
+ {
+ $$ = $1;
+ $$.address_mode = BRW_ADDRESS_DIRECT;
+ }
+ | indirectgenreg
+ {
+ $$ = $1;
+ $$.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ }
+ | indirectmsgreg
+ {
+ $$ = $1;
+ $$.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ }
+ ;
+
+/* 1.4.3: Source register */
+ /* Source that may be an accumulator-capable operand (srcacc) or a 32-bit
+ * immediate.
+ */
+ srcaccimm: srcacc | imm32reg
+ ;
+
+ /* Source that may be a direct operand (including the accumulator form) or an
+ * indirect operand.
+ */
+ srcacc: directsrcaccoperand | indirectsrcoperand
+ ;
+
+ /* Source that may be a direct operand, an indirect operand or a 32-bit
+ * immediate.
+ */
+ srcimm: directsrcoperand | indirectsrcoperand| imm32reg
+ ;
+
+ /* Immediate source operand: a literal (imm32) followed by an optional type.
+  * The literal is converted to the 32-bit pattern stored in reg.dw1.ud:
+  *   - UD/D/V/VF: the integer value as is;
+  *   - UW/W: the low 16 bits, replicated into both halves;
+  *   - F: the bit pattern of the float (integers are converted to float).
+  * A literal whose representation does not fit the type is reported through
+  * error().
+  */
+ imm32reg: imm32 srcimmtype
+     {
+         /* Both temporaries start at zero: error() is assumed to return
+          * (other actions continue after calling it), and the error paths
+          * below would otherwise read them uninitialized.
+          */
+         union {
+             int i;
+             float f;
+         } intfloat = { 0 };
+         uint32_t d = 0;
+
+         switch ($2) {
+         case BRW_REGISTER_TYPE_UD:
+         case BRW_REGISTER_TYPE_D:
+         case BRW_REGISTER_TYPE_V:
+         case BRW_REGISTER_TYPE_VF:
+             switch ($1.r) {
+             case imm32_d:
+                 d = $1.u.d;
+                 break;
+             default:
+                 error (&@2, "non-int D/UD/V/VF representation: %d,type=%d\n", $1.r, $2);
+             }
+             break;
+         case BRW_REGISTER_TYPE_UW:
+         case BRW_REGISTER_TYPE_W:
+             switch ($1.r) {
+             case imm32_d:
+                 d = $1.u.d;
+                 break;
+             default:
+                 error (&@2, "non-int W/UW representation\n");
+             }
+             /* Replicate the 16-bit value into both halves of the dword. */
+             d &= 0xffff;
+             d |= d << 16;
+             break;
+         case BRW_REGISTER_TYPE_F:
+             switch ($1.r) {
+             case imm32_f:
+                 intfloat.f = $1.u.f;
+                 break;
+             case imm32_d:
+                 intfloat.f = (float) $1.u.d;
+                 break;
+             default:
+                 error (&@2, "non-float F representation\n");
+             }
+             /* Reinterpret the float's bits as an integer. */
+             d = intfloat.i;
+             break;
+ #if 0
+         case BRW_REGISTER_TYPE_VF:
+             fprintf (stderr, "Immediate type VF not supported yet\n");
+             YYERROR;
+ #endif
+         default:
+             error(&@2, "unknown immediate type %d\n", $2);
+         }
+         memset (&$$, '\0', sizeof ($$));
+         $$.reg.file = BRW_IMMEDIATE_VALUE;
+         $$.reg.type = $2;
+         $$.reg.dw1.ud = d;
+     }
+ ;
+
+ /* Direct source operand that may also be the accumulator.  For the
+ * accumulator form the operand is built by set_direct_src_operand() and the
+ * region fields (plus the default_region flag) are copied from the parsed
+ * region.
+ */
+ directsrcaccoperand: directsrcoperand
+ | accreg region regtype
+ {
+ set_direct_src_operand(&$$, &$1, $3.type);
+ $$.reg.vstride = $2.vert_stride;
+ $$.reg.width = $2.width;
+ $$.reg.hstride = $2.horiz_stride;
+ $$.default_region = $2.is_default;
+ }
+ ;
+
+ /* Returns a source operand in the src0 fields of an instruction.
+ *
+ * Covers the special (architecture) registers: flag/address/mask registers
+ * with an optional region and type, the mask stack (typed UB), control,
+ * notification and ip registers (typed UD), and the null register.
+ */
+ srcarchoperandex: srcarchoperandex_typed region regtype
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.file = $1.file;
+ $$.reg.type = $3.type;
+ $$.reg.subnr = $1.subnr;
+ $$.reg.nr = $1.nr;
+ $$.reg.vstride = $2.vert_stride;
+ $$.reg.width = $2.width;
+ $$.reg.hstride = $2.horiz_stride;
+ $$.default_region = $2.is_default;
+ $$.reg.negate = 0;
+ $$.reg.abs = 0;
+ }
+ | maskstackreg
+ {
+ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UB);
+ }
+ | controlreg
+ {
+ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD);
+ }
+ /* | statereg
+ {
+ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD);
+ }*/
+ | notifyreg
+ {
+ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD);
+ }
+ | ipreg
+ {
+ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD);
+ }
+ | nullreg region regtype
+ {
+ /* An omitted type means UD for the null register; the parsed
+ * region is not copied and the operand is always flagged as
+ * using the default region.
+ */
+ if ($3.is_default) {
+ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD);
+ } else {
+ set_direct_src_operand(&$$, &$1, $3.type);
+ }
+ $$.default_region = 1;
+ }
+ ;
+
+ /* Special registers that srcarchoperandex accepts with an explicit region
+ * and type.
+ */
+ srcarchoperandex_typed: flagreg | addrreg | maskreg
+ ;
+
+ /* Leading register of a send: a declared register (only its file, nr and
+ * subnr are kept; everything else is zeroed), or a direct general/message
+ * register.
+ */
+ sendleadreg: symbol_reg
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = $1.reg.file;
+ $$.nr = $1.reg.nr;
+ $$.subnr = $1.reg.subnr;
+ }
+ | directgenreg | directmsgreg
+ ;
+
+ /* A non-immediate source operand, direct or indirect. */
+ src: directsrcoperand | indirectsrcoperand
+ ;
+
+ /* Direct source operand.  Four forms:
+ *  - a declared register: type and region fall back to the declaration's
+ *    own values when they are omitted at the use site;
+ *  - the state register: UD via set_direct_src_operand() when both region
+ *    and type are omitted, otherwise filled in from the parsed values;
+ *  - a direct general register with optional negate/abs modifiers, region,
+ *    type and swizzle;
+ *  - one of the special-register forms (srcarchoperandex).
+ */
+ directsrcoperand: negate abs symbol_reg region regtype
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.address_mode = BRW_ADDRESS_DIRECT;
+ $$.reg.file = $3.reg.file;
+ $$.reg.nr = $3.reg.nr;
+ $$.reg.subnr = $3.reg.subnr;
+ if ($5.is_default) {
+ $$.reg.type = $3.reg.type;
+ } else {
+ $$.reg.type = $5.type;
+ }
+ if ($4.is_default) {
+ $$.reg.vstride = $3.src_region.vert_stride;
+ $$.reg.width = $3.src_region.width;
+ $$.reg.hstride = $3.src_region.horiz_stride;
+ } else {
+ $$.reg.vstride = $4.vert_stride;
+ $$.reg.width = $4.width;
+ $$.reg.hstride = $4.horiz_stride;
+ }
+ $$.reg.negate = $1;
+ $$.reg.abs = $2;
+ }
+ | statereg region regtype
+ {
+ if($2.is_default ==1 && $3.is_default == 1)
+ {
+ set_direct_src_operand(&$$, &$1, BRW_REGISTER_TYPE_UD);
+ }
+ else{
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.address_mode = BRW_ADDRESS_DIRECT;
+ $$.reg.file = $1.file;
+ $$.reg.nr = $1.nr;
+ $$.reg.subnr = $1.subnr;
+ $$.reg.vstride = $2.vert_stride;
+ $$.reg.width = $2.width;
+ $$.reg.hstride = $2.horiz_stride;
+ $$.reg.type = $3.type;
+ }
+ }
+ | negate abs directgenreg region regtype swizzle
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.address_mode = BRW_ADDRESS_DIRECT;
+ $$.reg.file = $3.file;
+ $$.reg.nr = $3.nr;
+ $$.reg.subnr = $3.subnr;
+ $$.reg.type = $5.type;
+ $$.reg.vstride = $4.vert_stride;
+ $$.reg.width = $4.width;
+ $$.reg.hstride = $4.horiz_stride;
+ $$.default_region = $4.is_default;
+ $$.reg.negate = $1;
+ $$.reg.abs = $2;
+ $$.reg.dw1.bits.swizzle = $6.reg.dw1.bits.swizzle;
+ }
+ | srcarchoperandex
+ ;
+
+ /* Register-indirect source operand: optional negate/abs modifiers, an
+ * indirect general register, a region (regular or width/hstride only), an
+ * optional type and an optional swizzle.  The register number is left at
+ * zero; the address subregister and immediate offset identify the register.
+ */
+ indirectsrcoperand:
+ negate abs indirectgenreg indirectregion regtype swizzle
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ $$.reg.file = $3.file;
+ $$.reg.subnr = $3.subnr;
+ $$.reg.dw1.bits.indirect_offset = $3.dw1.bits.indirect_offset;
+ $$.reg.type = $5.type;
+ $$.reg.vstride = $4.vert_stride;
+ $$.reg.width = $4.width;
+ $$.reg.hstride = $4.horiz_stride;
+ $$.reg.negate = $1;
+ $$.reg.abs = $2;
+ $$.reg.dw1.bits.swizzle = $6.reg.dw1.bits.swizzle;
+ }
+ ;
+
+/* 1.4.4: Address Registers */
+ /* Returns a partially-completed struct brw_reg consisting of the address
+ * register fields for register-indirect access: the address subregister
+ * number and the immediate offset (0 when only the address register is
+ * given).  An offset outside -512..511 is reported as an error.
+ */
+ addrparam: addrreg COMMA immaddroffset
+ {
+ if ($3 < -512 || $3 > 511)
+ error(&@3, "Address immediate offset %d out of range\n", $3);
+ memset (&$$, '\0', sizeof ($$));
+ $$.subnr = $1.subnr;
+ $$.dw1.bits.indirect_offset = $3;
+ }
+ | addrreg
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.subnr = $1.subnr;
+ $$.dw1.bits.indirect_offset = 0;
+ }
+ ;
+
+ /* The immaddroffset provides an immediate offset value added to the addresses
+ * from the address register in register-indirect register access.
+ * It is 0 when omitted; otherwise it is the value of the expression.
+ */
+ immaddroffset: /* empty */ { $$ = 0; }
+ | exp
+ ;
+
+
+/* 1.4.5: Register files and register numbers */
+ /* Optional ".<exp>" subregister suffix; the value is the expression, or 0
+ * when the suffix is absent.
+ */
+ subregnum: DOT exp
+ {
+ $$ = $2;
+ }
+ | %prec SUBREGNUM
+ {
+ /* Default to subreg 0 if unspecified. */
+ $$ = 0;
+ }
+ ;
+
+ /* Direct general register: file, register number and subregister number
+ * are filled in; every other field is zeroed.
+ */
+ directgenreg: GENREG subregnum
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_GENERAL_REGISTER_FILE;
+ $$.nr = $1;
+ $$.subnr = $2;
+ }
+ ;
+
+ /* Register-indirect general register, written as the register file followed
+ * by a bracketed address parameter.  subnr and the immediate offset are
+ * taken from addrparam; the register number stays zero.
+ */
+ indirectgenreg: GENREGFILE LSQUARE addrparam RSQUARE
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_GENERAL_REGISTER_FILE;
+ $$.subnr = $3.subnr;
+ $$.dw1.bits.indirect_offset = $3.dw1.bits.indirect_offset;
+ }
+ ;
+
+ /* Direct message register: file, register number and subregister number
+ * are filled in; every other field is zeroed.
+ */
+ directmsgreg: MSGREG subregnum
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_MESSAGE_REGISTER_FILE;
+ $$.nr = $1;
+ $$.subnr = $2;
+ }
+ ;
+
+ /* Register-indirect message register, written as the register file followed
+ * by a bracketed address parameter.  subnr and the immediate offset are
+ * taken from addrparam; the register number stays zero.
+ */
+ indirectmsgreg: MSGREGFILE LSQUARE addrparam RSQUARE
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_MESSAGE_REGISTER_FILE;
+ $$.subnr = $3.subnr;
+ $$.dw1.bits.indirect_offset = $3.dw1.bits.indirect_offset;
+ }
+ ;
+
+ /* Address register with an optional subregister number.  Only register
+  * number 0 is accepted; anything else is reported as an error.
+  */
+ addrreg: ADDRESSREG subregnum
+     {
+         /* The offending value is the register number, so report the error
+          * at the register token (@1), not at the subregister.
+          */
+         if ($1 != 0)
+             error(&@1, "address register number %d out of range", $1);
+
+         memset (&$$, '\0', sizeof ($$));
+         $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+         $$.nr = BRW_ARF_ADDRESS | $1;
+         $$.subnr = $2;
+     }
+ ;
+
+ /* Accumulator register with an optional subregister number.  Register
+ * numbers above 1 are reported as an error.
+ */
+ accreg: ACCREG subregnum
+ {
+ if ($1 > 1)
+ error(&@1, "accumulator register number %d out of range", $1);
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.nr = BRW_ARF_ACCUMULATOR | $1;
+ $$.subnr = $2;
+ }
+ ;
+
+ /* Flag register with an optional subregister number.  Register number 0 is
+  * always accepted and 1 is accepted only when IS_GENp(7); subregister
+  * numbers above 1 are rejected.
+  */
+ flagreg: FLAGREG subregnum
+     {
+         /* The register number comes from the FLAGREG token, so the error
+          * is reported at @1.
+          */
+         if ((!IS_GENp(7) && $1 > 0) ||
+             (IS_GENp(7) && $1 > 1)) {
+             error(&@1, "flag register number %d out of range\n", $1);
+         }
+
+         /* Print the subregister number ($2), which is the value that was
+          * actually checked.
+          */
+         if ($2 > 1)
+             error(&@2, "flag subregister number %d out of range\n", $2);
+
+         memset (&$$, '\0', sizeof ($$));
+         $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+         $$.nr = BRW_ARF_FLAG | $1;
+         $$.subnr = $2;
+     }
+ ;
+
+ /* Mask register, written either as the register token with an optional
+ * subregister number (only register 0 is accepted) or as one of the named
+ * mask subregisters, whose semantic value is used as the subregister number.
+ */
+ maskreg: MASKREG subregnum
+ {
+ if ($1 > 0)
+ error(&@1, "mask register number %d out of range", $1);
+
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.nr = BRW_ARF_MASK;
+ $$.subnr = $2;
+ }
+ | mask_subreg
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.nr = BRW_ARF_MASK;
+ $$.subnr = $1;
+ }
+ ;
+
+ /* Named mask subregisters.  No explicit action: the value is the token's
+ * own semantic value, which maskreg stores as the subregister number.
+ */
+ mask_subreg: AMASK | IMASK | LMASK | CMASK
+ ;
+
+ /* Mask stack register, written either as the register token with an
+ * optional subregister number (only register 0 is accepted) or as one of the
+ * named mask stack subregisters.
+ */
+ maskstackreg: MASKSTACKREG subregnum
+ {
+ if ($1 > 0)
+ error(&@1, "mask stack register number %d out of range", $1);
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.nr = BRW_ARF_MASK_STACK;
+ $$.subnr = $2;
+ }
+ | maskstack_subreg
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.nr = BRW_ARF_MASK_STACK;
+ $$.subnr = $1;
+ }
+ ;
+
+ /* Named mask stack subregisters.  No explicit action: the value is the
+ * token's own semantic value, which maskstackreg stores as the subregister
+ * number.
+ */
+ maskstack_subreg: IMS | LMS
+ ;
+
+/*
+maskstackdepthreg: MASKSTACKDEPTHREG subregnum
+ {
+ if ($1 > 0)
+ error(&@1, "mask stack register number %d out of range", $1);
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg_file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.reg_nr = BRW_ARF_MASK_STACK_DEPTH;
+ $$.subreg_nr = $2;
+ }
+ | maskstackdepth_subreg
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg_file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.reg_nr = BRW_ARF_MASK_STACK_DEPTH;
+ $$.subreg_nr = $1;
+ }
+;
+
+maskstackdepth_subreg: IMSD | LMSD
+;
+ */
+
+ /* Notification register followed by an optional type (the type is parsed but
+  * not used by the action).  When IS_GENp(6) the number selects a subregister
+  * of BRW_ARF_NOTIFICATION_COUNT; otherwise it is OR-ed into the register
+  * number.
+  */
+ notifyreg: NOTIFYREG regtype
+     {
+         /* num_notifyreg is a count (3 when IS_GENp(6), else 2), so the
+          * valid register numbers are 0 .. num_notifyreg - 1.
+          */
+         int num_notifyreg = (IS_GENp(6)) ? 3 : 2;
+
+         if ($1 >= num_notifyreg)
+             error(&@1, "notification register number %d out of range",
+                   $1);
+
+         memset (&$$, '\0', sizeof ($$));
+         $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+         if (IS_GENp(6)) {
+             $$.nr = BRW_ARF_NOTIFICATION_COUNT;
+             $$.subnr = $1;
+         } else {
+             $$.nr = BRW_ARF_NOTIFICATION_COUNT | $1;
+             $$.subnr = 0;
+         }
+     }
+ /*
+ | NOTIFYREG regtype
+ {
+ if ($1 > 1) {
+ fprintf(stderr,
+ "notification register number %d out of range",
+ $1);
+ YYERROR;
+ }
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg_file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.reg_nr = BRW_ARF_NOTIFICATION_COUNT;
+ $$.subreg_nr = 0;
+ }
+ */
+ ;
+
+ /* State register with an optional subregister number.  Only register 0 and
+  * subregisters 0 and 1 are accepted.
+  */
+ statereg: STATEREG subregnum
+     {
+         if ($1 > 0)
+             error(&@1, "state register number %d out of range", $1);
+
+         /* Print the subregister number ($2), which is the value that was
+          * actually checked.
+          */
+         if ($2 > 1)
+             error(&@2, "state subregister number %d out of range", $2);
+
+         memset (&$$, '\0', sizeof ($$));
+         $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+         $$.nr = BRW_ARF_STATE | $1;
+         $$.subnr = $2;
+     }
+ ;
+
+ /* Control register with an optional subregister number.  Only register 0
+  * and subregisters 0 to 2 are accepted.
+  */
+ controlreg: CONTROLREG subregnum
+     {
+         if ($1 > 0)
+             error(&@1, "control register number %d out of range", $1);
+
+         /* Print the subregister number ($2), which is the value that was
+          * actually checked.
+          */
+         if ($2 > 2)
+             error(&@2, "control subregister number %d out of range", $2);
+         memset (&$$, '\0', sizeof ($$));
+         $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+         $$.nr = BRW_ARF_CONTROL | $1;
+         $$.subnr = $2;
+     }
+ ;
+
+ /* Instruction pointer register followed by an optional type.  The type is
+ * parsed but not used by the action; subnr is always 0.
+ */
+ ipreg: IPREG regtype
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.nr = BRW_ARF_IP;
+ $$.subnr = 0;
+ }
+ ;
+
+ /* The null register: architecture register file, BRW_ARF_NULL, subnr 0. */
+ nullreg: NULL_TOKEN
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.file = BRW_ARCHITECTURE_REGISTER_FILE;
+ $$.nr = BRW_ARF_NULL;
+ $$.subnr = 0;
+ }
+ ;
+
+/* 1.4.6: Relative locations */
+ /* Relative jump target, given either as an integer offset or as a label
+ * name.  An integer must fit in a signed 16-bit range and is stored masked
+ * to its low 16 bits in imm32; a label is stored in reloc_target
+ * (presumably resolved later, outside this rule).
+ */
+ relativelocation:
+ simple_int
+ {
+ if (($1 > 32767) || ($1 < -32768))
+ error(&@1, "error: relative offset %d out of range \n", $1);
+
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.file = BRW_IMMEDIATE_VALUE;
+ $$.reg.type = BRW_REGISTER_TYPE_D;
+ $$.imm32 = $1 & 0x0000ffff;
+ }
+ | STRING
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.file = BRW_IMMEDIATE_VALUE;
+ $$.reg.type = BRW_REGISTER_TYPE_D;
+ $$.reloc_target = $1;
+ }
+ ;
+
+ /* Extended relative location.  Accepts a label name (stored in
+ * reloc_target), an integer expression (stored unmasked in imm32), a direct
+ * general register, a declared register with an offset, or a
+ * register-indirect general register.
+ */
+ relativelocation2:
+ STRING
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.file = BRW_IMMEDIATE_VALUE;
+ $$.reg.type = BRW_REGISTER_TYPE_D;
+ $$.reloc_target = $1;
+ }
+ | exp
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.file = BRW_IMMEDIATE_VALUE;
+ $$.reg.type = BRW_REGISTER_TYPE_D;
+ $$.imm32 = $1;
+ }
+ | directgenreg region regtype
+ {
+ set_direct_src_operand(&$$, &$1, $3.type);
+ $$.reg.vstride = $2.vert_stride;
+ $$.reg.width = $2.width;
+ $$.reg.hstride = $2.horiz_stride;
+ $$.default_region = $2.is_default;
+ }
+ | symbol_reg_p
+ {
+ /* Type and region come from the register's declaration. */
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.address_mode = BRW_ADDRESS_DIRECT;
+ $$.reg.file = $1.reg.file;
+ $$.reg.nr = $1.reg.nr;
+ $$.reg.subnr = $1.reg.subnr;
+ $$.reg.type = $1.reg.type;
+ $$.reg.vstride = $1.src_region.vert_stride;
+ $$.reg.width = $1.src_region.width;
+ $$.reg.hstride = $1.src_region.horiz_stride;
+ }
+ | indirectgenreg indirectregion regtype
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ $$.reg.file = $1.file;
+ $$.reg.subnr = $1.subnr;
+ $$.reg.dw1.bits.indirect_offset = $1.dw1.bits.indirect_offset;
+ $$.reg.type = $3.type;
+ $$.reg.vstride = $2.vert_stride;
+ $$.reg.width = $2.width;
+ $$.reg.hstride = $2.horiz_stride;
+ }
+ ;
+
+/* 1.4.7: Regions */
+ /* Optional destination region "<stride>".  Yields DEFAULT_DSTREGION when
+ * omitted; otherwise the stride must be 1, 2 or 4 and the value is
+ * ffs(stride), i.e. 1, 2 or 3.
+ */
+ dstregion: /* empty */
+ {
+ $$ = DEFAULT_DSTREGION;
+ }
+ |LANGLE exp RANGLE
+ {
+ /* Returns a value for a horiz_stride field of an
+ * instruction.
+ */
+ if ($2 != 1 && $2 != 2 && $2 != 4)
+ error(&@2, "Invalid horiz size %d\n", $2);
+
+ $$ = ffs($2);
+ }
+ ;
+
+ /* Optional source region.  Forms: omitted (is_default set, all strides
+ * ffs(0) == 0, width BRW_WIDTH_1), "<vstride>" alone, and the full
+ * "<vstride,width,hstride>" / "<vstride;width,hstride>".  Strides are stored
+ * as ffs(value) and the width as ffs(value) - 1.  is_default stays 0 for the
+ * explicit forms because the value is zeroed first.
+ */
+ region: /* empty */
+ {
+ /* XXX is this default value correct?*/
+ memset (&$$, '\0', sizeof ($$));
+ $$.vert_stride = ffs(0);
+ $$.width = BRW_WIDTH_1;
+ $$.horiz_stride = ffs(0);
+ $$.is_default = 1;
+ }
+ |LANGLE exp RANGLE
+ {
+ /* XXX is this default value correct for accreg?*/
+ memset (&$$, '\0', sizeof ($$));
+ $$.vert_stride = ffs($2);
+ $$.width = BRW_WIDTH_1;
+ $$.horiz_stride = ffs(0);
+ }
+ |LANGLE exp COMMA exp COMMA exp RANGLE
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.vert_stride = ffs($2);
+ $$.width = ffs($4) - 1;
+ $$.horiz_stride = ffs($6);
+ }
+ | LANGLE exp SEMICOLON exp COMMA exp RANGLE
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.vert_stride = ffs($2);
+ $$.width = ffs($4) - 1;
+ $$.horiz_stride = ffs($6);
+ }
+
+ ;
+ /* region_wh is used in specifying indirect operands where rather than having
+ * a vertical stride, you use subsequent address registers to get a new base
+ * offset for the next row.
+ * Written "<width,hstride>"; the vertical stride is fixed to
+ * BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL.
+ */
+ region_wh: LANGLE exp COMMA exp RANGLE
+ {
+ memset (&$$, '\0', sizeof ($$));
+ $$.vert_stride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
+ $$.width = ffs($2) - 1;
+ $$.horiz_stride = ffs($4);
+ }
+ ;
+
+ /* Region of an indirect operand: a regular region or the width/hstride-only
+ * form.
+ */
+ indirectregion: region | region_wh
+ ;
+
+/* 1.4.8: Types */
+
+ /* regtype returns a register type suitable for inserting into an
+ * instruction, together with an is_default flag.  When the type suffix is
+ * omitted the type is program_defaults.register_type and is_default is 1;
+ * an explicit suffix sets the matching BRW_REGISTER_TYPE_* and is_default 0.
+ */
+ regtype: /* empty */
+ { $$.type = program_defaults.register_type;$$.is_default = 1;}
+ | TYPE_F { $$.type = BRW_REGISTER_TYPE_F;$$.is_default = 0; }
+ | TYPE_UD { $$.type = BRW_REGISTER_TYPE_UD;$$.is_default = 0; }
+ | TYPE_D { $$.type = BRW_REGISTER_TYPE_D;$$.is_default = 0; }
+ | TYPE_UW { $$.type = BRW_REGISTER_TYPE_UW;$$.is_default = 0; }
+ | TYPE_W { $$.type = BRW_REGISTER_TYPE_W;$$.is_default = 0; }
+ | TYPE_UB { $$.type = BRW_REGISTER_TYPE_UB;$$.is_default = 0; }
+ | TYPE_B { $$.type = BRW_REGISTER_TYPE_B;$$.is_default = 0; }
+ ;
+
+ /* Optional type suffix of an immediate operand.  Defaults to
+ * BRW_REGISTER_TYPE_D when omitted.  Unlike regtype it accepts V and VF and
+ * has no byte types.
+ */
+ srcimmtype: /* empty */
+ {
+ /* XXX change to default when pragma parse is done */
+ $$ = BRW_REGISTER_TYPE_D;
+ }
+ |TYPE_F { $$ = BRW_REGISTER_TYPE_F; }
+ | TYPE_UD { $$ = BRW_REGISTER_TYPE_UD; }
+ | TYPE_D { $$ = BRW_REGISTER_TYPE_D; }
+ | TYPE_UW { $$ = BRW_REGISTER_TYPE_UW; }
+ | TYPE_W { $$ = BRW_REGISTER_TYPE_W; }
+ | TYPE_V { $$ = BRW_REGISTER_TYPE_V; }
+ | TYPE_VF { $$ = BRW_REGISTER_TYPE_VF; }
+ ;
+
+/* 1.4.10: Swizzle control */
+ /* Returns the swizzle control for an align16 instruction's source operand
+ * in the src0 fields.
+ * Omitted: BRW_SWIZZLE_NOOP.  One channel: that channel replicated to all
+ * four positions.  Four channels: used in the order written.
+ */
+ swizzle: /* empty */
+ {
+ $$.reg.dw1.bits.swizzle = BRW_SWIZZLE_NOOP;
+ }
+ | DOT chansel
+ {
+ $$.reg.dw1.bits.swizzle = BRW_SWIZZLE4($2, $2, $2, $2);
+ }
+ | DOT chansel chansel chansel chansel
+ {
+ $$.reg.dw1.bits.swizzle = BRW_SWIZZLE4($2, $3, $4, $5);
+ }
+ ;
+
+ /* A single channel selector.  No explicit action: the value is the token's
+ * own semantic value.
+ */
+ chansel: X | Y | Z | W
+ ;
+
+/* 1.4.9: Write mask */
+ /* Returns a partially completed struct brw_reg, with just the writemask bits
+ * filled out: BRW_WRITEMASK_XYZW when omitted, otherwise the OR of the
+ * channels listed after the dot (in x, y, z, w order).
+ */
+ writemask: /* empty */
+ {
+ $$.dw1.bits.writemask = BRW_WRITEMASK_XYZW;
+ }
+ | DOT writemask_x writemask_y writemask_z writemask_w
+ {
+ $$.dw1.bits.writemask = $2 | $3 | $4 | $5;
+ }
+ ;
+
+ /* Per-channel writemask bits: each rule yields the bit for its channel
+ * (1 << BRW_CHANNEL_*) when the channel letter is present and 0 otherwise.
+ */
+ writemask_x: /* empty */ { $$ = 0; }
+ | X { $$ = 1 << BRW_CHANNEL_X; }
+ ;
+
+ writemask_y: /* empty */ { $$ = 0; }
+ | Y { $$ = 1 << BRW_CHANNEL_Y; }
+ ;
+
+ writemask_z: /* empty */ { $$ = 0; }
+ | Z { $$ = 1 << BRW_CHANNEL_Z; }
+ ;
+
+ writemask_w: /* empty */ { $$ = 0; }
+ | W { $$ = 1 << BRW_CHANNEL_W; }
+ ;
+
+/* 1.4.11: Immediate values */
+ /* Immediate literal, tagged with its representation: an integer expression
+ * (imm32_d, stored in u.d) or a NUMBER token (imm32_f, stored in u.f).
+ */
+ imm32: exp { $$.r = imm32_d; $$.u.d = $1; }
+ | NUMBER { $$.r = imm32_f; $$.u.f = $1; }
+ ;
+
+/* 1.4.12: Predication and modifiers */
+ /* Optional predicate "(<+|-> flagreg <predctrl>)".  When omitted the result
+ * is BRW_PREDICATE_NONE with flag register 0.0 and no inversion; otherwise
+ * the flag register/subregister, the predicate control and the inversion bit
+ * come from the parsed parts.
+ */
+ predicate: /* empty */
+ {
+ $$.pred_control = BRW_PREDICATE_NONE;
+ $$.flag_reg_nr = 0;
+ $$.flag_subreg_nr = 0;
+ $$.pred_inverse = 0;
+ }
+ | LPAREN predstate flagreg predctrl RPAREN
+ {
+ $$.pred_control = $4;
+ $$.flag_reg_nr = $3.nr;
+ $$.flag_subreg_nr = $3.subnr;
+ $$.pred_inverse = $2;
+ }
+ ;
+
+ /* Predicate polarity: 1 (inverted) for MINUS, 0 for PLUS or when omitted. */
+ predstate: /* empty */ { $$ = 0; }
+ | PLUS { $$ = 0; }
+ | MINUS { $$ = 1; }
+ ;
+
+ /* Predicate control suffix, mapped to the matching BRW_PREDICATE_* value:
+ * BRW_PREDICATE_NORMAL when omitted, an align16 replicate mode for ".x" to
+ * ".w", or one of the align1 any/all modes.
+ */
+ predctrl: /* empty */ { $$ = BRW_PREDICATE_NORMAL; }
+ | DOT X { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_X; }
+ | DOT Y { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_Y; }
+ | DOT Z { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_Z; }
+ | DOT W { $$ = BRW_PREDICATE_ALIGN16_REPLICATE_W; }
+ | ANYV { $$ = BRW_PREDICATE_ALIGN1_ANYV; }
+ | ALLV { $$ = BRW_PREDICATE_ALIGN1_ALLV; }
+ | ANY2H { $$ = BRW_PREDICATE_ALIGN1_ANY2H; }
+ | ALL2H { $$ = BRW_PREDICATE_ALIGN1_ALL2H; }
+ | ANY4H { $$ = BRW_PREDICATE_ALIGN1_ANY4H; }
+ | ALL4H { $$ = BRW_PREDICATE_ALIGN1_ALL4H; }
+ | ANY8H { $$ = BRW_PREDICATE_ALIGN1_ANY8H; }
+ | ALL8H { $$ = BRW_PREDICATE_ALIGN1_ALL8H; }
+ | ANY16H { $$ = BRW_PREDICATE_ALIGN1_ANY16H; }
+ | ALL16H { $$ = BRW_PREDICATE_ALIGN1_ALL16H; }
+ ;
+
+ /* Optional negate source modifier: 1 for MINUS, 0 when omitted. */
+ negate: /* empty */ { $$ = 0; }
+ | MINUS { $$ = 1; }
+ ;
+
+ /* Optional absolute-value source modifier: 1 for ABS, 0 when omitted. */
+ abs: /* empty */ { $$ = 0; }
+ | ABS { $$ = 1; }
+ ;
+
+ /* Optional execution size "(n)".  The value is the encoded size
+ * ffs(n) - 1, i.e. log2(n) for the accepted values 1, 2, 4, 8, 16 and 32.
+ * When omitted, program_defaults.execute_size is encoded the same way.
+ */
+ execsize: /* empty */ %prec EMPTEXECSIZE
+ {
+ $$ = ffs(program_defaults.execute_size) - 1;
+ }
+ |LPAREN exp RPAREN
+ {
+ /* Returns a value for the execution_size field of an
+ * instruction.
+ */
+ if ($2 != 1 && $2 != 2 && $2 != 4 && $2 != 8 && $2 != 16 &&
+ $2 != 32)
+ error(&@2, "Invalid execution size %d\n", $2);
+
+ $$ = ffs($2) - 1;
+ }
+ ;
+
+ /* Optional saturate modifier: BRW_INSTRUCTION_SATURATE when present,
+ * BRW_INSTRUCTION_NORMAL when omitted.
+ */
+ saturate: /* empty */ { $$ = BRW_INSTRUCTION_NORMAL; }
+ | SATURATE { $$ = BRW_INSTRUCTION_SATURATE; }
+ ;
+ /* Conditional modifier: a condition, optionally followed by ".<flagreg>".
+  * Without a flag register, flag_reg_nr is 0 and flag_subreg_nr is -1; with
+  * one, flag_reg_nr is the low four bits of the flag register's number and
+  * flag_subreg_nr is its subregister number.
+  */
+ conditionalmodifier: condition
+     {
+         $$.cond = $1;
+         $$.flag_reg_nr = 0;
+         $$.flag_subreg_nr = -1;
+     }
+     | condition DOT flagreg
+     {
+         $$.cond = $1;
+         $$.flag_reg_nr = ($3.nr & 0xF);
+         $$.flag_subreg_nr = $3.subnr;
+     }
+ ;
+
+
+ /* Condition keyword.  BRW_CONDITIONAL_NONE when omitted; the keyword
+ * alternatives have no action, so the value is the token's own semantic
+ * value (presumably set by the lexer to the matching condition code --
+ * confirm against lex.l).
+ */
+ condition: /* empty */ { $$ = BRW_CONDITIONAL_NONE; }
+ | ZERO
+ | EQUAL
+ | NOT_ZERO
+ | NOT_EQUAL
+ | GREATER
+ | GREATER_EQUAL
+ | LESS
+ | LESS_EQUAL
+ | ROUND_INCREMENT
+ | OVERFLOW
+ | UNORDERED
+ ;
+
+/* 1.4.13: Instruction options */
+ /* Optional "{ ... }" instruction option block.  All-zero options when
+ * omitted; otherwise the options accumulated by instoption_list.
+ */
+ instoptions: /* empty */
+ { memset(&$$, 0, sizeof($$)); }
+ | LCURLY instoption_list RCURLY
+ { $$ = $2; }
+ ;
+
+ /* List of instruction options, separated by commas or simply juxtaposed.
+ * Starts from all-zero options and folds each option in with add_option().
+ */
+ instoption_list:instoption_list COMMA instoption
+ {
+ $$ = $1;
+ add_option(&$$, $3);
+ }
+ | instoption_list instoption
+ {
+ $$ = $1;
+ add_option(&$$, $2);
+ }
+ | /* empty, header defaults to zeroes. */
+ {
+ memset(&$$, 0, sizeof($$));
+ }
+ ;
+
+ /* A single instruction option keyword.  The value is the keyword's own
+ * token code, which instoption_list passes to add_option().
+ */
+ instoption: ALIGN1 { $$ = ALIGN1; }
+ | ALIGN16 { $$ = ALIGN16; }
+ | SECHALF { $$ = SECHALF; }
+ | COMPR { $$ = COMPR; }
+ | SWITCH { $$ = SWITCH; }
+ | ATOMIC { $$ = ATOMIC; }
+ | NODDCHK { $$ = NODDCHK; }
+ | NODDCLR { $$ = NODDCLR; }
+ | MASK_DISABLE { $$ = MASK_DISABLE; }
+ | BREAKPOINT { $$ = BREAKPOINT; }
+ | ACCWRCTRL { $$ = ACCWRCTRL; }
+ | EOT { $$ = EOT; }
+ ;
+
+%%
+ extern int yylineno;
+
+ /* Parser error callback: prints the input file name, the current line, the
+  * message and the text returned by lex_text() to stderr, then bumps the
+  * global error counter.
+  */
+ void yyerror (char *msg)
+ {
+     fprintf(stderr,
+             "%s: %d: %s at \"%s\"\n",
+             input_filename,
+             yylineno,
+             msg,
+             lex_text());
+     errors += 1;
+ }
+
+ /* Size in bytes of one element of the given register type: 4 for F/UD/D,
+  * 2 for UW/W, 1 for UB/B.  Any other type trips an assert; with asserts
+  * compiled out the result is 1.
+  */
+ static int get_type_size(unsigned type)
+ {
+     switch (type) {
+     case BRW_REGISTER_TYPE_F:
+     case BRW_REGISTER_TYPE_UD:
+     case BRW_REGISTER_TYPE_D:
+         return 4;
+
+     case BRW_REGISTER_TYPE_UW:
+     case BRW_REGISTER_TYPE_W:
+         return 2;
+
+     case BRW_REGISTER_TYPE_UB:
+     case BRW_REGISTER_TYPE_B:
+         return 1;
+
+     default:
+         break;
+     }
+
+     /* Unknown register type. */
+     assert(0);
+     return 1;
+ }
+
+ /* Recomputes the region (vstride/width/hstride) of a source operand that was
+ * written without an explicit region.  Does nothing unless
+ * src->default_region is set.  The replacement depends on the register
+ * file/number and on the instruction's opcode, execution size and
+ * compression control.  Strides are stored as ffs(value) and the width as
+ * ffs(value) - 1.
+ */
+ static void reset_instruction_src_region(struct brw_instruction *instr,
+ struct src_operand *src)
+ {
+ if (!src->default_region)
+ return;
+
+ if (src->reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ ((src->reg.nr & 0xF0) == BRW_ARF_ADDRESS)) {
+ /* Address register: both strides ffs(0) == 0, width BRW_WIDTH_1. */
+ src->reg.vstride = ffs(0);
+ src->reg.width = BRW_WIDTH_1;
+ src->reg.hstride = ffs(0);
+ } else if (src->reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ ((src->reg.nr & 0xF0) == BRW_ARF_ACCUMULATOR)) {
+ /* Accumulator: width 16 for compressed instructions, 8 otherwise,
+ * capped at the execution size; hstride 1, vstride equal to width.
+ */
+ int horiz_stride = 1, width, vert_stride;
+ if (instr->header.compression_control == BRW_COMPRESSION_COMPRESSED) {
+ width = 16;
+ } else {
+ width = 8;
+ }
+
+ if (width > (1 << instr->header.execution_size))
+ width = (1 << instr->header.execution_size);
+
+ vert_stride = horiz_stride * width;
+ src->reg.vstride = ffs(vert_stride);
+ src->reg.width = ffs(width) - 1;
+ src->reg.hstride = ffs(horiz_stride);
+ } else if ((src->reg.file == BRW_ARCHITECTURE_REGISTER_FILE) &&
+ (src->reg.nr == BRW_ARF_NULL) &&
+ (instr->header.opcode == BRW_OPCODE_SEND)) {
+ /* Null register as a SEND source: vstride 8, width 8, hstride 1. */
+ src->reg.vstride = ffs(8);
+ src->reg.width = BRW_WIDTH_8;
+ src->reg.hstride = ffs(1);
+ } else {
+
+ int horiz_stride = 1, width, vert_stride;
+
+ if (instr->header.execution_size == 0) { /* scalar */
+ horiz_stride = 0;
+ width = 1;
+ vert_stride = 0;
+ } else {
+ /* These opcodes get a scalar region (all strides 0, width 1)
+ * regardless of the execution size.
+ */
+ if ((instr->header.opcode == BRW_OPCODE_MUL) ||
+ (instr->header.opcode == BRW_OPCODE_MAC) ||
+ (instr->header.opcode == BRW_OPCODE_CMP) ||
+ (instr->header.opcode == BRW_OPCODE_ASR) ||
+ (instr->header.opcode == BRW_OPCODE_ADD) ||
+ (instr->header.opcode == BRW_OPCODE_SHL)) {
+ horiz_stride = 0;
+ width = 1;
+ vert_stride = 0;
+ } else {
+ /* Width equals the execution size (horiz_stride is 1 here). */
+ width = (1 << instr->header.execution_size) / horiz_stride;
+ vert_stride = horiz_stride * width;
+
+ /* Fall back to a scalar region when element size times
+ * (width + subnr) exceeds 32.
+ */
+ if (get_type_size(src->reg.type) * (width + src->reg.subnr) > 32) {
+ horiz_stride = 0;
+ width = 1;
+ vert_stride = 0;
+ }
+ }
+ }
+
+ src->reg.vstride = ffs(vert_stride);
+ src->reg.width = ffs(width) - 1;
+ src->reg.hstride = ffs(horiz_stride);
+ }
+ }
+
+ /* Stores opcode in the header of the instruction wrapped by instr. */
+ static void set_instruction_opcode(struct brw_program_instruction *instr,
+                                    unsigned opcode)
+ {
+     struct brw_instruction *insn = GEN(instr);
+
+     insn->header.opcode = opcode;
+ }
+
+/**
+ * Fills in the destination register information in instr from the bits in dst.
+ */
+static int set_instruction_dest(struct brw_program_instruction *instr,
+ struct brw_reg *dest)
+{
+ if (!validate_dst_reg(GEN(instr), dest))
+ return 1;
+
+ /* the assembler support expressing subnr in bytes or in number of
+ * elements. */
+ resolve_subnr(dest);
+
+ brw_set_dest(&genasm_compile, GEN(instr), *dest);
+
+ return 0;
+}
+
+ /* Encodes src as the first source operand of instr.  In advanced mode a
+  * default region is recomputed first.  Returns 1 when validate_src_reg()
+  * rejects the operand, 0 on success.
+  */
+ static int set_instruction_src0(struct brw_program_instruction *instr,
+                                 struct src_operand *src,
+                                 YYLTYPE *location)
+ {
+     struct brw_instruction *insn = GEN(instr);
+
+     if (advanced_flag)
+         reset_instruction_src_region(insn, src);
+
+     if (validate_src_reg(insn, src->reg, location)) {
+         /* The assembler supports expressing subnr either in bytes or in
+          * number of elements; resolve it before encoding. */
+         resolve_subnr(&src->reg);
+
+         brw_set_src0(&genasm_compile, insn, src->reg);
+         return 0;
+     }
+
+     return 1;
+ }
+
+ /* Encodes src as the second source operand of instr.  In advanced mode a
+  * default region is recomputed first.  Returns 1 when validate_src_reg()
+  * rejects the operand, 0 on success.
+  */
+ static int set_instruction_src1(struct brw_program_instruction *instr,
+                                 struct src_operand *src,
+                                 YYLTYPE *location)
+ {
+     struct brw_instruction *insn = GEN(instr);
+
+     if (advanced_flag)
+         reset_instruction_src_region(insn, src);
+
+     if (validate_src_reg(insn, src->reg, location)) {
+         /* The assembler supports expressing subnr either in bytes or in
+          * number of elements; resolve it before encoding. */
+         resolve_subnr(&src->reg);
+
+         brw_set_src1(&genasm_compile, insn, src->reg);
+         return 0;
+     }
+
+     return 1;
+ }
+
+/* Encodes dest as the destination of a three-source instruction. No
+ * validation is performed here; always returns 0. */
+static int set_instruction_dest_three_src(struct brw_program_instruction *instr,
+					  struct brw_reg *dest)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	resolve_subnr(dest);
+	brw_set_3src_dest(&genasm_compile, insn, *dest);
+
+	return 0;
+}
+
+/* Encodes src as source 0 of a three-source instruction. In advanced mode
+ * the source region is recomputed first. Always returns 0. */
+static int set_instruction_src0_three_src(struct brw_program_instruction *instr,
+					  struct src_operand *src)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	if (advanced_flag)
+		reset_instruction_src_region(insn, src);
+	resolve_subnr(&src->reg);
+
+	// TODO: src0 modifier, src0 rep_ctrl
+	brw_set_3src_src0(&genasm_compile, insn, src->reg);
+
+	return 0;
+}
+
+/* Encodes src as source 1 of a three-source instruction. In advanced mode
+ * the source region is recomputed first. Always returns 0. */
+static int set_instruction_src1_three_src(struct brw_program_instruction *instr,
+					  struct src_operand *src)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	if (advanced_flag)
+		reset_instruction_src_region(insn, src);
+	resolve_subnr(&src->reg);
+
+	// TODO: src1 modifier, src1 rep_ctrl
+	brw_set_3src_src1(&genasm_compile, insn, src->reg);
+
+	return 0;
+}
+
+/* Encodes src as source 2 of a three-source instruction. In advanced mode
+ * the source region is recomputed first. Always returns 0. */
+static int set_instruction_src2_three_src(struct brw_program_instruction *instr,
+					  struct src_operand *src)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	if (advanced_flag)
+		reset_instruction_src_region(insn, src);
+	resolve_subnr(&src->reg);
+
+	// TODO: src2 modifier, src2 rep_ctrl
+	brw_set_3src_src2(&genasm_compile, insn, src->reg);
+
+	return 0;
+}
+
+/* Copies the saturate flag into the instruction header. */
+static void set_instruction_saturate(struct brw_program_instruction *instr,
+				     int saturate)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	insn->header.saturate = saturate;
+}
+
+/* Copies every parsed instruction option into the encoded instruction: the
+ * control fields go to the header, end_of_thread to bits3.generic. */
+static void set_instruction_options(struct brw_program_instruction *instr,
+				    struct options options)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	insn->header.access_mode = options.access_mode;
+	insn->header.compression_control = options.compression_control;
+	insn->header.thread_control = options.thread_control;
+	insn->header.dependency_control = options.dependency_control;
+	insn->header.mask_control = options.mask_control;
+	insn->header.debug_control = options.debug_control;
+	insn->header.acc_wr_control = options.acc_wr_control;
+
+	insn->bits3.generic.end_of_thread = options.end_of_thread;
+}
+
+/* Copies the predicate control/inverse bits and the flag (sub)register
+ * selected by p into the encoded instruction. */
+static void set_instruction_predicate(struct brw_program_instruction *instr,
+				      struct predicate *p)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	insn->header.predicate_control = p->pred_control;
+	insn->header.predicate_inverse = p->pred_inverse;
+
+	insn->bits2.da1.flag_reg_nr = p->flag_reg_nr;
+	insn->bits2.da1.flag_subreg_nr = p->flag_subreg_nr;
+}
+
+/* Applies predicate p and conditional modifier c to instr.
+ *
+ * When c names a flag register (flag_subreg_nr != -1) that register
+ * overrides the one taken from the predicate; a warning is emitted if an
+ * enabled predicate refers to a different flag register. */
+static void set_instruction_pred_cond(struct brw_program_instruction *instr,
+				      struct predicate *p,
+				      struct condition *c,
+				      YYLTYPE *location)
+{
+	struct brw_instruction *insn = GEN(instr);
+
+	set_instruction_predicate(instr, p);
+	insn->header.destreg__conditionalmod = c->cond;
+
+	if (c->flag_subreg_nr != -1) {
+		int same_flag = p->flag_reg_nr == c->flag_reg_nr &&
+				p->flag_subreg_nr == c->flag_subreg_nr;
+
+		if (p->pred_control != BRW_PREDICATE_NONE && !same_flag) {
+			warn(ALWAYS, location, "must use the same flag register if both "
+			     "prediction and conditional modifier are enabled\n");
+		}
+
+		insn->bits2.da1.flag_reg_nr = c->flag_reg_nr;
+		insn->bits2.da1.flag_subreg_nr = c->flag_subreg_nr;
+	}
+}
+
+/* Initializes dst as a copy of reg, then forces direct addressing, the given
+ * type, a horizontal stride field of 1 and a full XYZW writemask. */
+static void set_direct_dst_operand(struct brw_reg *dst, struct brw_reg *reg,
+				   int type)
+{
+	struct brw_reg operand = *reg;
+
+	operand.address_mode = BRW_ADDRESS_DIRECT;
+	operand.type = type;
+	operand.hstride = 1;
+	operand.dw1.bits.writemask = BRW_WRITEMASK_XYZW;
+
+	*dst = operand;
+}
+
+/* Resets src and fills it as a directly addressed operand that takes its
+ * register file, number and sub-register from reg and uses the given type.
+ * Region fields and source modifiers are cleared; the swizzle is set to
+ * BRW_SWIZZLE_NOOP. */
+static void set_direct_src_operand(struct src_operand *src, struct brw_reg *reg,
+				   int type)
+{
+	memset(src, 0, sizeof(*src));
+
+	/* what is addressed */
+	src->reg.address_mode = BRW_ADDRESS_DIRECT;
+	src->reg.file = reg->file;
+	src->reg.nr = reg->nr;
+	src->reg.subnr = reg->subnr;
+	src->reg.type = type;
+
+	/* region description */
+	src->reg.vstride = 0;
+	src->reg.width = 0;
+	src->reg.hstride = 0;
+
+	/* no source modifiers */
+	src->reg.negate = 0;
+	src->reg.abs = 0;
+
+	SWIZZLE(src->reg) = BRW_SWIZZLE_NOOP;
+}
diff --git a/assembler/intel-gen4asm.pc.in b/assembler/intel-gen4asm.pc.in
new file mode 100644
index 0000000..54febc4
--- /dev/null
+++ b/assembler/intel-gen4asm.pc.in
@@ -0,0 +1,10 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: intel-gen4asm
+Description: An assembler compiler for the Intel 965+ Chipset
+Version: @VERSION@
+Libs:
+Cflags:
diff --git a/assembler/lex.l b/assembler/lex.l
new file mode 100644
index 0000000..769d98b
--- /dev/null
+++ b/assembler/lex.l
@@ -0,0 +1,440 @@
+%option yylineno
+%{
+#include <string.h>
+#include "gen4asm.h"
+#include "gram.h"
+#include "brw_defines.h"
+
+#include "string.h"
+int saved_state = 0;
+extern char *input_filename;
+
+/* Locations */
+/* Column at which the next matched text starts; several rules below reset it
+ * to 1 when they consume the end of a line. */
+int yycolumn = 1;
+
+/* Executed for every match: stores the line and the column span of the
+ * matched text in yylloc, then advances yycolumn past the match. */
+#define YY_USER_ACTION \
+ yylloc.first_line = yylloc.last_line = yylineno; \
+ yylloc.first_column = yycolumn; \
+ yylloc.last_column = yycolumn+yyleng-1; \
+ yycolumn += yyleng;
+
+%}
+%x BLOCK_COMMENT
+%x CHANNEL
+%x LINENUMBER
+%x FILENAME
+
+%%
+\/\/.*[\r\n] { yycolumn = 1; } /* eat up single-line comments */
+"\.kernel".*[\r\n] { yycolumn = 1; }
+"\.end_kernel".*[\r\n] { yycolumn = 1; }
+"\.code".*[\r\n] { yycolumn = 1; }
+"\.end_code".*[\r\n] { yycolumn = 1; }
+
+ /* eat up multi-line comments, non-nesting. The start condition active
+ * when the comment opens is restored when it closes. */
+\/\* {
+	saved_state = YYSTATE;
+	BEGIN(BLOCK_COMMENT);
+}
+<BLOCK_COMMENT>\*\/ {
+	BEGIN(saved_state);
+}
+<BLOCK_COMMENT>. { }
+ /* a new line starts inside the comment: restart column counting so that
+ * tokens following the comment are reported at the right column. */
+<BLOCK_COMMENT>[\r\n] { yycolumn = 1; }
+ /* line directives: the number is lexed in LINENUMBER and the quoted file
+ * name in FILENAME, which then restores the saved start condition. */
+"#line"" "* {
+	yycolumn = 1;
+	saved_state = YYSTATE;
+	BEGIN(LINENUMBER);
+}
+<LINENUMBER>[0-9]+" "* {
+	/* one less than the given number, presumably because the newline
+	 * ending the directive is counted by yylineno afterwards */
+	yylineno = atoi (yytext) - 1;
+	BEGIN(FILENAME);
+}
+<FILENAME>\"[^\"]+\" {
+	/* yytext holds the name including both quotes: copy what is between
+	 * them (yyleng - 2 characters) and terminate right after the copy.
+	 * The terminator used to be written one byte past the allocation,
+	 * leaving the actual end of the string uninitialized. */
+	size_t name_len = yyleng - 2;
+	char *name = malloc (name_len + 1);
+
+	if (name == NULL) {
+		fprintf(stderr, "out of memory\n");
+		exit(1);
+	}
+	memmove (name, yytext + 1, name_len);
+	name[name_len] = '\0';
+	input_filename = name;
+	BEGIN(saved_state);
+}
+
+<CHANNEL>"x" {
+ yylval.integer = BRW_CHANNEL_X;
+ return X;
+}
+<CHANNEL>"y" {
+ yylval.integer = BRW_CHANNEL_Y;
+ return Y;
+}
+<CHANNEL>"z" {
+ yylval.integer = BRW_CHANNEL_Z;
+ return Z;
+}
+<CHANNEL>"w" {
+yylval.integer = BRW_CHANNEL_W;
+ return W;
+}
+<CHANNEL>. {
+ yyless(0);
+ BEGIN(INITIAL);
+}
+
+ /* used for both null send and null register. */
+"null" { return NULL_TOKEN; }
+
+ /* opcodes */
+"mov" { yylval.integer = BRW_OPCODE_MOV; return MOV; }
+"frc" { yylval.integer = BRW_OPCODE_FRC; return FRC; }
+"rndu" { yylval.integer = BRW_OPCODE_RNDU; return RNDU; }
+"rndd" { yylval.integer = BRW_OPCODE_RNDD; return RNDD; }
+"rnde" { yylval.integer = BRW_OPCODE_RNDE; return RNDE; }
+"rndz" { yylval.integer = BRW_OPCODE_RNDZ; return RNDZ; }
+"not" { yylval.integer = BRW_OPCODE_NOT; return NOT; }
+"lzd" { yylval.integer = BRW_OPCODE_LZD; return LZD; }
+"f16to32" { yylval.integer = BRW_OPCODE_F16TO32; return F16TO32; }
+"f32to16" { yylval.integer = BRW_OPCODE_F32TO16; return F32TO16; }
+"fbh" { yylval.integer = BRW_OPCODE_FBH; return FBH; }
+"fbl" { yylval.integer = BRW_OPCODE_FBL; return FBL; }
+
+"mad" { yylval.integer = BRW_OPCODE_MAD; return MAD; }
+"lrp" { yylval.integer = BRW_OPCODE_LRP; return LRP; }
+"bfe" { yylval.integer = BRW_OPCODE_BFE; return BFE; }
+"bfi1" { yylval.integer = BRW_OPCODE_BFI1; return BFI1; }
+"bfi2" { yylval.integer = BRW_OPCODE_BFI2; return BFI2; }
+"bfrev" { yylval.integer = BRW_OPCODE_BFREV; return BFREV; }
+"mul" { yylval.integer = BRW_OPCODE_MUL; return MUL; }
+"mac" { yylval.integer = BRW_OPCODE_MAC; return MAC; }
+"mach" { yylval.integer = BRW_OPCODE_MACH; return MACH; }
+"line" { yylval.integer = BRW_OPCODE_LINE; return LINE; }
+"sad2" { yylval.integer = BRW_OPCODE_SAD2; return SAD2; }
+"sada2" { yylval.integer = BRW_OPCODE_SADA2; return SADA2; }
+"dp4" { yylval.integer = BRW_OPCODE_DP4; return DP4; }
+"dph" { yylval.integer = BRW_OPCODE_DPH; return DPH; }
+"dp3" { yylval.integer = BRW_OPCODE_DP3; return DP3; }
+"dp2" { yylval.integer = BRW_OPCODE_DP2; return DP2; }
+
+"cbit" { yylval.integer = BRW_OPCODE_CBIT; return CBIT; }
+"avg" { yylval.integer = BRW_OPCODE_AVG; return AVG; }
+"add" { yylval.integer = BRW_OPCODE_ADD; return ADD; }
+"addc" { yylval.integer = BRW_OPCODE_ADDC; return ADDC; }
+"sel" { yylval.integer = BRW_OPCODE_SEL; return SEL; }
+"and" { yylval.integer = BRW_OPCODE_AND; return AND; }
+"or" { yylval.integer = BRW_OPCODE_OR; return OR; }
+"xor" { yylval.integer = BRW_OPCODE_XOR; return XOR; }
+"shr" { yylval.integer = BRW_OPCODE_SHR; return SHR; }
+"shl" { yylval.integer = BRW_OPCODE_SHL; return SHL; }
+"asr" { yylval.integer = BRW_OPCODE_ASR; return ASR; }
+"cmp" { yylval.integer = BRW_OPCODE_CMP; return CMP; }
+"cmpn" { yylval.integer = BRW_OPCODE_CMPN; return CMPN; }
+"subb" { yylval.integer = BRW_OPCODE_SUBB; return SUBB; }
+
+"send" { yylval.integer = BRW_OPCODE_SEND; return SEND; }
+"nop" { yylval.integer = BRW_OPCODE_NOP; return NOP; }
+"jmpi" { yylval.integer = BRW_OPCODE_JMPI; return JMPI; }
+"if" { yylval.integer = BRW_OPCODE_IF; return IF; }
+"iff" { yylval.integer = BRW_OPCODE_IFF; return IFF; }
+"while" { yylval.integer = BRW_OPCODE_WHILE; return WHILE; }
+"else" { yylval.integer = BRW_OPCODE_ELSE; return ELSE; }
+"break" { yylval.integer = BRW_OPCODE_BREAK; return BREAK; }
+"cont" { yylval.integer = BRW_OPCODE_CONTINUE; return CONT; }
+"halt" { yylval.integer = BRW_OPCODE_HALT; return HALT; }
+"msave" { yylval.integer = BRW_OPCODE_MSAVE; return MSAVE; }
+"push" { yylval.integer = BRW_OPCODE_PUSH; return PUSH; }
+"mrest" { yylval.integer = BRW_OPCODE_MRESTORE; return MREST; }
+"pop" { yylval.integer = BRW_OPCODE_POP; return POP; }
+"wait" { yylval.integer = BRW_OPCODE_WAIT; return WAIT; }
+"do" { yylval.integer = BRW_OPCODE_DO; return DO; }
+"endif" { yylval.integer = BRW_OPCODE_ENDIF; return ENDIF; }
+"call" { yylval.integer = BRW_OPCODE_CALL; return CALL; }
+"ret" { yylval.integer = BRW_OPCODE_RET; return RET; }
+"brd" { yylval.integer = BRW_OPCODE_BRD; return BRD; }
+"brc" { yylval.integer = BRW_OPCODE_BRC; return BRC; }
+
+"pln" { yylval.integer = BRW_OPCODE_PLN; return PLN; }
+
+ /* send argument tokens */
+"mlen" { return MSGLEN; }
+"rlen" { return RETURNLEN; }
+"math" { if (IS_GENp(6)) { yylval.integer = BRW_OPCODE_MATH; return MATH_INST; } else return MATH; }
+"sampler" { return SAMPLER; }
+"gateway" { return GATEWAY; }
+"read" { return READ; }
+"write" { return WRITE; }
+"urb" { return URB; }
+"thread_spawner" { return THREAD_SPAWNER; }
+"vme" { return VME; }
+"cre" { return CRE; }
+"data_port" { return DATA_PORT; }
+
+"allocate" { return ALLOCATE; }
+"used" { return USED; }
+"complete" { return COMPLETE; }
+"transpose" { return TRANSPOSE; }
+"interleave" { return INTERLEAVE; }
+
+";" { return SEMICOLON; }
+"(" { return LPAREN; }
+")" { return RPAREN; }
+"<" { return LANGLE; }
+">" { return RANGLE; }
+"{" { return LCURLY; }
+"}" { return RCURLY; }
+"[" { return LSQUARE; }
+"]" { return RSQUARE; }
+"," { return COMMA; }
+"." { BEGIN(CHANNEL); return DOT; }
+"+" { return PLUS; }
+"-" { return MINUS; }
+"*" { return MULTIPLY;}
+"/" { return DIVIDE; }
+":" { return COLON; }
+"=" { return EQ; }
+"(abs)" { return ABS; }
+
+ /* Most register accesses are lexed as REGFILE[0-9]+, to prevent the register
+ * with subreg from being lexed as REGFILE NUMBER instead of
+ * REGISTER INTEGER DOT INTEGER like we want. The alternative was to use a
+ * start condition, which wasn't very clean-looking.
+ *
+ * However, this means we need to lex the general and message register file
+ * characters as well, for register-indirect access which is formatted
+ * like g[a#.#] or m[a#.#].
+ */
+"acc"[0-9]+ {
+ yylval.integer = atoi(yytext + 3);
+ return ACCREG;
+}
+"a"[0-9]+ {
+ yylval.integer = atoi(yytext + 1);
+ return ADDRESSREG;
+}
+"m"[0-9]+ {
+ yylval.integer = atoi(yytext + 1);
+ return MSGREG;
+}
+"m" {
+ return MSGREGFILE;
+}
+"mask"[0-9]+ {
+ yylval.integer = atoi(yytext + 4);
+ return MASKREG;
+}
+"ms"[0-9]+ {
+ yylval.integer = atoi(yytext + 2);
+ return MASKSTACKREG;
+}
+"msd"[0-9]+ {
+ yylval.integer = atoi(yytext + 3);
+ return MASKSTACKDEPTHREG;
+}
+
+"n0."[0-9]+ {
+ yylval.integer = atoi(yytext + 3);
+ return NOTIFYREG;
+}
+
+"n"[0-9]+ {
+ yylval.integer = atoi(yytext + 1);
+ return NOTIFYREG;
+}
+
+"f"[0-9] {
+ yylval.integer = atoi(yytext + 1);
+ return FLAGREG;
+}
+
+[gr][0-9]+ {
+ yylval.integer = atoi(yytext + 1);
+ return GENREG;
+}
+[gr] {
+ return GENREGFILE;
+}
+"cr"[0-9]+ {
+ yylval.integer = atoi(yytext + 2);
+ return CONTROLREG;
+}
+"sr"[0-9]+ {
+ yylval.integer = atoi(yytext + 2);
+ return STATEREG;
+}
+"ip" {
+ return IPREG;
+}
+"amask" {
+ yylval.integer = BRW_AMASK;
+ return AMASK;
+}
+"imask" {
+ yylval.integer = BRW_IMASK;
+ return IMASK;
+}
+"lmask" {
+ yylval.integer = BRW_LMASK;
+ return LMASK;
+}
+"cmask" {
+ yylval.integer = BRW_CMASK;
+ return CMASK;
+}
+"imsd" {
+ yylval.integer = 0;
+ return IMSD;
+}
+"lmsd" {
+ yylval.integer = 1;
+ return LMSD;
+}
+"ims" {
+ yylval.integer = 0;
+ return IMS;
+}
+"lms" {
+ yylval.integer = 16;
+ return LMS;
+}
+
+ /*
+ * Lexing of register types should probably require the ":" symbol specified
+ * in the BNF of the assembly, but our existing source didn't use that syntax.
+ */
+"UD" { return TYPE_UD; }
+":UD" { return TYPE_UD; }
+"D" { return TYPE_D; }
+":D" { return TYPE_D; }
+"UW" { return TYPE_UW; }
+":UW" { return TYPE_UW; }
+"W" { return TYPE_W; }
+":W" { return TYPE_W; }
+"UB" { return TYPE_UB; }
+":UB" { return TYPE_UB; }
+"B" { return TYPE_B; }
+":B" { return TYPE_B; }
+"F" { return TYPE_F; }
+":F" { return TYPE_F; }
+"VF" {return TYPE_VF; }
+":VF" {return TYPE_VF; }
+"V" { return TYPE_V; }
+":V" { return TYPE_V; }
+
+#".kernel" { return KERNEL_PRAGMA;}
+#".end_kernel" { return END_KERNEL_PRAGMA;}
+#".code" { return CODE_PRAGMA;}
+#".end_code" { return END_CODE_PRAGMA;}
+".reg_count_payload" { return REG_COUNT_PAYLOAD_PRAGMA; }
+".reg_count_total" { return REG_COUNT_TOTAL_PRAGMA; }
+".default_execution_size" { return DEFAULT_EXEC_SIZE_PRAGMA; }
+".default_register_type" { return DEFAULT_REG_TYPE_PRAGMA; }
+".declare" { return DECLARE_PRAGMA; }
+"Base" { return BASE; }
+"ElementSize" { return ELEMENTSIZE; }
+"SrcRegion" { return SRCREGION; }
+"DstRegion" { return DSTREGION; }
+"Type" { return TYPE; }
+
+
+".sat" { return SATURATE; }
+"align1" { return ALIGN1; }
+"align16" { return ALIGN16; }
+"sechalf" { return SECHALF; }
+"compr" { return COMPR; }
+"switch" { return SWITCH; }
+"atomic" { return ATOMIC; }
+"noddchk" { return NODDCHK; }
+"noddclr" { return NODDCLR; }
+"mask_disable" { return MASK_DISABLE; }
+"nomask" { return MASK_DISABLE; }
+"breakpoint" { return BREAKPOINT; }
+"accwrctrl" { return ACCWRCTRL; }
+"EOT" { return EOT; }
+
+ /* extended math functions */
+"inv" { yylval.integer = BRW_MATH_FUNCTION_INV; return INV; }
+"log" { yylval.integer = BRW_MATH_FUNCTION_LOG; return LOG; }
+"exp" { yylval.integer = BRW_MATH_FUNCTION_EXP; return EXP; }
+"sqrt" { yylval.integer = BRW_MATH_FUNCTION_SQRT; return SQRT; }
+"rsq" { yylval.integer = BRW_MATH_FUNCTION_RSQ; return RSQ; }
+"pow" { yylval.integer = BRW_MATH_FUNCTION_POW; return POW; }
+"sin" { yylval.integer = BRW_MATH_FUNCTION_SIN; return SIN; }
+"cos" { yylval.integer = BRW_MATH_FUNCTION_COS; return COS; }
+"sincos" { yylval.integer = BRW_MATH_FUNCTION_SINCOS; return SINCOS; }
+"intdiv" {
+ yylval.integer = BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
+ return INTDIV;
+}
+"intmod" {
+ yylval.integer = BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
+ return INTMOD;
+}
+"intdivmod" {
+ yylval.integer = BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
+ return INTDIVMOD;
+}
+
+"signed" { return SIGNED; }
+"scalar" { return SCALAR; }
+
+ /* predicate control */
+".anyv" { return ANYV; }
+".allv" { return ALLV; }
+".any2h" { return ANY2H; }
+".all2h" { return ALL2H; }
+".any4h" { return ANY4H; }
+".all4h" { return ALL4H; }
+".any8h" { return ANY8H; }
+".all8h" { return ALL8H; }
+".any16h" { return ANY16H; }
+".all16h" { return ALL16H; }
+
+".z" { yylval.integer = BRW_CONDITIONAL_Z; return ZERO; }
+".e" { yylval.integer = BRW_CONDITIONAL_Z; return EQUAL; }
+".nz" { yylval.integer = BRW_CONDITIONAL_NZ; return NOT_ZERO; }
+".ne" { yylval.integer = BRW_CONDITIONAL_NZ; return NOT_EQUAL; }
+".g" { yylval.integer = BRW_CONDITIONAL_G; return GREATER; }
+".ge" { yylval.integer = BRW_CONDITIONAL_GE; return GREATER_EQUAL; }
+".l" { yylval.integer = BRW_CONDITIONAL_L; return LESS; }
+".le" { yylval.integer = BRW_CONDITIONAL_LE; return LESS_EQUAL; }
+".r" { yylval.integer = BRW_CONDITIONAL_R; return ROUND_INCREMENT; }
+".o" { yylval.integer = BRW_CONDITIONAL_O; return OVERFLOW; }
+".u" { yylval.integer = BRW_CONDITIONAL_U; return UNORDERED; }
+
+[a-zA-Z_][0-9a-zA-Z_]* {
+ yylval.string = strdup(yytext);
+ return STRING;
+}
+
+0x[0-9a-fA-F][0-9a-fA-F]* {
+ yylval.integer = strtoul(yytext + 2, NULL, 16);
+ return INTEGER;
+}
+[0-9][0-9]* {
+ yylval.integer = strtoul(yytext, NULL, 10);
+ return INTEGER;
+}
+
+<INITIAL>[-]?[0-9]+"."[0-9]+ {
+ yylval.number = strtod(yytext, NULL);
+ return NUMBER;
+}
+
+[ \t]+ { } /* eat up whitespace */
+
+\n { yycolumn = 1; }
+
+. {
+ fprintf(stderr, "%s: %d: %s at \"%s\"\n",
+ input_filename, yylineno, "unexpected token", lex_text());
+ }
+%%
+
+/* Returns the scanner's yytext buffer, i.e. the text of the most recent
+ * match; used by the catch-all rule above to report unexpected input. */
+char *
+lex_text(void)
+{
+ return yytext;
+ /* Never executed: presumably only here to reference yyunput so that the
+ * compiler does not warn about the generated function being unused. */
+ (void) yyunput;
+}
+
+/* Fallback yywrap(), compiled only when yywrap is not already a macro.
+ * It always returns 1, which in flex means there is no further input to
+ * switch to at end of file. */
+#ifndef yywrap
+int yywrap() { return 1; }
+#endif
+
diff --git a/assembler/main.c b/assembler/main.c
new file mode 100644
index 0000000..05ca337
--- /dev/null
+++ b/assembler/main.c
@@ -0,0 +1,520 @@
+/* -*- c-basic-offset: 8 -*- */
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "ralloc.h"
+#include "gen4asm.h"
+#include "brw_eu.h"
+
+extern FILE *yyin;
+
+/* GPU generation multiplied by ten (-g 7.5 gives 75); main() accepts 40..75 */
+long int gen_level = 40;
+int advanced_flag = 0; /* 0: in unit of byte, 1: in unit of data element size */
+/* -W adds WARN_ALL to the default set */
+unsigned int warning_flags = WARN_ALWAYS;
+/* set by -e: main() writes the label addresses to an export file */
+int need_export = 0;
+/* input name used in diagnostics; replaced by the path given on the command
+ * line and by #line directives seen by the lexer */
+char *input_filename = "<stdin>";
+/* main() exits with status 1 after parsing when this is non-zero */
+int errors;
+
+struct brw_context genasm_brw_context;
+struct brw_compile genasm_compile;
+
+struct brw_program compiled_program;
+struct program_defaults program_defaults = {.register_type = BRW_REGISTER_TYPE_F};
+
+/* 0: default output style, 1: nice C-style output */
+static int binary_like_output = 0;
+static char *export_filename = NULL;
+static const char binary_prepend[] = "static const char gen_eu_bytes[] = {\n";
+
+/* Number of buckets in a hash_table. */
+#define HASH_SIZE 37
+
+/* One entry of a bucket's singly linked chain. */
+struct hash_item {
+ char *key;
+ void *value;
+ struct hash_item *next;
+};
+
+/* Chained hash table: one list head per bucket, indexed by hash(key). */
+typedef struct hash_item *hash_table[HASH_SIZE];
+
+/* Registers recorded through insert_register(), keyed by register name. */
+static hash_table declared_register_table;
+
+/* A label and the offset of the instruction it refers to; add_label()
+ * appends these to label_table in program order. */
+struct label_item {
+ char *name;
+ int addr;
+ struct label_item *next;
+};
+static struct label_item *label_table;
+
+static const struct option longopts[] = {
+ {"advanced", no_argument, 0, 'a'},
+ {"binary", no_argument, 0, 'b'},
+ {"export", required_argument, 0, 'e'},
+ {"input_list", required_argument, 0, 'l'},
+ {"output", required_argument, 0, 'o'},
+ {"gen", required_argument, 0, 'g'},
+ { NULL, 0, NULL, 0 }
+};
+
+// Scales an offset so it can be used as JIP or UIP in branch instructions.
+static int jump_distance(int offset)
+{
+	// Gen4- bspec: the jump distance is in number of sixteen-byte units
+	// Gen5+ bspec: the jump distance is in number of eight-byte units
+	return IS_GENp(5) ? offset * 2 : offset;
+}
+
+/* Prints the command line synopsis to stderr. Every option accepted by the
+ * getopt_long() call in main() is listed, including -W which has no long
+ * form. */
+static void usage(void)
+{
+	fprintf(stderr, "usage: intel-gen4asm [options] inputfile\n");
+	fprintf(stderr, "OPTIONS:\n");
+	fprintf(stderr, "\t-a, --advanced Set advanced flag\n");
+	fprintf(stderr, "\t-b, --binary C style binary output\n");
+	fprintf(stderr, "\t-e, --export {exportfile} Export label file\n");
+	fprintf(stderr, "\t-l, --input_list {entrytablefile} Input entry_table_list file\n");
+	fprintf(stderr, "\t-o, --output {outputfile} Specify output file\n");
+	fprintf(stderr, "\t-g, --gen <4|5|6|7> Specify GPU generation\n");
+	fprintf(stderr, "\t-W Enable all warnings\n");
+}
+
+/* Maps key to a bucket index in [0, HASH_SIZE).
+ *
+ * ASCII letters are folded to lower case before hashing: find_hash_item()
+ * compares keys with strcasecmp(), so names differing only in case must land
+ * in the same bucket or the case-insensitive lookup can never match them. */
+static int hash(char *key)
+{
+	unsigned ret = 0;
+
+	for (; *key; key++) {
+		unsigned char c = (unsigned char)*key;
+
+		if (c >= 'A' && c <= 'Z')
+			c += 'a' - 'A';
+		ret = (ret << 1) + c;
+	}
+
+	return ret % HASH_SIZE;
+}
+
+/* Returns the value stored under key (compared case-insensitively) in t, or
+ * NULL when the bucket selected by hash(key) holds no such key. */
+static void *find_hash_item(hash_table t, char *key)
+{
+	struct hash_item *item = t[hash(key)];
+
+	while (item != NULL) {
+		if (strcasecmp(item->key, key) == 0)
+			return item->value;
+		item = item->next;
+	}
+
+	return NULL;
+}
+
+/* Adds the (key, v) pair at the head of its bucket in t. The pointers are
+ * stored as given, not copied; free_hash_table() later frees both.
+ * Terminates the program if the entry cannot be allocated. */
+static void insert_hash_item(hash_table t, char *key, void *v)
+{
+	int index = hash(key);
+	struct hash_item *p = malloc(sizeof(*p));
+
+	if (p == NULL) {
+		fprintf(stderr, "out of memory\n");
+		exit(1);
+	}
+
+	p->key = key;
+	p->value = v;
+	/* newest entry first, so it shadows older entries with the same key */
+	p->next = t[index];
+	t[index] = p;
+}
+
+/* Releases every entry of t together with the key and value it points to.
+ * The bucket heads themselves are left untouched. */
+static void free_hash_table(hash_table t)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
+		struct hash_item *item, *following;
+
+		for (item = t[bucket]; item != NULL; item = following) {
+			/* read the link before the node is released */
+			following = item->next;
+			free(item->key);
+			free(item->value);
+			free(item);
+		}
+	}
+}
+
+/* Looks name up in the declared register table; NULL when it is absent. */
+struct declared_register *find_register(char *name)
+{
+	struct declared_register *found;
+
+	found = find_hash_item(declared_register_table, name);
+	return found;
+}
+
+/* Records reg in the declared register table under its own name. */
+void insert_register(struct declared_register *reg)
+{
+	char *key = reg->name;
+
+	insert_hash_item(declared_register_table, key, reg);
+}
+
+/* Appends the label instruction i to the end of label_table, recording its
+ * name and instruction offset. The name pointer is shared with i, not
+ * copied. Terminates the program if the entry cannot be allocated. */
+static void add_label(struct brw_program_instruction *i)
+{
+	struct label_item **p = &label_table;
+
+	assert(is_label(i));
+
+	/* walk to the terminating NULL link so labels stay in program order */
+	while(*p)
+		p = &((*p)->next);
+
+	*p = calloc(1, sizeof(**p));
+	if (*p == NULL) {
+		fprintf(stderr, "out of memory\n");
+		exit(1);
+	}
+	(*p)->name = label_name(i);
+	(*p)->addr = i->inst_offset;
+}
+
+/* Resolves a label name to an instruction offset.
+ *
+ * Some assembly sources contain duplicated labels, so the search behaves
+ * like a loop starting at start_addr: the first matching label located at or
+ * after start_addr wins, otherwise the first matching label from the head of
+ * the table is used. Exits the program when the label does not exist. */
+static int label_to_addr(char *name, int start_addr)
+{
+	struct label_item *item;
+	int wrapped = -1;
+
+	for (item = label_table; item != NULL; item = item->next) {
+		if (strcmp(item->name, name) != 0)
+			continue;
+
+		// the first label just after start_addr
+		if (item->addr >= start_addr)
+			return item->addr;
+
+		// the first label from the head
+		if (wrapped == -1)
+			wrapped = item->addr;
+	}
+
+	if (wrapped == -1) {
+		fprintf(stderr, "Can't find label %s\n", name);
+		exit(1);
+	}
+
+	return wrapped;
+}
+
+/* Frees every node of the label list starting at p. The names are not freed
+ * here: they are shared with the label instructions (see add_label()).
+ *
+ * Iterative on purpose, so that the stack depth does not grow with the
+ * number of labels. */
+static void free_label_table(struct label_item *p)
+{
+	while (p) {
+		struct label_item *next = p->next;
+
+		free(p);
+		p = next;
+	}
+}
+
+/* One line of the entry table file (see read_entry_file()); a label whose
+ * name equals str is treated as an entry point by is_entry_point(). */
+struct entry_point_item {
+ char *str;
+ struct entry_point_item *next;
+} *entry_point_table;
+
+/* Loads the entry table file fn into entry_point_table, one list item per
+ * line, in file order, with the trailing newline removed.
+ *
+ * Returns 0 on success or when fn is NULL (no file requested), and -1 when
+ * the file cannot be opened or memory runs out. */
+static int read_entry_file(char *fn)
+{
+	FILE *entry_table_file;
+	char buf[2048];
+	struct entry_point_item **p = &entry_point_table;
+
+	if (!fn)
+		return 0;
+	if ((entry_table_file = fopen(fn, "r")) == NULL)
+		return -1;
+
+	while (fgets(buf, sizeof(buf)-1, entry_table_file) != NULL) {
+		struct entry_point_item *item;
+		size_t len = strlen(buf);
+
+		// drop the final char '\n'; len is 0 when the line starts
+		// with a NUL byte, so buf[len-1] must not be read blindly
+		if (len > 0 && buf[len-1] == '\n')
+			buf[len-1] = 0;
+
+		item = calloc(1, sizeof(struct entry_point_item));
+		if (item != NULL)
+			item->str = strdup(buf);
+		if (item == NULL || item->str == NULL) {
+			free(item);
+			fclose(entry_table_file);
+			return -1;
+		}
+
+		*p = item;
+		p = &item->next;
+	}
+
+	fclose(entry_table_file);
+	return 0;
+}
+
+/* Returns 1 when the name of label instruction i is listed in
+ * entry_point_table, 0 otherwise. i must be a label. */
+static int is_entry_point(struct brw_program_instruction *i)
+{
+	struct entry_point_item *cur = entry_point_table;
+
+	assert(i->type == GEN4ASM_INSTRUCTION_LABEL);
+
+	while (cur != NULL) {
+		if (!strcmp(cur->str, i->insn.label.name))
+			return 1;
+		cur = cur->next;
+	}
+
+	return 0;
+}
+
+/* Frees every item of the entry point list starting at p, including the
+ * strings duplicated by read_entry_file().
+ *
+ * Iterative on purpose, so that the stack depth does not grow with the
+ * number of entries. */
+static void free_entry_point_table(struct entry_point_item *p) {
+	while (p) {
+		struct entry_point_item *next = p->next;
+
+		free(p->str);
+		free(p);
+		p = next;
+	}
+}
+
+/* Writes one encoded instruction to output.
+ *
+ * With binary_like_output set, the 16 bytes are printed as two lines of
+ * eight hexadecimal byte values; otherwise they are printed as one braced
+ * group of four 32-bit hexadecimal words. */
+static void
+print_instruction(FILE *output, struct brw_instruction *instruction)
+{
+	unsigned char *b = (unsigned char *)instruction;
+	int *dw = (int *)instruction;
+
+	if (!binary_like_output) {
+		fprintf(output, " { 0x%08x, 0x%08x, 0x%08x, 0x%08x },\n",
+			dw[0], dw[1], dw[2], dw[3]);
+		return;
+	}
+
+	fprintf(output, "\t0x%02x, 0x%02x, 0x%02x, 0x%02x, "
+		"0x%02x, 0x%02x, 0x%02x, 0x%02x,\n"
+		"\t0x%02x, 0x%02x, 0x%02x, 0x%02x, "
+		"0x%02x, 0x%02x, 0x%02x, 0x%02x,\n",
+		b[0], b[1], b[2], b[3],
+		b[4], b[5], b[6], b[7],
+		b[8], b[9], b[10], b[11],
+		b[12], b[13], b[14], b[15]);
+}
+/* Entry point of the assembler.
+ *
+ * Parses the command line, runs the parser over the input, assigns an offset
+ * to every instruction (padding with NOPs in front of entry points), resolves
+ * label references into jump offsets and finally prints the encoded
+ * instructions. Returns 0 on success and non-zero on failure. */
+int main(int argc, char **argv)
+{
+	char *output_file = NULL;
+	char *entry_table_file = NULL;
+	FILE *output = stdout;
+	FILE *export_file;
+	struct brw_program_instruction *entry, *entry1, *tmp_entry;
+	int err, inst_offset;
+	/* getopt_long() returns an int. Storing it in a plain char breaks the
+	 * comparison with -1 on platforms where char is unsigned, so the
+	 * option loop would never terminate there. */
+	int o;
+	void *mem_ctx;
+
+	while ((o = getopt_long(argc, argv, "e:l:o:g:abW", longopts, NULL)) != -1) {
+		switch (o) {
+		case 'o':
+			if (strcmp(optarg, "-") != 0)
+				output_file = optarg;
+
+			break;
+
+		case 'g': {
+			char *dec_ptr, *end_ptr;
+			unsigned long decimal;
+
+			/* gen_level is the generation times ten: "7.5" -> 75 */
+			gen_level = strtol(optarg, &dec_ptr, 10) * 10;
+
+			if (*dec_ptr == '.') {
+				decimal = strtoul(++dec_ptr, &end_ptr, 10);
+				if (end_ptr != dec_ptr && *end_ptr == '\0') {
+					if (decimal > 10) {
+						fprintf(stderr, "Invalid Gen X decimal version\n");
+						exit(1);
+					}
+					gen_level += decimal;
+				}
+			}
+
+			if (gen_level < 40 || gen_level > 75) {
+				usage();
+				exit(1);
+			}
+
+			break;
+		}
+
+		case 'a':
+			advanced_flag = 1;
+			break;
+		case 'b':
+			binary_like_output = 1;
+			break;
+
+		case 'e':
+			need_export = 1;
+			if (strcmp(optarg, "-") != 0)
+				export_filename = optarg;
+			break;
+
+		case 'l':
+			if (strcmp(optarg, "-") != 0)
+				entry_table_file = optarg;
+			break;
+
+		case 'W':
+			warning_flags |= WARN_ALL;
+			break;
+
+		default:
+			usage();
+			exit(1);
+		}
+	}
+	argc -= optind;
+	argv += optind;
+	if (argc != 1) {
+		usage();
+		exit(1);
+	}
+
+	/* "-" keeps the scanner's default input stream */
+	if (strcmp(argv[0], "-") != 0) {
+		input_filename = argv[0];
+		yyin = fopen(input_filename, "r");
+		if (yyin == NULL) {
+			perror("Couldn't open input file");
+			exit(1);
+		}
+	}
+
+	brw_init_context(&genasm_brw_context, gen_level);
+	mem_ctx = ralloc_context(NULL);
+	brw_init_compile(&genasm_brw_context, &genasm_compile, mem_ctx);
+
+	err = yyparse();
+
+	if (strcmp(argv[0], "-"))
+		fclose(yyin);
+
+	yylex_destroy();
+
+	if (err || errors)
+		exit (1);
+
+	if (output_file) {
+		output = fopen(output_file, "w");
+		if (output == NULL) {
+			perror("Couldn't open output file");
+			exit(1);
+		}
+
+	}
+
+	if (read_entry_file(entry_table_file)) {
+		fprintf(stderr, "Read entry file error\n");
+		exit(1);
+	}
+
+	/* Pass 1: number the instructions. Labels do not take a slot. */
+	inst_offset = 0 ;
+	for (entry = compiled_program.first;
+	     entry != NULL; entry = entry->next) {
+		entry->inst_offset = inst_offset;
+		entry1 = entry->next;
+		if (entry1 && is_label(entry1) && is_entry_point(entry1)) {
+			// insert NOP instructions until (inst_offset+1) % 4 == 0
+			while (((inst_offset+1) % 4) != 0) {
+				tmp_entry = calloc(sizeof(*tmp_entry), 1);
+				if (tmp_entry == NULL) {
+					fprintf(stderr, "out of memory\n");
+					exit(1);
+				}
+				tmp_entry->insn.gen.header.opcode = BRW_OPCODE_NOP;
+				entry->next = tmp_entry;
+				tmp_entry->next = entry1;
+				entry = tmp_entry;
+				tmp_entry->inst_offset = ++inst_offset;
+			}
+		}
+		if (!is_label(entry))
+			inst_offset++;
+	}
+
+	/* Pass 2: collect the labels with their final offsets. */
+	for (entry = compiled_program.first; entry; entry = entry->next)
+		if (is_label(entry))
+			add_label(entry);
+
+	if (need_export) {
+		if (export_filename) {
+			export_file = fopen(export_filename, "w");
+		} else {
+			export_file = fopen("export.inc", "w");
+		}
+		/* fprintf() on a NULL stream would crash below */
+		if (export_file == NULL) {
+			perror("Couldn't open export file");
+			exit(1);
+		}
+		for (entry = compiled_program.first;
+		     entry != NULL; entry = entry->next) {
+			if (is_label(entry))
+				fprintf(export_file, "#define %s_IP %d\n",
+					label_name(entry), (IS_GENx(5) ? 2 : 1)*(entry->inst_offset));
+		}
+		fclose(export_file);
+	}
+
+	/* Pass 3: turn label references into relative jump offsets. */
+	for (entry = compiled_program.first; entry; entry = entry->next) {
+		struct relocation *reloc = &entry->reloc;
+		struct brw_instruction *inst = &entry->insn.gen;
+
+		if (!is_relocatable(entry))
+			continue;
+
+		if (reloc->first_reloc_target)
+			reloc->first_reloc_offset = label_to_addr(reloc->first_reloc_target, entry->inst_offset) - entry->inst_offset;
+
+		if (reloc->second_reloc_target)
+			reloc->second_reloc_offset = label_to_addr(reloc->second_reloc_target, entry->inst_offset) - entry->inst_offset;
+
+		if (reloc->second_reloc_offset) {
+			// this is a branch instruction with two offset arguments
+			inst->bits3.break_cont.jip = jump_distance(reloc->first_reloc_offset);
+			inst->bits3.break_cont.uip = jump_distance(reloc->second_reloc_offset);
+		} else if (reloc->first_reloc_offset) {
+			// this is a branch instruction with one offset argument
+			int offset = reloc->first_reloc_offset;
+			/* bspec: Unlike other flow control instructions, the offset used by JMPI is relative to the incremented instruction pointer rather than the IP value for the instruction itself. */
+
+			int is_jmpi = inst->header.opcode == BRW_OPCODE_JMPI; // target relative to the post-incremented IP, so delta == 1 if JMPI
+			if(is_jmpi)
+				offset --;
+			offset = jump_distance(offset);
+			if (is_jmpi && (gen_level == 75))
+				offset = offset * 8;
+
+			if(!IS_GENp(6)) {
+				inst->bits3.JIP = offset;
+				if(inst->header.opcode == BRW_OPCODE_ELSE)
+					inst->bits3.break_cont.uip = 1; /* Set the istack pop count, which must always be 1. */
+			} else if(IS_GENx(6)) {
+				/* TODO: endif JIP pos is not in Gen6 spec. may be bits1 */
+				int opcode = inst->header.opcode;
+				if(opcode == BRW_OPCODE_CALL || opcode == BRW_OPCODE_JMPI)
+					inst->bits3.JIP = offset; // for CALL, JMPI
+				else
+					inst->bits1.branch_gen6.jump_count = offset; // for CASE,ELSE,FORK,IF,WHILE
+			} else if(IS_GENp(7)) {
+				int opcode = inst->header.opcode;
+				/* Gen7 JMPI Restrictions in bspec:
+				 * The JIP data type must be Signed DWord
+				 */
+				if(opcode == BRW_OPCODE_JMPI)
+					inst->bits3.JIP = offset;
+				else
+					inst->bits3.break_cont.jip = offset;
+			}
+		}
+	}
+
+	if (binary_like_output)
+		fprintf(output, "%s", binary_prepend);
+
+	/* Pass 4: print the instructions, freeing the list while walking it. */
+	for (entry = compiled_program.first;
+	     entry != NULL;
+	     entry = entry1) {
+		entry1 = entry->next;
+		if (!is_label(entry))
+			print_instruction(output, &entry->insn.gen);
+		else
+			free(entry->insn.label.name);
+		free(entry);
+	}
+	if (binary_like_output)
+		fprintf(output, "};");
+
+	free_entry_point_table(entry_point_table);
+	free_hash_table(declared_register_table);
+	free_label_table(label_table);
+
+	fflush (output);
+	if (ferror (output)) {
+		perror ("Could not flush output file");
+		/* do not leave a truncated result behind */
+		if (output_file)
+			unlink (output_file);
+		err = 1;
+	}
+	return err;
+}
diff --git a/assembler/ralloc.c b/assembler/ralloc.c
new file mode 100644
index 0000000..59e71c4
--- /dev/null
+++ b/assembler/ralloc.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+/* Android defines SIZE_MAX in limits.h, instead of the standard stdint.h */
+#ifdef ANDROID
+#include <limits.h>
+#endif
+
+/* Some versions of MinGW are missing _vscprintf's declaration, although they
+ * still provide the symbol in the import library. */
+#ifdef __MINGW32__
+_CRTIMP int _vscprintf(const char *format, va_list argptr);
+#endif
+
+#include "ralloc.h"
+
+#ifndef va_copy
+#ifdef __va_copy
+#define va_copy(dest, src) __va_copy((dest), (src))
+#else
+#define va_copy(dest, src) (dest) = (src)
+#endif
+#endif
+
+#define CANARY 0x5A1106
+
+struct ralloc_header
+{
+ /* A canary value used to determine whether a pointer is ralloc'd.
+  * Set to CANARY by ralloc_size() and asserted by get_header(). */
+ unsigned canary;
+
+ struct ralloc_header *parent; /* header of the owning context; NULL when allocated with a NULL ctx */
+
+ /* The first child (head of a linked list); add_child() pushes new children at the head */
+ struct ralloc_header *child;
+
+ /* Linked list of siblings (doubly linked, shares the same parent) */
+ struct ralloc_header *prev;
+ struct ralloc_header *next;
+
+ void (*destructor)(void *); /* optional; unsafe_free() calls it with the payload pointer before free() */
+};
+
+typedef struct ralloc_header ralloc_header;
+
+static void unlink_block(ralloc_header *info);
+static void unsafe_free(ralloc_header *info);
+
+/* Map a user-visible ralloc pointer back to its bookkeeping header.
+ * The header is stored immediately in front of the payload; the canary
+ * assertion catches pointers that did not come from ralloc. */
+static ralloc_header *
+get_header(const void *ptr)
+{
+   char *payload = (char *) ptr;
+   ralloc_header *hdr = (ralloc_header *) (payload - sizeof(ralloc_header));
+
+   assert(hdr->canary == CANARY);
+   return hdr;
+}
+
+#define PTR_FROM_HEADER(info) (((char *) info) + sizeof(ralloc_header))
+
+/* Push info onto the front of parent's child list.
+ * A NULL parent leaves info untouched (it stays a root). */
+static void
+add_child(ralloc_header *parent, ralloc_header *info)
+{
+   ralloc_header *old_head;
+
+   if (parent == NULL)
+      return;
+
+   old_head = parent->child;
+
+   info->parent = parent;
+   info->next = old_head;
+   parent->child = info;
+
+   /* The previous head, if any, now has info as its predecessor. */
+   if (old_head != NULL)
+      old_head->prev = info;
+}
+
+/* Create a zero-byte allocation under ctx, useful purely as a parent. */
+void *
+ralloc_context(const void *ctx)
+{
+   const size_t no_payload = 0;
+
+   return ralloc_size(ctx, no_payload);
+}
+
+/* Core allocator: reserve size bytes (plus a hidden header) and link the
+ * new block under ctx.
+ *
+ * ctx may be NULL, in which case the block has no parent.
+ * Returns the payload pointer, or NULL if the request would overflow
+ * size_t or the underlying allocation fails. */
+void *
+ralloc_size(const void *ctx, size_t size)
+{
+   ralloc_header *info;
+   ralloc_header *parent;
+
+   /* header + payload must not wrap around, or we would under-allocate. */
+   if (unlikely(size > SIZE_MAX - sizeof(ralloc_header)))
+      return NULL;
+
+   /* calloc zeroes the header, so all links and the destructor start NULL. */
+   info = (ralloc_header *) calloc(1, size + sizeof(ralloc_header));
+   if (unlikely(info == NULL))
+      return NULL;
+
+   parent = ctx != NULL ? get_header(ctx) : NULL;
+
+   add_child(parent, info);
+
+   info->canary = CANARY;
+
+   return PTR_FROM_HEADER(info);
+}
+
+/* Like ralloc_size(), but the payload is explicitly cleared to zero. */
+void *
+rzalloc_size(const void *ctx, size_t size)
+{
+   void *mem = ralloc_size(ctx, size);
+
+   if (unlikely(mem == NULL))
+      return mem;
+
+   memset(mem, 0, size);
+   return mem;
+}
+
+/* helper function - assumes ptr != NULL
+ *
+ * Reallocates the block behind ptr to hold size payload bytes and repairs
+ * every link that pointed at the old header. Returns the new payload
+ * pointer, or NULL if realloc failed (the old block is then unchanged). */
+static void *
+resize(void *ptr, size_t size)
+{
+   ralloc_header *before = get_header(ptr);
+   ralloc_header *after = realloc(before, size + sizeof(ralloc_header));
+   ralloc_header *kid;
+
+   if (after == NULL)
+      return NULL;
+
+   /* The block moved: re-point the parent and both siblings at it. */
+   if (after != before && after->parent != NULL) {
+      ralloc_header *owner = after->parent;
+
+      if (owner->child == before)
+         owner->child = after;
+
+      if (after->prev != NULL)
+         after->prev->next = after;
+
+      if (after->next != NULL)
+         after->next->prev = after;
+   }
+
+   /* Every child must refer back to the (possibly relocated) header. */
+   kid = after->child;
+   while (kid != NULL) {
+      kid->parent = after;
+      kid = kid->next;
+   }
+
+   return PTR_FROM_HEADER(after);
+}
+
+/* realloc-style entry point: a NULL ptr means "allocate fresh from ctx",
+ * otherwise ptr (which must already belong to ctx) is resized in place. */
+void *
+reralloc_size(const void *ctx, void *ptr, size_t size)
+{
+   if (likely(ptr != NULL)) {
+      assert(ralloc_parent(ptr) == ctx);
+      return resize(ptr, size);
+   }
+
+   return ralloc_size(ctx, size);
+}
+
+/* Allocate count elements of size bytes each under ctx.
+ *
+ * Returns NULL when size * count would overflow size_t. A zero element
+ * size is accepted (it cannot overflow) and yields a 0-byte allocation;
+ * the guard also keeps the overflow test from dividing by zero. */
+void *
+ralloc_array_size(const void *ctx, size_t size, unsigned count)
+{
+   if (size != 0 && count > SIZE_MAX/size)
+      return NULL;
+
+   return ralloc_size(ctx, size * count);
+}
+
+/* Allocate a zero-initialized array of count elements of size bytes each.
+ *
+ * Returns NULL when size * count would overflow size_t. A zero element
+ * size is accepted (it cannot overflow) and yields a 0-byte allocation;
+ * the guard also keeps the overflow test from dividing by zero. */
+void *
+rzalloc_array_size(const void *ctx, size_t size, unsigned count)
+{
+   if (size != 0 && count > SIZE_MAX/size)
+      return NULL;
+
+   return rzalloc_size(ctx, size * count);
+}
+
+/* Resize ptr (or allocate, if ptr is NULL) to count elements of size bytes.
+ *
+ * Returns NULL when size * count would overflow size_t. A zero element
+ * size is accepted (it cannot overflow) and resizes to 0 bytes; the guard
+ * also keeps the overflow test from dividing by zero. */
+void *
+reralloc_array_size(const void *ctx, void *ptr, size_t size, unsigned count)
+{
+   if (size != 0 && count > SIZE_MAX/size)
+      return NULL;
+
+   return reralloc_size(ctx, ptr, size * count);
+}
+
+/* Free ptr together with everything allocated beneath it.
+ * Passing NULL is a no-op. */
+void
+ralloc_free(void *ptr)
+{
+   if (ptr != NULL) {
+      ralloc_header *hdr = get_header(ptr);
+
+      /* Detach from the tree first, then release the whole subtree. */
+      unlink_block(hdr);
+      unsafe_free(hdr);
+   }
+}
+
+/* Detach info from its parent and siblings, leaving it a stand-alone root.
+ * Its own children are left attached. */
+static void
+unlink_block(ralloc_header *info)
+{
+   ralloc_header *owner = info->parent;
+   ralloc_header *before = info->prev;
+   ralloc_header *after = info->next;
+
+   /* Sibling links are only repaired for blocks that have a parent. */
+   if (owner != NULL) {
+      if (owner->child == info)
+         owner->child = after;
+
+      if (before != NULL)
+         before->next = after;
+
+      if (after != NULL)
+         after->prev = before;
+   }
+
+   info->parent = NULL;
+   info->prev = NULL;
+   info->next = NULL;
+}
+
+/* Release info and its entire subtree without fixing up sibling links.
+ * Children are freed before the block itself; the block's destructor, if
+ * any, runs just before its memory is returned. */
+static void
+unsafe_free(ralloc_header *info)
+{
+   ralloc_header *kid;
+
+   /* Pop children off the head one at a time; the head is re-read on
+    * every pass rather than cached. */
+   while ((kid = info->child) != NULL) {
+      info->child = kid->next;
+      unsafe_free(kid);
+   }
+
+   if (info->destructor != NULL)
+      info->destructor(PTR_FROM_HEADER(info));
+
+   free(info);
+}
+
+/* Re-parent ptr (and, implicitly, its subtree) under new_ctx.
+ *
+ * ptr may be NULL, which is a no-op. new_ctx may be NULL, in which case
+ * ptr becomes a parentless root, matching how ralloc_size() treats a NULL
+ * context. */
+void
+ralloc_steal(const void *new_ctx, void *ptr)
+{
+   ralloc_header *info, *parent;
+
+   if (unlikely(ptr == NULL))
+      return;
+
+   info = get_header(ptr);
+   /* get_header() must not be handed NULL: it would read (and assert on)
+    * memory in front of address zero. */
+   parent = new_ctx != NULL ? get_header(new_ctx) : NULL;
+
+   unlink_block(info);
+
+   /* add_child() ignores a NULL parent, leaving info detached. */
+   add_child(parent, info);
+}
+
+/* Return the context ptr was allocated from, or NULL if ptr is NULL or
+ * has no parent. */
+void *
+ralloc_parent(const void *ptr)
+{
+   ralloc_header *owner;
+
+   if (unlikely(ptr == NULL))
+      return NULL;
+
+   owner = get_header(ptr)->parent;
+   if (owner == NULL)
+      return NULL;
+
+   return PTR_FROM_HEADER(owner);
+}
+
+static void *autofree_context = NULL;
+
+/* atexit() hook: release the autofree context and everything under it. */
+static void
+autofree(void)
+{
+   void *root = autofree_context;
+
+   ralloc_free(root);
+}
+
+/* Lazily create the process-wide autofree context. The first call also
+ * registers autofree() with atexit(). */
+void *
+ralloc_autofree_context(void)
+{
+   if (likely(autofree_context != NULL))
+      return autofree_context;
+
+   autofree_context = ralloc_context(NULL);
+   atexit(autofree);
+   return autofree_context;
+}
+
+/* Install (or, with NULL, clear) the callback that unsafe_free() invokes
+ * with the payload pointer just before the block is released. */
+void
+ralloc_set_destructor(const void *ptr, void(*destructor)(void *))
+{
+   get_header(ptr)->destructor = destructor;
+}
+
+/* Duplicate the NUL-terminated string str into memory owned by ctx.
+ *
+ * Returns NULL if str is NULL or if the allocation fails. */
+char *
+ralloc_strdup(const void *ctx, const char *str)
+{
+   size_t n;
+   char *ptr;
+
+   if (unlikely(str == NULL))
+      return NULL;
+
+   n = strlen(str);
+   ptr = ralloc_array(ctx, char, n + 1);
+   /* ralloc_array() returns NULL on overflow; never copy into it. */
+   if (unlikely(ptr == NULL))
+      return NULL;
+
+   memcpy(ptr, str, n);
+   ptr[n] = '\0';
+   return ptr;
+}
+
+/* Duplicate at most max characters of str into memory owned by ctx and
+ * always append a terminating NUL.
+ *
+ * str is never read beyond its first max bytes, so it does not have to be
+ * NUL-terminated when it is at least max bytes long.
+ * Returns NULL if str is NULL or if the allocation fails. */
+char *
+ralloc_strndup(const void *ctx, const char *str, size_t max)
+{
+   const char *nul;
+   size_t n;
+   char *ptr;
+
+   if (unlikely(str == NULL))
+      return NULL;
+
+   /* Bounded scan: strlen() would run past max on unterminated input. */
+   nul = memchr(str, '\0', max);
+   n = nul != NULL ? (size_t) (nul - str) : max;
+
+   ptr = ralloc_array(ctx, char, n + 1);
+   if (unlikely(ptr == NULL))
+      return NULL;
+
+   memcpy(ptr, str, n);
+   ptr[n] = '\0';
+   return ptr;
+}
+
+/* helper routine for strcat/strncat - n is the exact amount to copy
+ *
+ * Grows *dest so that n bytes of str fit after its current contents, copies
+ * them and re-terminates the result. On failure *dest is left as it was and
+ * false is returned. */
+static bool
+cat(char **dest, const char *str, size_t n)
+{
+   size_t old_len;
+   char *grown;
+
+   assert(dest != NULL && *dest != NULL);
+
+   old_len = strlen(*dest);
+   grown = resize(*dest, old_len + n + 1);
+   if (unlikely(grown == NULL))
+      return false;
+
+   memcpy(grown + old_len, str, n);
+   grown[old_len + n] = '\0';
+   *dest = grown;
+
+   return true;
+}
+
+
+/* Append the whole of str to *dest. Returns false if reallocation fails. */
+bool
+ralloc_strcat(char **dest, const char *str)
+{
+   const size_t whole = strlen(str);
+
+   return cat(dest, str, whole);
+}
+
+/* Append at most n bytes of str to *dest.
+ *
+ * str is never read beyond its first n bytes, so (as the header promises)
+ * it need not be NUL-terminated when it is at least n bytes long.
+ * Returns false if reallocation fails. */
+bool
+ralloc_strncat(char **dest, const char *str, size_t n)
+{
+   /* Clamp n to the string length using a bounded scan; strlen() would
+    * read past n on unterminated input. */
+   const char *nul = memchr(str, '\0', n);
+   if (nul != NULL)
+      n = (size_t) (nul - str);
+
+   return cat(dest, str, n);
+}
+
+/* Variadic front end for ralloc_vasprintf(): format into a new string
+ * owned by ctx. */
+char *
+ralloc_asprintf(const void *ctx, const char *fmt, ...)
+{
+   va_list ap;
+   char *formatted;
+
+   va_start(ap, fmt);
+   formatted = ralloc_vasprintf(ctx, fmt, ap);
+   va_end(ap);
+
+   return formatted;
+}
+
+/* Return the length of the string that would be generated by a printf-style
+ * format and argument list, not including the \0 byte.
+ *
+ * The caller's va_list is only copied, never consumed.
+ */
+static size_t
+printf_length(const char *fmt, va_list untouched_args)
+{
+   int needed;
+   char scratch;
+   va_list probe;
+
+   /* Work on a private copy so the original caller can still use its list */
+   va_copy(probe, untouched_args);
+
+#ifdef _WIN32
+   /* We need to use _vscprintf to calculate the size as vsnprintf returns -1
+    * if the number of characters to write is greater than count.
+    */
+   needed = _vscprintf(fmt, probe);
+   (void)scratch;
+#else
+   /* A one-byte buffer is enough: only the return value is of interest. */
+   needed = vsnprintf(&scratch, 1, fmt, probe);
+#endif
+   assert(needed >= 0);
+
+   va_end(probe);
+
+   return needed;
+}
+
+/* Format fmt/args into a freshly allocated string owned by ctx.
+ * The result is whatever ralloc_size() returned, so NULL is passed through. */
+char *
+ralloc_vasprintf(const void *ctx, const char *fmt, va_list args)
+{
+   /* +1 for the terminating NUL that printf_length() does not count. */
+   const size_t bytes = printf_length(fmt, args) + 1;
+   char *out = ralloc_size(ctx, bytes);
+
+   if (out == NULL)
+      return out;
+
+   vsnprintf(out, bytes, fmt, args);
+   return out;
+}
+
+/* Variadic front end for ralloc_vasprintf_append(). */
+bool
+ralloc_asprintf_append(char **str, const char *fmt, ...)
+{
+   va_list ap;
+   bool ok;
+
+   va_start(ap, fmt);
+   ok = ralloc_vasprintf_append(str, fmt, ap);
+   va_end(ap);
+
+   return ok;
+}
+
+/* Append formatted text to *str; a NULL *str is treated as an empty string. */
+bool
+ralloc_vasprintf_append(char **str, const char *fmt, va_list args)
+{
+   size_t tail;
+
+   assert(str != NULL);
+
+   /* New text starts where the current string ends. */
+   if (*str != NULL)
+      tail = strlen(*str);
+   else
+      tail = 0;
+
+   return ralloc_vasprintf_rewrite_tail(str, &tail, fmt, args);
+}
+
+/* Variadic front end for ralloc_vasprintf_rewrite_tail(). */
+bool
+ralloc_asprintf_rewrite_tail(char **str, size_t *start, const char *fmt, ...)
+{
+   va_list ap;
+   bool ok;
+
+   va_start(ap, fmt);
+   ok = ralloc_vasprintf_rewrite_tail(str, start, fmt, ap);
+   va_end(ap);
+
+   return ok;
+}
+
+/* Overwrite *str from index *start onwards with newly formatted text,
+ * growing the allocation as needed.
+ *
+ * If *str is NULL a new string is allocated with a NULL context.
+ * On success *str points at the (possibly moved) string, *start is
+ * advanced to the new end of the string, and true is returned.
+ * On allocation failure false is returned and *start is left unchanged. */
+bool
+ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
+                              va_list args)
+{
+   size_t new_length;
+   char *ptr;
+
+   assert(str != NULL);
+
+   if (unlikely(*str == NULL)) {
+      // Assuming a NULL context is probably bad, but it's expected behavior.
+      *str = ralloc_vasprintf(NULL, fmt, args);
+      if (unlikely(*str == NULL))
+         return false;
+      /* Keep the documented contract: *start tracks the end of the string
+       * so that a following call appends instead of overwriting. */
+      *start = strlen(*str);
+      return true;
+   }
+
+   /* printf_length() works on a copy, so args is still usable below. */
+   new_length = printf_length(fmt, args);
+
+   ptr = resize(*str, *start + new_length + 1);
+   if (unlikely(ptr == NULL))
+      return false;
+
+   vsnprintf(ptr + *start, new_length + 1, fmt, args);
+   *str = ptr;
+   *start += new_length;
+   return true;
+}
diff --git a/assembler/ralloc.h b/assembler/ralloc.h
new file mode 100644
index 0000000..6228d5b
--- /dev/null
+++ b/assembler/ralloc.h
@@ -0,0 +1,407 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ralloc.h
+ *
+ * ralloc: a recursive memory allocator
+ *
+ * The ralloc memory allocator creates a hierarchy of allocated
+ * objects. Every allocation is in reference to some parent, and
+ * every allocated object can in turn be used as the parent of a
+ * subsequent allocation. This allows for extremely convenient
+ * discarding of an entire tree/sub-tree of allocations by calling
+ * ralloc_free on any particular object to free it and all of its
+ * children.
+ *
+ * The conceptual working of ralloc was directly inspired by Andrew
+ * Tridgell's talloc, but ralloc is an independent implementation
+ * released under the MIT license and tuned for Mesa.
+ *
+ * The talloc implementation is available under the GNU Lesser
+ * General Public License (GNU LGPL), version 3 or later. It is
+ * more sophisticated than ralloc in that it includes reference
+ * counting and debugging features. See: http://talloc.samba.org/
+ */
+
+#ifndef RALLOC_H
+#define RALLOC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include "brw_compat.h"
+
+/**
+ * \def ralloc(ctx, type)
+ * Allocate a new object chained off of the given context.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) ralloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define ralloc(ctx, type) ((type *) ralloc_size(ctx, sizeof(type)))
+
+/**
+ * \def rzalloc(ctx, type)
+ * Allocate a new object out of the given context and initialize it to zero.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) rzalloc_size(ctx, sizeof(type)))
+ * \endcode
+ */
+#define rzalloc(ctx, type) ((type *) rzalloc_size(ctx, sizeof(type)))
+
+/**
+ * Allocate a new ralloc context.
+ *
+ * While any ralloc'd pointer can be used as a context, sometimes it is useful
+ * to simply allocate a context with no associated memory.
+ *
+ * It is equivalent to:
+ * \code
+ * ralloc_size(ctx, 0)
+ * \endcode
+ */
+void *ralloc_context(const void *ctx);
+
+/**
+ * Allocate memory chained off of the given context.
+ *
+ * This is the core allocation routine which is used by all others. It
+ * simply allocates storage for \p size bytes and returns the pointer,
+ * similar to \c malloc.
+ */
+void *ralloc_size(const void *ctx, size_t size);
+
+/**
+ * Allocate zero-initialized memory chained off of the given context.
+ *
+ * This is similar to \c calloc with a size of 1.
+ */
+void *rzalloc_size(const void *ctx, size_t size);
+
+/**
+ * Resize a piece of ralloc-managed memory, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * \param ctx The context to use for new allocation. If \p ptr != NULL,
+ * it must be the same as ralloc_parent(\p ptr).
+ * \param ptr Pointer to the memory to be resized. May be NULL.
+ * \param size The amount of memory to allocate, in bytes.
+ */
+void *reralloc_size(const void *ctx, void *ptr, size_t size);
+
+/// \defgroup array Array Allocators @{
+
+/**
+ * \def ralloc_array(ctx, type, count)
+ * Allocate an array of objects chained off the given context.
+ *
+ * Similar to \c calloc, but does not initialize the memory to zero.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) ralloc_array_size(ctx, sizeof(type), count))
+ * \endcode
+ */
+#define ralloc_array(ctx, type, count) \
+ ((type *) ralloc_array_size(ctx, sizeof(type), count))
+
+/**
+ * \def rzalloc_array(ctx, type, count)
+ * Allocate a zero-initialized array chained off the given context.
+ *
+ * Similar to \c calloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * This is equivalent to:
+ * \code
+ * ((type *) rzalloc_array_size(ctx, sizeof(type), count))
+ * \endcode
+ */
+#define rzalloc_array(ctx, type, count) \
+ ((type *) rzalloc_array_size(ctx, sizeof(type), count))
+
+/**
+ * \def reralloc(ctx, ptr, type, count)
+ * Resize a ralloc-managed array, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx The context to use for new allocation. If \p ptr != NULL,
+ * it must be the same as ralloc_parent(\p ptr).
+ * \param ptr Pointer to the array to be resized. May be NULL.
+ * \param type The element type.
+ * \param count The number of elements to allocate.
+ */
+#define reralloc(ctx, ptr, type, count) \
+ ((type *) reralloc_array_size(ctx, ptr, sizeof(type), count))
+
+/**
+ * Allocate memory for an array chained off the given context.
+ *
+ * Similar to \c calloc, but does not initialize the memory to zero.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \p size and \p count. This is necessary for security.
+ */
+void *ralloc_array_size(const void *ctx, size_t size, unsigned count);
+
+/**
+ * Allocate a zero-initialized array chained off the given context.
+ *
+ * Similar to \c calloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \p size and \p count. This is necessary for security.
+ */
+void *rzalloc_array_size(const void *ctx, size_t size, unsigned count);
+
+/**
+ * Resize a ralloc-managed array, preserving data.
+ *
+ * Similar to \c realloc. Unlike C89, passing 0 for \p size does not free the
+ * memory. Instead, it resizes it to a 0-byte ralloc context, just like
+ * calling ralloc_size(ctx, 0). This is different from talloc.
+ *
+ * More than a convenience function, this also checks for integer overflow when
+ * multiplying \c sizeof(type) and \p count. This is necessary for security.
+ *
+ * \param ctx The context to use for new allocation. If \p ptr != NULL,
+ * it must be the same as ralloc_parent(\p ptr).
+ * \param ptr Pointer to the array to be resized. May be NULL.
+ * \param size The size of an individual element.
+ * \param count The number of elements to allocate.
+ *
+ * \return The resized array, or NULL if the size overflows or allocation failed.
+ */
+void *reralloc_array_size(const void *ctx, void *ptr, size_t size,
+ unsigned count);
+/// @}
+
+/**
+ * Free a piece of ralloc-managed memory.
+ *
+ * This will also free the memory of any children allocated from this context.
+ */
+void ralloc_free(void *ptr);
+
+/**
+ * "Steal" memory from one context, changing it to another.
+ *
+ * This changes \p ptr's context to \p new_ctx. This is quite useful if
+ * memory is allocated out of a temporary context.
+ */
+void ralloc_steal(const void *new_ctx, void *ptr);
+
+/**
+ * Return the given pointer's ralloc context.
+ */
+void *ralloc_parent(const void *ptr);
+
+/**
+ * Return a context whose memory will be automatically freed at program exit.
+ *
+ * The first call to this function creates a context and registers a handler
+ * to free it using \c atexit. This may cause trouble if used in a library
+ * loaded with \c dlopen.
+ */
+void *ralloc_autofree_context(void);
+
+/**
+ * Set a callback to occur just before an object is freed.
+ */
+void ralloc_set_destructor(const void *ptr, void(*destructor)(void *));
+
+/// \defgroup string String Functions @{
+/**
+ * Duplicate a string, allocating the memory from the given context.
+ */
+char *ralloc_strdup(const void *ctx, const char *str);
+
+/**
+ * Duplicate a string, allocating the memory from the given context.
+ *
+ * Like \c strndup, at most \p n characters are copied. If \p str is longer
+ * than \p n characters, \p n are copied, and a terminating \c '\0' byte is added.
+ */
+char *ralloc_strndup(const void *ctx, const char *str, size_t n);
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends \p str to \p *dest, similar to \c strcat, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_strcat(char **dest, const char *str);
+
+/**
+ * Concatenate two strings, allocating the necessary space.
+ *
+ * This appends at most \p n bytes of \p str to \p *dest, using ralloc_resize
+ * to expand \p *dest to the appropriate size. \p dest will be updated to the
+ * new pointer unless allocation fails.
+ *
+ * The result will always be null-terminated; \p str does not need to be null
+ * terminated if it is longer than \p n.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_strncat(char **dest, const char *str, size_t n);
+
+/**
+ * Print to a string.
+ *
+ * This is analogous to \c sprintf, but allocates enough space (using \p ctx
+ * as the context) for the resulting string.
+ *
+ * \return The newly allocated string.
+ */
+char *ralloc_asprintf (const void *ctx, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+/**
+ * Print to a string, given a va_list.
+ *
+ * This is analogous to \c vsprintf, but allocates enough space (using \p ctx
+ * as the context) for the resulting string.
+ *
+ * \return The newly allocated string.
+ */
+char *ralloc_vasprintf(const void *ctx, const char *fmt, va_list args);
+
+/**
+ * Rewrite the tail of an existing string, starting at a given index.
+ *
+ * Overwrites the contents of *str starting at \p start with newly formatted
+ * text, including a new null-terminator. Allocates more memory as necessary.
+ *
+ * This can be used to append formatted text when the length of the existing
+ * string is already known, saving a strlen() call.
+ *
+ * \sa ralloc_asprintf_append
+ *
+ * \param str The string to be updated.
+ * \param start The index to start appending new data at.
+ * \param fmt A printf-style formatting string
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_rewrite_tail(char **str, size_t *start,
+ const char *fmt, ...)
+ PRINTFLIKE(3, 4);
+
+/**
+ * Rewrite the tail of an existing string, starting at a given index.
+ *
+ * Overwrites the contents of *str starting at \p start with newly formatted
+ * text, including a new null-terminator. Allocates more memory as necessary.
+ *
+ * This can be used to append formatted text when the length of the existing
+ * string is already known, saving a strlen() call.
+ *
+ * \sa ralloc_vasprintf_append
+ *
+ * \param str The string to be updated.
+ * \param start The index to start appending new data at.
+ * \param fmt A printf-style formatting string
+ * \param args A va_list containing the data to be formatted
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ * \p start will be increased by the length of the newly formatted text.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
+ va_list args);
+
+/**
+ * Append formatted text to the supplied string.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_asprintf_rewrite_tail(str, strlen(*str), fmt, ...)
+ * \endcode
+ *
+ * \sa ralloc_asprintf
+ * \sa ralloc_asprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_asprintf_append (char **str, const char *fmt, ...)
+ PRINTFLIKE(2, 3);
+
+/**
+ * Append formatted text to the supplied string, given a va_list.
+ *
+ * This is equivalent to
+ * \code
+ * ralloc_vasprintf_rewrite_tail(str, strlen(*str), fmt, args)
+ * \endcode
+ *
+ * \sa ralloc_vasprintf
+ * \sa ralloc_vasprintf_rewrite_tail
+ * \sa ralloc_strcat
+ *
+ * \p str will be updated to the new pointer unless allocation fails.
+ *
+ * \return True unless allocation failed.
+ */
+bool ralloc_vasprintf_append(char **str, const char *fmt, va_list args);
+/// @}
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif
diff --git a/assembler/test/.gitignore b/assembler/test/.gitignore
new file mode 100644
index 0000000..090a4f9
--- /dev/null
+++ b/assembler/test/.gitignore
@@ -0,0 +1,21 @@
+*.out
+mov
+frc
+rndd
+rnde
+rnde-intsrc
+rndu
+rndz
+lzd
+not
+jmpi
+if
+iff
+while
+else
+break
+cont
+halt
+wait
+endif
+immediate
diff --git a/assembler/test/Makefile.am b/assembler/test/Makefile.am
new file mode 100644
index 0000000..0d8d349
--- /dev/null
+++ b/assembler/test/Makefile.am
@@ -0,0 +1,84 @@
+check_SCRIPTS = run-test.sh
+
+TESTS_ENVIRONMENT = top_builddir=${top_builddir}
+TESTS = \
+ mov \
+ frc \
+ rndd \
+ rndu \
+ rnde \
+ rnde-intsrc \
+ rndz \
+ lzd \
+ not \
+ jmpi \
+ if \
+ iff \
+ while \
+ else \
+ break \
+ cont \
+ halt \
+ wait \
+ endif \
+ declare \
+ immediate
+
+# Tests that are expected to fail because they contain some incorrect code.
+XFAIL_TESTS = \
+ rnde-intsrc
+
+TESTDATA = \
+ mov.expected \
+ mov.g4a \
+ frc.expected \
+ frc.g4a \
+ rndd.expected \
+ rndd.g4a \
+ rndu.expected \
+ rndu.g4a \
+ rnde.expected \
+ rnde.g4a \
+ rnde-intsrc.expected \
+ rnde-intsrc.g4a \
+ rndz.expected \
+ rndz.g4a \
+ lzd.expected \
+ lzd.g4a \
+ not.expected \
+ not.g4a \
+ jmpi.expected \
+ jmpi.g4a \
+ if.expected \
+ if.g4a \
+ iff.expected \
+ iff.g4a \
+ while.expected \
+ while.g4a \
+ else.expected \
+ else.g4a \
+ break.expected \
+ break.g4a \
+ cont.expected \
+ cont.g4a \
+ halt.expected \
+ halt.g4a \
+ wait.expected \
+ wait.g4a \
+ endif.expected \
+ endif.g4a \
+ declare.expected \
+ declare.g4a \
+ immediate.g4a \
+ immediate.expected
+
+EXTRA_DIST = \
+ ${TESTDATA} \
+ run-test.sh
+
+$(TESTS): run-test.sh
+ chmod +x ${srcdir}/run-test.sh
+
+CLEANFILES = \
+ *.out \
+ ${TESTS}
diff --git a/assembler/test/break.expected b/assembler/test/break.expected
new file mode 100644
index 0000000..4e3e4eb
--- /dev/null
+++ b/assembler/test/break.expected
@@ -0,0 +1 @@
+ { 0x00000028, 0x34001c00, 0x00011400, 0x00010002 },
diff --git a/assembler/test/break.g4a b/assembler/test/break.g4a
new file mode 100644
index 0000000..f23a0ba
--- /dev/null
+++ b/assembler/test/break.g4a
@@ -0,0 +1,6 @@
+/* The break instruction syntax, which is currently just what was in the BNF,
+ * is bad. It really needs 2 arguments -- pop count (19:16, how many
+ * loops to break out of), and the IP count (15:0). For now, this argument
+ * should cover 1 loop, and jumping 2 instructions.
+ */
+break 65538;
diff --git a/assembler/test/cont.expected b/assembler/test/cont.expected
new file mode 100644
index 0000000..a1cd936
--- /dev/null
+++ b/assembler/test/cont.expected
@@ -0,0 +1 @@
+ { 0x00000029, 0x34001c00, 0x00011400, 0x00010002 },
diff --git a/assembler/test/cont.g4a b/assembler/test/cont.g4a
new file mode 100644
index 0000000..300e7d8
--- /dev/null
+++ b/assembler/test/cont.g4a
@@ -0,0 +1,6 @@
+/* The cont instruction syntax, which is currently just what was in the BNF,
+ * is bad. It really needs 2 arguments -- pop count (19:16, how many
+ * loops to break out of), and the IP count (15:0). For now, this argument
+ * should cover 1 loop, and jumping 2 instructions.
+ */
+cont 65538;
diff --git a/assembler/test/declare.expected b/assembler/test/declare.expected
new file mode 100644
index 0000000..36ad68f
--- /dev/null
+++ b/assembler/test/declare.expected
@@ -0,0 +1,3 @@
+ { 0x00e00040, 0x20007fbd, 0x008d0f64, 0x3f9d70a4 },
+ { 0x00e00040, 0x200077bd, 0x008d0f64, 0x008d0020 },
+ { 0x00e00040, 0x2f6477bd, 0x008d0000, 0x008d0020 },
diff --git a/assembler/test/declare.g4a b/assembler/test/declare.g4a
new file mode 100644
index 0000000..d3414e4
--- /dev/null
+++ b/assembler/test/declare.g4a
@@ -0,0 +1,5 @@
+.declare X1 Base=g99.0 ElementSize=1 SrcRegion=<8,8,1> DstRegion=<1> Type=F
+.declare X1 Base=g123.4 ElementSize=4 SrcRegion=<8,8,1> DstRegion=<1> Type=F
+add g0<1>:f X1 1.23:f;
+add g0<1>:f X1 g1<8,8,1>:f;
+add X1 g0<8,8,1>:f g1<8,8,1>:f;
diff --git a/assembler/test/else.expected b/assembler/test/else.expected
new file mode 100644
index 0000000..bdc77e4
--- /dev/null
+++ b/assembler/test/else.expected
@@ -0,0 +1 @@
+ { 0x00000024, 0x34001c00, 0x00011400, 0x00010002 },
diff --git a/assembler/test/else.g4a b/assembler/test/else.g4a
new file mode 100644
index 0000000..f410380
--- /dev/null
+++ b/assembler/test/else.g4a
@@ -0,0 +1 @@
+else 2;
diff --git a/assembler/test/endif.expected b/assembler/test/endif.expected
new file mode 100644
index 0000000..b8a3003
--- /dev/null
+++ b/assembler/test/endif.expected
@@ -0,0 +1 @@
+ { 0x00000025, 0x00001c00, 0x00000000, 0x00010000 },
diff --git a/assembler/test/endif.g4a b/assembler/test/endif.g4a
new file mode 100644
index 0000000..b3b09fa
--- /dev/null
+++ b/assembler/test/endif.g4a
@@ -0,0 +1 @@
+endif;
diff --git a/assembler/test/frc.expected b/assembler/test/frc.expected
new file mode 100644
index 0000000..e93f8f7
--- /dev/null
+++ b/assembler/test/frc.expected
@@ -0,0 +1 @@
+ { 0x00000001, 0x20000021, 0x00000020, 0x00000000 },
diff --git a/assembler/test/frc.g4a b/assembler/test/frc.g4a
new file mode 100644
index 0000000..8844f67
--- /dev/null
+++ b/assembler/test/frc.g4a
@@ -0,0 +1 @@
+mov (1) g0<1>UD g1<0,1,0>UD { align1 };
diff --git a/assembler/test/halt.expected b/assembler/test/halt.expected
new file mode 100644
index 0000000..b92db85
--- /dev/null
+++ b/assembler/test/halt.expected
@@ -0,0 +1 @@
+ { 0x0000002a, 0x34001c00, 0x00011400, 0x00000002 },
diff --git a/assembler/test/halt.g4a b/assembler/test/halt.g4a
new file mode 100644
index 0000000..e6952b1
--- /dev/null
+++ b/assembler/test/halt.g4a
@@ -0,0 +1 @@
+halt 2;
diff --git a/assembler/test/if.expected b/assembler/test/if.expected
new file mode 100644
index 0000000..d2fa54d
--- /dev/null
+++ b/assembler/test/if.expected
@@ -0,0 +1 @@
+ { 0x00000022, 0x34001c00, 0x00011400, 0x00000002 },
diff --git a/assembler/test/if.g4a b/assembler/test/if.g4a
new file mode 100644
index 0000000..60ba4da
--- /dev/null
+++ b/assembler/test/if.g4a
@@ -0,0 +1 @@
+if 2;
diff --git a/assembler/test/iff.expected b/assembler/test/iff.expected
new file mode 100644
index 0000000..b5dd6f6
--- /dev/null
+++ b/assembler/test/iff.expected
@@ -0,0 +1 @@
+ { 0x00000023, 0x34001c00, 0x00011400, 0x00000002 },
diff --git a/assembler/test/iff.g4a b/assembler/test/iff.g4a
new file mode 100644
index 0000000..d728ed0
--- /dev/null
+++ b/assembler/test/iff.g4a
@@ -0,0 +1 @@
+iff 2;
diff --git a/assembler/test/immediate.expected b/assembler/test/immediate.expected
new file mode 100644
index 0000000..b1aa921
--- /dev/null
+++ b/assembler/test/immediate.expected
@@ -0,0 +1,3 @@
+ { 0x00000001, 0x20000061, 0x00000000, 0xffffffff },
+ { 0x00000001, 0x200000e1, 0x00000000, 0x7fffffff },
+ { 0x00000001, 0x200000e1, 0x00000000, 0x80000000 },
diff --git a/assembler/test/immediate.g4a b/assembler/test/immediate.g4a
new file mode 100644
index 0000000..4b9e2d3
--- /dev/null
+++ b/assembler/test/immediate.g4a
@@ -0,0 +1,3 @@
+mov (1) g0<1>UD 4294967295UD { align1 };
+mov (1) g0<1>UD 2147483647D { align1 };
+mov (1) g0<1>UD -2147483648D { align1 };
diff --git a/assembler/test/jmpi.expected b/assembler/test/jmpi.expected
new file mode 100644
index 0000000..a53a036
--- /dev/null
+++ b/assembler/test/jmpi.expected
@@ -0,0 +1 @@
+ { 0x00000020, 0x34001c00, 0x00011400, 0x00000002 },
diff --git a/assembler/test/jmpi.g4a b/assembler/test/jmpi.g4a
new file mode 100644
index 0000000..7503dd4
--- /dev/null
+++ b/assembler/test/jmpi.g4a
@@ -0,0 +1 @@
+jmpi 2;
diff --git a/assembler/test/lzd.expected b/assembler/test/lzd.expected
new file mode 100644
index 0000000..1df4db9
--- /dev/null
+++ b/assembler/test/lzd.expected
@@ -0,0 +1 @@
+ { 0x0000004a, 0x20000021, 0x00000020, 0x00000000 },
diff --git a/assembler/test/lzd.g4a b/assembler/test/lzd.g4a
new file mode 100644
index 0000000..b644d76
--- /dev/null
+++ b/assembler/test/lzd.g4a
@@ -0,0 +1 @@
+lzd (1) g0<1>UD g1<0,1,0>UD { align1 };
diff --git a/assembler/test/mov.expected b/assembler/test/mov.expected
new file mode 100644
index 0000000..e93f8f7
--- /dev/null
+++ b/assembler/test/mov.expected
@@ -0,0 +1 @@
+ { 0x00000001, 0x20000021, 0x00000020, 0x00000000 },
diff --git a/assembler/test/mov.g4a b/assembler/test/mov.g4a
new file mode 100644
index 0000000..8844f67
--- /dev/null
+++ b/assembler/test/mov.g4a
@@ -0,0 +1 @@
+mov (1) g0<1>UD g1<0,1,0>UD { align1 };
diff --git a/assembler/test/not.expected b/assembler/test/not.expected
new file mode 100644
index 0000000..072d7ab
--- /dev/null
+++ b/assembler/test/not.expected
@@ -0,0 +1 @@
+ { 0x00000004, 0x20000021, 0x00000020, 0x00000000 },
diff --git a/assembler/test/not.g4a b/assembler/test/not.g4a
new file mode 100644
index 0000000..69d9f8c
--- /dev/null
+++ b/assembler/test/not.g4a
@@ -0,0 +1 @@
+not (1) g0<1>UD g1<0,1,0>UD { align1 };
diff --git a/assembler/test/rndd.expected b/assembler/test/rndd.expected
new file mode 100644
index 0000000..a841e25
--- /dev/null
+++ b/assembler/test/rndd.expected
@@ -0,0 +1 @@
+ { 0x00000045, 0x200003a1, 0x00000020, 0x00000000 },
diff --git a/assembler/test/rndd.g4a b/assembler/test/rndd.g4a
new file mode 100644
index 0000000..832a544
--- /dev/null
+++ b/assembler/test/rndd.g4a
@@ -0,0 +1 @@
+rndd (1) g0<1>UD g1<0,1,0>F { align1 };
diff --git a/assembler/test/rnde-intsrc.expected b/assembler/test/rnde-intsrc.expected
new file mode 100644
index 0000000..1138d73
--- /dev/null
+++ b/assembler/test/rnde-intsrc.expected
@@ -0,0 +1 @@
+ { 0x00000046, 0x20000021, 0x00000020, 0x00000000 },
diff --git a/assembler/test/rnde-intsrc.g4a b/assembler/test/rnde-intsrc.g4a
new file mode 100644
index 0000000..68562fa
--- /dev/null
+++ b/assembler/test/rnde-intsrc.g4a
@@ -0,0 +1,2 @@
+/* Non-float types are not permitted in the sources of round instructions. */
+rnde (1) g0<1>UD g1<0,1,0>UD { align1 };
diff --git a/assembler/test/rnde.expected b/assembler/test/rnde.expected
new file mode 100644
index 0000000..2155379
--- /dev/null
+++ b/assembler/test/rnde.expected
@@ -0,0 +1 @@
+ { 0x00000046, 0x200003a1, 0x00000020, 0x00000000 },
diff --git a/assembler/test/rnde.g4a b/assembler/test/rnde.g4a
new file mode 100644
index 0000000..9bc13cb
--- /dev/null
+++ b/assembler/test/rnde.g4a
@@ -0,0 +1 @@
+rnde (1) g0<1>UD g1<0,1,0>F { align1 };
diff --git a/assembler/test/rndu.expected b/assembler/test/rndu.expected
new file mode 100644
index 0000000..46e26c1
--- /dev/null
+++ b/assembler/test/rndu.expected
@@ -0,0 +1 @@
+ { 0x00000044, 0x200003a1, 0x00000020, 0x00000000 },
diff --git a/assembler/test/rndu.g4a b/assembler/test/rndu.g4a
new file mode 100644
index 0000000..6321f2e
--- /dev/null
+++ b/assembler/test/rndu.g4a
@@ -0,0 +1 @@
+rndu (1) g0<1>UD g1<0,1,0>F { align1 };
diff --git a/assembler/test/rndz.expected b/assembler/test/rndz.expected
new file mode 100644
index 0000000..9045cfc
--- /dev/null
+++ b/assembler/test/rndz.expected
@@ -0,0 +1 @@
+ { 0x00000047, 0x200003a1, 0x00000020, 0x00000000 },
diff --git a/assembler/test/rndz.g4a b/assembler/test/rndz.g4a
new file mode 100644
index 0000000..6dd60f7
--- /dev/null
+++ b/assembler/test/rndz.g4a
@@ -0,0 +1 @@
+rndz (1) g0<1>UD g1<0,1,0>F { align1 };
diff --git a/assembler/test/run-test.sh b/assembler/test/run-test.sh
new file mode 100644
index 0000000..e02a6e0
--- /dev/null
+++ b/assembler/test/run-test.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+#TODO: add new test cases in environment variables ${TEST_GEN4_XXX}
+
+DIR="$( cd -P "$( dirname "$0" )" && pwd )"
+ASSEMBLER="${DIR}/../src/intel-gen4asm"
+
+# Assemble one test case that is expected to succeed and compare the result
+# against its .expected file, printing "[ OK ]" or "[FAIL]" plus a diff.
+# $1 is the gen level, e.g., 4 or 7; $2 is the test case name.
+check_if_work()
+{
+ GEN_LEVEL="$1"
+ TEST_CASE_NAME="$2"
+ SOURCE="${TEST_CASE_NAME}.g${GEN_LEVEL}a"
+ EXPECTED="${TEST_CASE_NAME}.expected"
+ TEMP_OUT="temp.out"
+ "${ASSEMBLER}" -g "${GEN_LEVEL}" "${DIR}/${SOURCE}" -o "${TEMP_OUT}"
+ if cmp "${TEMP_OUT}" "${DIR}/${EXPECTED}" 2> /dev/null;
+ then
+ echo "[ OK ] ${TEST_CASE_NAME}";
+ else
+ echo "[FAIL] ${TEST_CASE_NAME}";
+ diff -u "${DIR}/${EXPECTED}" "${TEMP_OUT}";
+ fi
+}
+
+# Expect the assembler to reject wrong code ($1: gen level, $2: test case name); a zero exit status is a failure.
+check_if_fail()
+{
+ GEN_LEVEL="$1"
+ TEST_CASE_NAME="$2"
+ SOURCE="${TEST_CASE_NAME}.g${GEN_LEVEL}a"
+ TEMP_OUT="temp.out"
+ "${ASSEMBLER}" -g "${GEN_LEVEL}" "${DIR}/${SOURCE}" -o "${TEMP_OUT}" 2>/dev/null
+ if [ $? -eq 0 ];
+ then
+ echo "[FAIL] ${TEST_CASE_NAME}";
+ else
+ echo "[ OK ] ${TEST_CASE_NAME}";
+ fi
+}
+
+# Tests that are expected to succeed because they contain correct code.
+TEST_GEN4_SHOULD_WORK="\
+ mov \
+ frc \
+ rndd \
+ rndu \
+ rnde \
+ rnde-intsrc \
+ rndz \
+ lzd \
+ not \
+ jmpi \
+ if \
+ iff \
+ while \
+ else \
+ break \
+ cont \
+ halt \
+ wait \
+ endif \
+ declare \
+ immediate \
+ "
+
+# Tests that are expected to fail because they contain wrong code. NOTE(review): rnde-intsrc is also listed in TEST_GEN4_SHOULD_WORK above; confirm which list is intended.
+TEST_GEN4_SHOULD_FAIL="\
+ rnde-intsrc \
+ "
+
+for T in ${TEST_GEN4_SHOULD_WORK}
+do
+ check_if_work 4 ${T}
+done
+
+for T in ${TEST_GEN4_SHOULD_FAIL}
+do
+ check_if_fail 4 ${T}
+done
+
diff --git a/assembler/test/wait.expected b/assembler/test/wait.expected
new file mode 100644
index 0000000..06a055b
--- /dev/null
+++ b/assembler/test/wait.expected
@@ -0,0 +1 @@
+ { 0x00000030, 0x20000000, 0x00001200, 0x00010000 },
diff --git a/assembler/test/wait.g4a b/assembler/test/wait.g4a
new file mode 100644
index 0000000..59d11fa
--- /dev/null
+++ b/assembler/test/wait.g4a
@@ -0,0 +1 @@
+wait n0;
diff --git a/assembler/test/while.expected b/assembler/test/while.expected
new file mode 100644
index 0000000..adad703
--- /dev/null
+++ b/assembler/test/while.expected
@@ -0,0 +1 @@
+ { 0x00000027, 0x34001c00, 0x00011400, 0x0000fffe },
diff --git a/assembler/test/while.g4a b/assembler/test/while.g4a
new file mode 100644
index 0000000..4f5e1df
--- /dev/null
+++ b/assembler/test/while.g4a
@@ -0,0 +1 @@
+while -2;
diff --git a/autogen.sh b/autogen.sh
index 904cd67..354f254 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -9,4 +9,4 @@ cd $srcdir
autoreconf -v --install || exit 1
cd $ORIGDIR || exit $?
-$srcdir/configure --enable-maintainer-mode "$@"
+$srcdir/configure "$@"
diff --git a/configure.ac b/configure.ac
index 5e2dbed..1c4e1c6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -36,9 +36,13 @@ AC_GNU_SOURCE
AM_INIT_AUTOMAKE([foreign dist-bzip2])
AM_PATH_PYTHON([3],, [:])
-AM_MAINTAINER_MODE
+
+AC_PROG_CC
+AM_PROG_LEX
+AC_PROG_YACC
# Checks for functions, headers, structures, etc.
+AC_HEADER_STDC
AC_CHECK_HEADERS([termios.h])
AC_CHECK_MEMBERS([struct sysinfo.totalram],[],[],[AC_INCLUDES_DEFAULT
#include <sys/sysinfo.h>
@@ -56,6 +60,16 @@ m4_ifndef([XORG_MACROS_VERSION],
XORG_MACROS_VERSION(1.16)
XORG_DEFAULT_OPTIONS
+# Warning flags for the assembler. We can't quite use CWARNFLAGS for it yet as
+# it generates way too many warnings.
+ASSEMBLER_WARN_CFLAGS=""
+if test "x$GCC" = "xyes"; then
+ ASSEMBLER_WARN_CFLAGS="-Wall -Wstrict-prototypes \
+ -Wmissing-prototypes -Wmissing-declarations \
+ -Wnested-externs -fno-strict-aliasing"
+fi
+AC_SUBST(ASSEMBLER_WARN_CFLAGS)
+
PKG_CHECK_MODULES(DRM, [libdrm_intel >= 2.4.38 libdrm])
PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10])
@@ -67,6 +81,12 @@ if test x"$udev" = xyes; then
fi
PKG_CHECK_MODULES(GLIB, glib-2.0)
+# Can we build the assembler? Per the Autoconf manual, LEX is ":" and YACC falls back to plain "yacc" when no generator is found.
+AS_IF([test x"$LEX" != "x:" && test x"$YACC" != xyacc],
+ [enable_assembler=yes],
+ [enable_assembler=no])
+AM_CONDITIONAL(BUILD_ASSEMBLER, [test "x$enable_assembler" = xyes])
+
# -----------------------------------------------------------------------------
# Configuration options
# -----------------------------------------------------------------------------
@@ -130,6 +150,9 @@ if test "x$BUILD_SHADER_DEBUGGER" != xno; then
fi
AM_CONDITIONAL(BUILD_SHADER_DEBUGGER, [test "x$BUILD_SHADER_DEBUGGER" != xno])
+AS_IF([test "x$BUILD_SHADER_DEBUGGER" != xno],
+ [enable_debugger=yes], [enable_debugger=no]) dnl value reported in the configure summary
+
# -----------------------------------------------------------------------------
# To build multithread code, gcc uses -pthread, Solaris Studio cc uses -mt
@@ -157,7 +180,21 @@ AC_CONFIG_FILES([
tools/quick_dump/Makefile
debugger/Makefile
debugger/system_routine/Makefile
+ assembler/Makefile
+ assembler/doc/Makefile
+ assembler/test/Makefile
+ assembler/intel-gen4asm.pc
])
AC_OUTPUT
+# Print a summary of the configuration
+echo ""
+echo "Intel GPU tools"
+
+echo ""
+echo " • Tools:"
+echo " Assembler: ${enable_assembler}"
+echo " Debugger: ${enable_debugger}"
+echo ""
+
# vim: set ft=config ts=8 sw=8 tw=0 noet :
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
index d76e2ac..f1e49b9 100644
--- a/debugger/Makefile.am
+++ b/debugger/Makefile.am
@@ -11,6 +11,7 @@ AM_CPPFLAGS = \
AM_CFLAGS = \
$(DRM_CFLAGS) \
$(PCIACCESS_CFLAGS) \
+ $(CAIRO_CFLAGS) \
$(CWARNFLAGS)
LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS)