summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Justen <jordan.l.justen@intel.com>2014-08-05 13:51:28 -0700
committerJordan Justen <jordan.l.justen@intel.com>2014-08-06 00:26:04 -0700
commit78457453b720bd54885207546abc9eb47201ef4e (patch)
tree0b9e99b3f92aba75881a99dac2c2bb8cadc0148a
parent2e8849bb68a4b589057911175b98158fef9d2321 (diff)
cs wipcs-old
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h34
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.cpp352
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.h46
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c4
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.c1
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.h1
-rw-r--r--src/mesa/main/compute.c2
-rw-r--r--src/mesa/main/glheader.h6
-rw-r--r--src/mesa/main/mtypes.h19
-rw-r--r--src/mesa/main/state.c21
-rw-r--r--src/mesa/program/program.h29
16 files changed, 536 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index ee28dd98ef..c3ce13afe6 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -43,6 +43,7 @@ i965_FILES = \
brw_clip_unfilled.c \
brw_clip_util.c \
brw_context.c \
+ brw_cs.cpp \
brw_cubemap_normalize.cpp \
brw_curbe.c \
brw_dead_control_flow.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 52f2557504..4e653d8569 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -279,6 +279,20 @@ brw_init_driver_functions(struct brw_context *brw,
functions->GetSamplePosition = gen6_get_sample_position;
}
+/* Install Driver functions that depend on extension overrides; meant to run
+ * after extension overrides have been processed.  Currently this only wires
+ * up DispatchCompute / DispatchComputeIndirect, and only on gen7+ when
+ * ARB_compute_shader is set in _mesa_extension_override_enables.
+ */
+static void
+brw_init_driver_override_functions(struct brw_context *brw)
+{
+ struct dd_function_table *functions = &brw->ctx.Driver;
+
+ if (brw->gen >= 7 && _mesa_extension_override_enables.ARB_compute_shader) {
+ functions->DispatchCompute = brw_dispatch_compute;
+ functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
+ }
+}
+
static void
brw_initialize_context_constants(struct brw_context *brw)
{
@@ -662,6 +676,8 @@ brwCreateContext(gl_api api,
return false;
}
+ brw_init_driver_override_functions(brw);
+
driContextSetFlags(ctx, flags);
/* Initialize the software rasterizer and helper modules.
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1bbcf46975..f642555f5f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -153,6 +153,7 @@ enum brw_state_id {
BRW_STATE_FRAGMENT_PROGRAM,
BRW_STATE_GEOMETRY_PROGRAM,
BRW_STATE_VERTEX_PROGRAM,
+ BRW_STATE_COMPUTE_PROGRAM,
BRW_STATE_CURBE_OFFSETS,
BRW_STATE_REDUCED_PRIMITIVE,
BRW_STATE_PRIMITIVE,
@@ -187,6 +188,7 @@ enum brw_state_id {
#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE)
#define BRW_NEW_FRAGMENT_PROGRAM (1 << BRW_STATE_FRAGMENT_PROGRAM)
#define BRW_NEW_GEOMETRY_PROGRAM (1 << BRW_STATE_GEOMETRY_PROGRAM)
+#define BRW_NEW_COMPUTE_PROGRAM (1 << BRW_STATE_COMPUTE_PROGRAM)
#define BRW_NEW_VERTEX_PROGRAM (1 << BRW_STATE_VERTEX_PROGRAM)
#define BRW_NEW_CURBE_OFFSETS (1 << BRW_STATE_CURBE_OFFSETS)
#define BRW_NEW_REDUCED_PRIMITIVE (1 << BRW_STATE_REDUCED_PRIMITIVE)
@@ -220,7 +222,7 @@ enum brw_state_id {
#define BRW_NEW_META_IN_PROGRESS (1 << BRW_STATE_META_IN_PROGRESS)
#define BRW_NEW_INTERPOLATION_MAP (1 << BRW_STATE_INTERPOLATION_MAP)
-#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
-#define BRW_NEW_NUM_SAMPLES (1 << BRW_STATE_NUM_SAMPLES)
+#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ULL << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
+#define BRW_NEW_NUM_SAMPLES (1ULL << BRW_STATE_NUM_SAMPLES)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -228,7 +230,7 @@ struct brw_state_flags {
/**
* State update flags signalled as the result of brw_tracked_state updates
*/
- GLuint brw;
+ GLuint64 brw;
/**
* State update flags that used to be signalled by brw_state_cache.c
* searches.
@@ -522,6 +524,11 @@ struct brw_ff_gs_prog_data {
};
+// struct brw_cs_prog_data {
+// GLuint urb_read_length;
+// };
+#define brw_cs_prog_data brw_wm_prog_data
+
/* Note: brw_vec4_prog_data_compare() must be updated when adding fields to
* this struct!
*/
@@ -666,6 +673,7 @@ enum brw_cache_id {
BRW_CLIP_VP,
BRW_CLIP_UNIT,
BRW_CLIP_PROG,
+ BRW_CS_PROG,
BRW_MAX_CACHE
};
@@ -761,6 +769,7 @@ enum shader_time_shader_type {
#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_CS_PROG (1<<BRW_CS_PROG)
struct brw_vertex_buffer {
/** Buffer object containing the uploaded vertex data */
@@ -1114,6 +1123,7 @@ struct brw_context
int max_vs_threads;
int max_gs_threads;
int max_wm_threads;
+ int max_cs_threads;
/* BRW_NEW_URB_ALLOCATIONS:
*/
@@ -1250,6 +1260,18 @@ struct brw_context
drm_intel_bo *multisampled_null_render_target_bo;
} wm;
+ struct {
+ struct brw_stage_state base;
+ struct brw_wm_prog_data *prog_data;
+
+ GLuint render_surf;
+
+ /**
+ * Buffer object used in place of multisampled null render targets on
+ * Gen6. See brw_update_null_renderbuffer_surface().
+ */
+ drm_intel_bo *multisampled_null_render_target_bo;
+ } cs;
struct {
uint32_t state_offset;
@@ -1778,6 +1800,14 @@ gen6_upload_push_constants(struct brw_context *brw,
struct brw_stage_state *stage_state,
enum aub_state_struct_type type);
+void brw_dispatch_compute(struct gl_context *ctx,
+ GLuint num_groups_x,
+ GLuint num_groups_y,
+ GLuint num_groups_z);
+
+void brw_dispatch_compute_indirect(struct gl_context *ctx,
+ GLintptr indirect);
+
/* ================================================================
* From linux kernel i386 header files, copes with odd sizes better
* than COPY_DWORDS would:
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
new file mode 100644
index 0000000000..d9dec83f59
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -0,0 +1,352 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_cs.cpp
+ *
+ * Work-in-progress compute shader support: compiles the compute program
+ * with the FS backend (fs_visitor / fs_generator), uploads it to the
+ * program cache, and provides the DispatchCompute driver hooks.
+ */
+
+extern "C" {
+
+#include <sys/types.h>
+
+#include "util/hash_table.h"
+#include "main/macros.h"
+#include "main/shaderobj.h"
+#include "main/fbobject.h"
+#include "main/state.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/register_allocate.h"
+#include "program/sampler.h"
+#include "program/hash_table.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+}
+#include "brw_fs.h"
+#include "brw_dead_control_flow.h"
+#include "main/uniforms.h"
+#include "brw_fs_live_variables.h"
+#include "glsl/glsl_types.h"
+#include "intel_mipmap_tree.h"
+#include "brw_state.h"
+#include "brw_cs.h"
+
+/**
+ * Compile the compute shader of \p prog to native code with the FS backend.
+ *
+ * A SIMD8 program is always built.  On gen5+ a SIMD16 variant is attempted
+ * as well, unless DEBUG_NO16 is set or the SIMD8 visitor flagged SIMD16 as
+ * unsupported; a SIMD16 failure only produces a perf_debug message.
+ *
+ * \param final_assembly_size  passed to generate_assembly() to receive the
+ *                             size of the generated program
+ * \return the generated assembly, or NULL if the SIMD8 compile failed.  In
+ *         that case, when \p prog is non-NULL, prog->LinkStatus is cleared
+ *         and the failure message is appended to prog->InfoLog.
+ */
+const unsigned *
+brw_cs_emit(struct brw_context *brw,
+ void *mem_ctx,
+ const struct brw_wm_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ struct gl_fragment_program *fp,
+ struct gl_shader_program *prog,
+ unsigned *final_assembly_size)
+{
+ bool start_busy = false;
+ double start_time = 0;
+
+ if (unlikely(brw->perf_debug)) {
+ start_busy = (brw->batch.last_bo &&
+ drm_intel_bo_busy(brw->batch.last_bo));
+ start_time = get_time();
+ }
+
+ struct brw_shader *shader = NULL;
+ if (prog)
+ shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+ /* shader stays NULL when there is no GLSL program; do not form a member
+ * address through a NULL pointer in that case.
+ */
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ brw_dump_ir(brw, "compute", prog, shader ? &shader->base : NULL,
+ &fp->Base);
+
+ /* Now the main event: Visit the shader IR and generate our FS IR for it.
+ */
+ fs_visitor v(brw, mem_ctx, key, prog_data, prog, fp, 8);
+ if (!v.run()) {
+ if (prog) {
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
+ }
+
+ _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
+ v.fail_msg);
+
+ return NULL;
+ }
+
+ exec_list *simd16_instructions = NULL;
+ fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16);
+ if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
+ if (!v.simd16_unsupported) {
+ /* Try a SIMD16 compile */
+ v2.import_uniforms(&v);
+ if (!v2.run()) {
+ perf_debug("SIMD16 shader failed to compile, falling back to "
+ "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+ } else {
+ simd16_instructions = &v2.instructions;
+ }
+ } else {
+ perf_debug("SIMD16 shader unsupported, falling back to "
+ "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
+ }
+ }
+
+ const unsigned *assembly = NULL;
+ if (brw->gen >= 8) {
+ gen8_fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src);
+ assembly = g.generate_assembly(&v.instructions, simd16_instructions,
+ final_assembly_size);
+ } else {
+ /* DEBUG_CS lives above bit 31, so reduce it to a boolean explicitly
+ * rather than relying on an implicit conversion of the masked value.
+ */
+ fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src,
+ v.runtime_check_aads_emit,
+ (INTEL_DEBUG & DEBUG_CS) != 0);
+ assembly = g.generate_assembly(&v.instructions, simd16_instructions,
+ final_assembly_size);
+ }
+
+ if (unlikely(brw->perf_debug) && shader) {
+ if (shader->compiled_once)
+ brw_wm_debug_recompile(brw, prog, key);
+ shader->compiled_once = true;
+
+ if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+ (get_time() - start_time) * 1000);
+ }
+ }
+
+ return assembly;
+}
+/**
+ * Precompile hook for compute programs (work in progress).
+ *
+ * NOTE(review): the body still operates on the MESA_SHADER_FRAGMENT stage.
+ * It returns true when \p prog has no linked fragment shader; otherwise it
+ * fills a brw_wm_prog_key with assumed state and compiles through
+ * do_wm_prog(), saving and restoring brw->wm.base.prog_offset and
+ * brw->wm.prog_data around the call.  Nothing here touches
+ * MESA_SHADER_COMPUTE or do_cs_prog() yet -- confirm the intended
+ * compute-specific behaviour before relying on this function.
+ *
+ * \return the result of do_wm_prog(), or true if there was nothing to do.
+ */
+bool
+brw_cs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_wm_prog_key key;
+
+ if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
+ return true;
+
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)
+ prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+ bool program_uses_dfdy = fp->UsesDFdy;
+
+ memset(&key, 0, sizeof(key));
+
+ if (brw->gen < 6) {
+ if (fp->UsesKill)
+ key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* Just assume depth testing. */
+ key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+ key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+ }
+
+ if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+ BRW_FS_VARYING_INPUT_MASK) > 16)
+ key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+
+ unsigned sampler_count = _mesa_fls(fp->Base.SamplersUsed);
+ for (unsigned i = 0; i < sampler_count; i++) {
+ if (fp->Base.ShadowSamplers & (1 << i)) {
+ /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+ key.tex.swizzles[i] =
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+ } else {
+ /* Color sampler: assume no swizzling. */
+ key.tex.swizzles[i] = SWIZZLE_XYZW;
+ }
+ }
+
+ if (fp->Base.InputsRead & VARYING_BIT_POS) {
+ key.drawable_height = ctx->DrawBuffer->Height;
+ }
+
+ key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+ ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+ BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+
+ if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+ key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+ key.nr_color_regions > 1;
+ }
+
+ /* GL_FRAGMENT_SHADER_DERIVATIVE_HINT is almost always GL_DONT_CARE. The
+ * quality of the derivatives is likely to be determined by the driconf
+ * option.
+ */
+ key.high_quality_derivatives = brw->disable_derivative_optimization;
+
+ key.program_string_id = bfp->id;
+
+ uint32_t old_prog_offset = brw->wm.base.prog_offset;
+ struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+
+ bool success = do_wm_prog(brw, prog, bfp, &key);
+
+ brw->wm.base.prog_offset = old_prog_offset;
+ brw->wm.prog_data = old_prog_data;
+
+ return success;
+}
+/**
+ * Build the program-cache key for the compute program.
+ *
+ * The key is currently just zero-filled; \p brw is not consulted.
+ */
+static void brw_cs_populate_key( struct brw_context *brw,
+ struct brw_cs_prog_key *key )
+{
+ memset(key, 0, sizeof(*key));
+}
+
+
+/**
+ * Compile the compute program for \p key and store it in the program cache.
+ *
+ * On success the cache entry's offset and prog_data are written to
+ * brw->cs.base.prog_offset and brw->cs.prog_data, which is where
+ * brw_upload_cs_prog() looks for them.  brw_get_scratch_bo() is called for
+ * brw->cs.base.scratch_bo when prog_data.total_scratch is non-zero.
+ *
+ * \return false if brw_cs_emit() failed to produce a program, true otherwise.
+ */
+bool do_cs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_fragment_program *fp,
+ struct brw_cs_prog_key *key)
+{
+ struct gl_context *ctx = &brw->ctx;
+ void *mem_ctx = ralloc_context(NULL);
+ struct brw_cs_prog_data prog_data;
+ const GLuint *program;
+ struct gl_shader *fs = NULL;
+ GLuint program_size;
+
+ if (prog)
+ fs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+
+ memset(&prog_data, 0, sizeof(prog_data));
+
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ */
+ // int param_count;
+ // if (fs) {
+ // param_count = fs->num_uniform_components;
+ // } else {
+ // param_count = fp->program.Base.Parameters->NumParameters * 4;
+ // }
+ // /* The backend also sometimes adds params for texture size. */
+ // param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
+ // prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
+ // prog_data.base.pull_param =
+ // rzalloc_array(NULL, const float *, param_count);
+ // prog_data.base.nr_params = param_count;
+
+ // prog_data.barycentric_interp_modes =
+ // brw_compute_barycentric_interp_modes(brw, key->flat_shade,
+ // key->persample_shading,
+ // &fp->program);
+
+ program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
+ &fp->program, prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ /* Use the compute stage's own state; writing to brw->wm here would
+ * clobber the fragment program's scratch buffer and cache pointers.
+ */
+ if (prog_data.total_scratch) {
+ brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
+ prog_data.total_scratch * brw->max_cs_threads);
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ fprintf(stderr, "\n");
+
+ brw_upload_cache(&brw->cache, BRW_CS_PROG,
+ key, sizeof(struct brw_cs_prog_key),
+ program, program_size,
+ &prog_data, sizeof(prog_data),
+ &brw->cs.base.prog_offset, &brw->cs.prog_data);
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+
+/**
+ * State atom callback: make sure a compiled compute program matching the
+ * current key is available.
+ *
+ * Looks the key up in the program cache and compiles via do_cs_prog() on a
+ * miss, then points brw->cs.base.prog_data at the program's base data.
+ *
+ * NOTE(review): the program is still taken from brw->fragment_program and
+ * ctx->_Shader->_CurrentFragmentProgram -- presumably a placeholder until
+ * compute program state is plumbed through; confirm.
+ */
+static void
+brw_upload_cs_prog(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct brw_cs_prog_key key;
+ struct brw_fragment_program *fp = (struct brw_fragment_program *)
+ brw->fragment_program;
+
+ /* This atom is part of the regular gen7 state upload, so keep the trace
+ * off stdout and emit it only when CS debugging is requested.
+ */
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ fprintf(stderr, "brw_upload_cs_prog\n");
+
+ brw_cs_populate_key(brw, &key);
+
+ if (!brw_search_cache(&brw->cache, BRW_CS_PROG,
+ &key, sizeof(key),
+ &brw->cs.base.prog_offset, &brw->cs.prog_data)) {
+ bool success = do_cs_prog(brw, ctx->_Shader->_CurrentFragmentProgram, fp,
+ &key);
+ (void) success;
+ assert(success);
+ }
+ brw->cs.base.prog_data = &brw->cs.prog_data->base;
+}
+
+/**
+ * Driver hook installed as Driver.DispatchCompute (not implemented yet).
+ *
+ * Updates Mesa state if ctx->NewState is set and uploads driver state when
+ * brw dirty bits are pending, then hits the TODO assertion.  The group
+ * counts are not used yet.
+ *
+ * NOTE(review): brw->no_batch_wrap is set to true here and never cleared
+ * in this function -- confirm whether it must be reset once the dispatch
+ * commands are emitted.
+ */
+void brw_dispatch_compute(struct gl_context *ctx,
+ GLuint num_groups_x,
+ GLuint num_groups_y,
+ GLuint num_groups_z)
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ if (ctx->NewState)
+ _mesa_update_state(ctx);
+
+ if (brw->state.dirty.brw) {
+ brw->no_batch_wrap = true;
+ brw_upload_state(brw);
+ }
+
+ ASSERT(!"TODO!");
+}
+/**
+ * Driver hook installed as Driver.DispatchComputeIndirect (not implemented
+ * yet); it only hits the TODO assertion and ignores \p indirect.
+ */
+void brw_dispatch_compute_indirect(struct gl_context *ctx,
+ GLintptr indirect)
+{
+ ASSERT(!"TODO!");
+}
+/**
+ * State atom for the compute program: its emit callback is
+ * brw_upload_cs_prog() and its dirty bits are _NEW_BUFFERS | _NEW_TEXTURE
+ * (mesa) and BRW_NEW_COMPUTE_PROGRAM (brw).
+ */
+const struct brw_tracked_state brw_cs_prog = {
+ .dirty = {
+ .mesa = (_NEW_BUFFERS |
+ _NEW_TEXTURE),
+ .brw = (BRW_NEW_COMPUTE_PROGRAM)
+ },
+ .emit = brw_upload_cs_prog
+};
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
new file mode 100644
index 0000000000..6e891222ba
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#pragma once
+
+#include "brw_fs.h"
+
+// struct brw_cs_prog_key {
+// GLbitfield64 attrs;
+// };
+#define brw_cs_prog_key brw_wm_prog_key
+
+const unsigned *
+brw_cs_emit(struct brw_context *brw,
+ void *mem_ctx,
+ const struct brw_wm_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ struct gl_fragment_program *fp,
+ struct gl_shader_program *prog,
+ unsigned *final_assembly_size);
+
+bool brw_cs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index d782b4fdaf..bc7ebabca1 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -70,6 +70,9 @@ static void brwBindProgram( struct gl_context *ctx,
case GL_FRAGMENT_PROGRAM_ARB:
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
break;
+ case MESA_COMPUTE_PROGRAM:
+ brw->state.dirty.brw |= BRW_NEW_COMPUTE_PROGRAM;
+ break;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index abead1807a..3260c0bfc4 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -77,6 +77,7 @@ extern const struct brw_tracked_state brw_gs_abo_surfaces;
extern const struct brw_tracked_state brw_vs_unit;
extern const struct brw_tracked_state brw_gs_prog;
extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_cs_prog;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
extern const struct brw_tracked_state brw_wm_binding_table;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 3a452c3a58..b10b14f952 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -179,6 +179,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
&brw_vs_prog,
&brw_gs_prog,
&brw_wm_prog,
+ &brw_cs_prog,
/* Command packets: */
@@ -480,6 +481,7 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+ DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
DEFINE_BIT(BRW_NEW_PRIMITIVE),
@@ -530,6 +532,7 @@ static struct dirty_bit_map cache_bits[] = {
DEFINE_BIT(CACHE_NEW_CLIP_VP),
DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+ DEFINE_BIT(CACHE_NEW_CS_PROG),
{0, 0, 0}
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 19b1d3b1a1..7806d40649 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -413,6 +413,10 @@ static void brw_upload_vs_prog(struct brw_context *brw)
struct gl_program *prog = (struct gl_program *) brw->vertex_program;
int i;
+ /* The vertex program may be empty when a compute shader is in use. */
+ if (prog->Id == 0)
+ return;
+
memset(&key, 0, sizeof(key));
/* Just upload the program verbatim for now. Always send it all
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index c72fce2581..e0318bb481 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -66,6 +66,7 @@ static const struct dri_debug_control debug_control[] = {
{ "nodualobj", DEBUG_NO_DUAL_OBJECT_GS },
{ "optimizer", DEBUG_OPTIMIZER },
{ "noann", DEBUG_NO_ANNOTATION },
+ { "cs", DEBUG_CS },
{ NULL, 0 }
};
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 37dc34a261..2b5cb61f87 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -62,6 +62,7 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_NO_DUAL_OBJECT_GS 0x80000000
#define DEBUG_OPTIMIZER 0x100000000
#define DEBUG_NO_ANNOTATION 0x200000000
+#define DEBUG_CS 0x400000000
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c
index 8ca7458b7a..33a0e29f0b 100644
--- a/src/mesa/main/compute.c
+++ b/src/mesa/main/compute.c
@@ -51,7 +51,7 @@ _mesa_DispatchComputeIndirect(GLintptr indirect)
if (ctx->Extensions.ARB_compute_shader) {
assert(ctx->Driver.DispatchComputeIndirect);
- ctx->Driver.DispatchCompute(ctx, indirect);
+ ctx->Driver.DispatchComputeIndirect(ctx, indirect);
} else {
_mesa_error(ctx, GL_INVALID_OPERATION,
"unsupported function (glDispatchComputeIndirect) called");
diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
index 7f7f9a39b3..e838be1686 100644
--- a/src/mesa/main/glheader.h
+++ b/src/mesa/main/glheader.h
@@ -141,6 +141,12 @@ typedef void *GLeglImageOES;
*/
#define MESA_GEOMETRY_PROGRAM 0x8c26
+/**
+ * Internal token for compute programs.
+ * Use the value for GL_COMPUTE_PROGRAM_NV for now.
+ */
+#define MESA_COMPUTE_PROGRAM 0x90FB
+
/* Several fields of struct gl_config can take these as values. Since
* GLX header files may not be available everywhere they need to be used,
* redefine them here.
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 3f60a55308..de399ba6a6 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2328,6 +2328,23 @@ struct gl_fragment_program_state
struct gl_program_cache *Cache;
};
+/**
+ * Context state for compute programs (stored in gl_context::ComputeProgram).
+ */
+struct gl_compute_program_state
+{
+ GLboolean Enabled; /**< GL_ARB_compute_shader */
+ GLboolean _Enabled; /**< Enabled and valid program? */
+ struct gl_compute_program *Current; /**< user-bound compute program */
+
+ /** Currently enabled and valid program (including internal programs
+ * and compiled shader programs).  update_program() in main/state.c points
+ * this at the linked GLSL compute shader's program, or at NULL when there
+ * is no linked compute shader.
+ */
+ struct gl_compute_program *_Current;
+
+ GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
+};
+
/**
* ATI_fragment_shader runtime state
@@ -2988,6 +3005,7 @@ struct gl_shared_state
struct gl_vertex_program *DefaultVertexProgram;
struct gl_fragment_program *DefaultFragmentProgram;
struct gl_geometry_program *DefaultGeometryProgram;
+ struct gl_compute_program *DefaultComputeProgram;
/*@}*/
/* GL_ATI_fragment_shader */
@@ -4126,6 +4144,7 @@ struct gl_context
struct gl_vertex_program_state VertexProgram;
struct gl_fragment_program_state FragmentProgram;
struct gl_geometry_program_state GeometryProgram;
+ struct gl_compute_program_state ComputeProgram;
struct gl_ati_fragment_shader_state ATIFragmentShader;
struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index c122c16aae..570ff649d8 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -100,9 +100,12 @@ update_program(struct gl_context *ctx)
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
struct gl_shader_program *fsProg =
ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+ struct gl_shader_program *csProg =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
+ const struct gl_compute_program *prevCP = ctx->ComputeProgram._Current;
GLbitfield new_state = 0x0;
/*
@@ -198,6 +201,16 @@ update_program(struct gl_context *ctx)
_mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
}
+ if (csProg && csProg->LinkStatus
+ && csProg->_LinkedShaders[MESA_SHADER_COMPUTE]) {
+ /* Use GLSL compute shader */
+ _mesa_reference_compprog(ctx, &ctx->ComputeProgram._Current,
+ gl_compute_program(csProg->_LinkedShaders[MESA_SHADER_COMPUTE]->Program));
+ } else {
+ /* No compute program */
+ _mesa_reference_compprog(ctx, &ctx->ComputeProgram._Current, NULL);
+ }
+
/* Let the driver know what's happening:
*/
if (ctx->FragmentProgram._Current != prevFP) {
@@ -224,6 +237,14 @@ update_program(struct gl_context *ctx)
}
}
+ if (ctx->ComputeProgram._Current != prevCP) {
+ new_state |= _NEW_PROGRAM;
+ if (ctx->Driver.BindProgram) {
+ ctx->Driver.BindProgram(ctx, MESA_COMPUTE_PROGRAM,
+ (struct gl_program *) ctx->ComputeProgram._Current);
+ }
+ }
+
return new_state;
}
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index ef698242ff..ab3cab330f 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -138,6 +138,15 @@ _mesa_reference_geomprog(struct gl_context *ctx,
(struct gl_program *) prog);
}
+static inline void
+_mesa_reference_compprog(struct gl_context *ctx,
+ struct gl_compute_program **ptr,
+ struct gl_compute_program *prog)
+{
+ _mesa_reference_program(ctx, (struct gl_program **) ptr,
+ (struct gl_program *) prog);
+}
+
extern struct gl_program *
_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog);
@@ -162,6 +171,13 @@ _mesa_clone_fragment_program(struct gl_context *ctx,
return (struct gl_fragment_program *) _mesa_clone_program(ctx, &prog->Base);
}
+static inline struct gl_compute_program *
+_mesa_clone_compute_program(struct gl_context *ctx,
+ const struct gl_compute_program *prog)
+{
+ return (struct gl_compute_program *) _mesa_clone_program(ctx, &prog->Base);
+}
+
extern GLboolean
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
@@ -276,6 +292,19 @@ gl_geometry_program_const(const struct gl_program *prog)
}
+static inline struct gl_compute_program *
+gl_compute_program(struct gl_program *prog)
+{
+ return (struct gl_compute_program *) prog;
+}
+
+static inline const struct gl_compute_program *
+gl_compute_program_const(const struct gl_program *prog)
+{
+ return (const struct gl_compute_program *) prog;
+}
+
+
#ifdef __cplusplus
} /* extern "C" */
#endif