diff options
author | Jordan Justen <jordan.l.justen@intel.com> | 2014-08-05 13:51:28 -0700 |
---|---|---|
committer | Jordan Justen <jordan.l.justen@intel.com> | 2014-08-06 00:26:04 -0700 |
commit | 78457453b720bd54885207546abc9eb47201ef4e (patch) | |
tree | 0b9e99b3f92aba75881a99dac2c2bb8cadc0148a | |
parent | 2e8849bb68a4b589057911175b98158fef9d2321 (diff) |
cs wip (cs-old)
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 34 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.cpp | 352 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.h | 46 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_debug.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_debug.h | 1 | ||||
-rw-r--r-- | src/mesa/main/compute.c | 2 | ||||
-rw-r--r-- | src/mesa/main/glheader.h | 6 | ||||
-rw-r--r-- | src/mesa/main/mtypes.h | 19 | ||||
-rw-r--r-- | src/mesa/main/state.c | 21 | ||||
-rw-r--r-- | src/mesa/program/program.h | 29 |
16 files changed, 536 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ee28dd98ef..c3ce13afe6 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -43,6 +43,7 @@ i965_FILES = \ brw_clip_unfilled.c \ brw_clip_util.c \ brw_context.c \ + brw_cs.cpp \ brw_cubemap_normalize.cpp \ brw_curbe.c \ brw_dead_control_flow.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 52f2557504..4e653d8569 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -279,6 +279,20 @@ brw_init_driver_functions(struct brw_context *brw, functions->GetSamplePosition = gen6_get_sample_position; } +/* This function sets Driver functions for the context after extension + * overrides have been processed. + */ +static void +brw_init_driver_override_functions(struct brw_context *brw) +{ + struct dd_function_table *functions = &brw->ctx.Driver; + + if (brw->gen >= 7 && _mesa_extension_override_enables.ARB_compute_shader) { + functions->DispatchCompute = brw_dispatch_compute; + functions->DispatchComputeIndirect = brw_dispatch_compute_indirect; + } +} + static void brw_initialize_context_constants(struct brw_context *brw) { @@ -662,6 +676,8 @@ brwCreateContext(gl_api api, return false; } + brw_init_driver_override_functions(brw); + driContextSetFlags(ctx, flags); /* Initialize the software rasterizer and helper modules. 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1bbcf46975..f642555f5f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -153,6 +153,7 @@ enum brw_state_id { BRW_STATE_FRAGMENT_PROGRAM, BRW_STATE_GEOMETRY_PROGRAM, BRW_STATE_VERTEX_PROGRAM, + BRW_STATE_COMPUTE_PROGRAM, BRW_STATE_CURBE_OFFSETS, BRW_STATE_REDUCED_PRIMITIVE, BRW_STATE_PRIMITIVE, @@ -187,6 +188,7 @@ enum brw_state_id { #define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) #define BRW_NEW_FRAGMENT_PROGRAM (1 << BRW_STATE_FRAGMENT_PROGRAM) #define BRW_NEW_GEOMETRY_PROGRAM (1 << BRW_STATE_GEOMETRY_PROGRAM) +#define BRW_NEW_COMPUTE_PROGRAM (1 << BRW_STATE_COMPUTE_PROGRAM) #define BRW_NEW_VERTEX_PROGRAM (1 << BRW_STATE_VERTEX_PROGRAM) #define BRW_NEW_CURBE_OFFSETS (1 << BRW_STATE_CURBE_OFFSETS) #define BRW_NEW_REDUCED_PRIMITIVE (1 << BRW_STATE_REDUCED_PRIMITIVE) @@ -220,7 +222,7 @@ enum brw_state_id { #define BRW_NEW_META_IN_PROGRESS (1 << BRW_STATE_META_IN_PROGRESS) #define BRW_NEW_INTERPOLATION_MAP (1 << BRW_STATE_INTERPOLATION_MAP) #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION) -#define BRW_NEW_NUM_SAMPLES (1 << BRW_STATE_NUM_SAMPLES) +#define BRW_NEW_NUM_SAMPLES (1ULL << BRW_STATE_NUM_SAMPLES) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -228,7 +230,7 @@ struct brw_state_flags { /** * State update flags signalled as the result of brw_tracked_state updates */ - GLuint brw; + GLuint64 brw; /** * State update flags that used to be signalled by brw_state_cache.c * searches. @@ -522,6 +524,11 @@ struct brw_ff_gs_prog_data { }; +// struct brw_cs_prog_data { +// GLuint urb_read_length; +// }; +#define brw_cs_prog_data brw_wm_prog_data + /* Note: brw_vec4_prog_data_compare() must be updated when adding fields to * this struct! 
*/ @@ -666,6 +673,7 @@ enum brw_cache_id { BRW_CLIP_VP, BRW_CLIP_UNIT, BRW_CLIP_PROG, + BRW_CS_PROG, BRW_MAX_CACHE }; @@ -761,6 +769,7 @@ enum shader_time_shader_type { #define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) #define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) #define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_CS_PROG (1<<BRW_CS_PROG) struct brw_vertex_buffer { /** Buffer object containing the uploaded vertex data */ @@ -1114,6 +1123,7 @@ struct brw_context int max_vs_threads; int max_gs_threads; int max_wm_threads; + int max_cs_threads; /* BRW_NEW_URB_ALLOCATIONS: */ @@ -1250,6 +1260,18 @@ struct brw_context drm_intel_bo *multisampled_null_render_target_bo; } wm; + struct { + struct brw_stage_state base; + struct brw_wm_prog_data *prog_data; + + GLuint render_surf; + + /** + * Buffer object used in place of multisampled null render targets on + * Gen6. See brw_update_null_renderbuffer_surface(). + */ + drm_intel_bo *multisampled_null_render_target_bo; + } cs; struct { uint32_t state_offset; @@ -1778,6 +1800,14 @@ gen6_upload_push_constants(struct brw_context *brw, struct brw_stage_state *stage_state, enum aub_state_struct_type type); +void brw_dispatch_compute(struct gl_context *ctx, + GLuint num_groups_x, + GLuint num_groups_y, + GLuint num_groups_z); + +void brw_dispatch_compute_indirect(struct gl_context *ctx, + GLintptr indirect); + /* ================================================================ * From linux kernel i386 header files, copes with odd sizes better * than COPY_DWORDS would: diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp new file mode 100644 index 0000000000..d9dec83f59 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -0,0 +1,352 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_cs.cpp + * + * This file drives the GLSL IR -> LIR translation, contains the + * optimizations on the LIR, and drives the generation of native code + * from the LIR. 
+ */ + +extern "C" { + +#include <sys/types.h> + +#include "util/hash_table.h" +#include "main/macros.h" +#include "main/shaderobj.h" +#include "main/fbobject.h" +#include "main/state.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "program/register_allocate.h" +#include "program/sampler.h" +#include "program/hash_table.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +} +#include "brw_fs.h" +#include "brw_dead_control_flow.h" +#include "main/uniforms.h" +#include "brw_fs_live_variables.h" +#include "glsl/glsl_types.h" +#include "intel_mipmap_tree.h" +#include "brw_state.h" +#include "brw_cs.h" + +const unsigned * +brw_cs_emit(struct brw_context *brw, + void *mem_ctx, + const struct brw_wm_prog_key *key, + struct brw_cs_prog_data *prog_data, + struct gl_fragment_program *fp, + struct gl_shader_program *prog, + unsigned *final_assembly_size) +{ + bool start_busy = false; + double start_time = 0; + + if (unlikely(brw->perf_debug)) { + start_busy = (brw->batch.last_bo && + drm_intel_bo_busy(brw->batch.last_bo)); + start_time = get_time(); + } + + struct brw_shader *shader = NULL; + if (prog) + shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + brw_dump_ir(brw, "fragment", prog, &shader->base, &fp->Base); + + /* Now the main event: Visit the shader IR and generate our FS IR for it. 
+ */ + fs_visitor v(brw, mem_ctx, key, prog_data, prog, fp, 8); + if (!v.run()) { + if (prog) { + prog->LinkStatus = false; + ralloc_strcat(&prog->InfoLog, v.fail_msg); + } + + _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", + v.fail_msg); + + return NULL; + } + + exec_list *simd16_instructions = NULL; + fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16); + if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) { + if (!v.simd16_unsupported) { + /* Try a SIMD16 compile */ + v2.import_uniforms(&v); + if (!v2.run()) { + perf_debug("SIMD16 shader failed to compile, falling back to " + "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg); + } else { + simd16_instructions = &v2.instructions; + } + } else { + perf_debug("SIMD16 shader unsupported, falling back to " + "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg); + } + } + + const unsigned *assembly = NULL; + if (brw->gen >= 8) { + gen8_fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src); + assembly = g.generate_assembly(&v.instructions, simd16_instructions, + final_assembly_size); + } else { + fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src, + v.runtime_check_aads_emit, INTEL_DEBUG & DEBUG_WM); + assembly = g.generate_assembly(&v.instructions, simd16_instructions, + final_assembly_size); + } + + if (unlikely(brw->perf_debug) && shader) { + if (shader->compiled_once) + brw_wm_debug_recompile(brw, prog, key); + shader->compiled_once = true; + + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { + perf_debug("FS compile took %.03f ms and stalled the GPU\n", + (get_time() - start_time) * 1000); + } + } + + return assembly; +} + +bool +brw_cs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_wm_prog_key key; + + if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) + return true; + + struct gl_fragment_program *fp = (struct gl_fragment_program *) + 
prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + bool program_uses_dfdy = fp->UsesDFdy; + + memset(&key, 0, sizeof(key)); + + if (brw->gen < 6) { + if (fp->UsesKill) + key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* Just assume depth testing. */ + key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + } + + if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS; + + unsigned sampler_count = _mesa_fls(fp->Base.SamplersUsed); + for (unsigned i = 0; i < sampler_count; i++) { + if (fp->Base.ShadowSamplers & (1 << i)) { + /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */ + key.tex.swizzles[i] = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE); + } else { + /* Color sampler: assume no swizzling. */ + key.tex.swizzles[i] = SWIZZLE_XYZW; + } + } + + if (fp->Base.InputsRead & VARYING_BIT_POS) { + key.drawable_height = ctx->DrawBuffer->Height; + } + + key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten & + ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | + BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); + + if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) || + key.nr_color_regions > 1; + } + + /* GL_FRAGMENT_SHADER_DERIVATIVE_HINT is almost always GL_DONT_CARE. The + * quality of the derivatives is likely to be determined by the driconf + * option. 
+ */ + key.high_quality_derivatives = brw->disable_derivative_optimization; + + key.program_string_id = bfp->id; + + uint32_t old_prog_offset = brw->wm.base.prog_offset; + struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; + + bool success = do_wm_prog(brw, prog, bfp, &key); + + brw->wm.base.prog_offset = old_prog_offset; + brw->wm.prog_data = old_prog_data; + + return success; +} + +static void brw_cs_populate_key( struct brw_context *brw, + struct brw_cs_prog_key *key ) +{ + memset(key, 0, sizeof(*key)); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ +bool do_cs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_cs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + void *mem_ctx = ralloc_context(NULL); + struct brw_cs_prog_data prog_data; + const GLuint *program; + struct gl_shader *fs = NULL; + GLuint program_size; + + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + + memset(&prog_data, 0, sizeof(prog_data)); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + // int param_count; + // if (fs) { + // param_count = fs->num_uniform_components; + // } else { + // param_count = fp->program.Base.Parameters->NumParameters * 4; + // } + // /* The backend also sometimes adds params for texture size. 
*/ + // param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + // prog_data.base.param = rzalloc_array(NULL, const float *, param_count); + // prog_data.base.pull_param = + // rzalloc_array(NULL, const float *, param_count); + // prog_data.base.nr_params = param_count; + + // prog_data.barycentric_interp_modes = + // brw_compute_barycentric_interp_modes(brw, key->flat_shade, + // key->persample_shading, + // &fp->program); + + program = brw_cs_emit(brw, mem_ctx, key, &prog_data, + &fp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + if (prog_data.total_scratch) { + brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo, + prog_data.total_scratch * brw->max_cs_threads); + } + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) + fprintf(stderr, "\n"); + /* Publish into brw->cs: brw_upload_cs_prog() reads the result from there. */ + brw_upload_cache(&brw->cache, BRW_CS_PROG, + key, sizeof(struct brw_cs_prog_key), + program, program_size, + &prog_data, sizeof(prog_data), + &brw->cs.base.prog_offset, &brw->cs.prog_data); + + ralloc_free(mem_ctx); + + return true; +} + + +static void +brw_upload_cs_prog(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_cs_prog_key key; + struct brw_fragment_program *fp = (struct brw_fragment_program *) + brw->fragment_program; + + printf("brw_upload_cs_prog\n"); + brw_cs_populate_key(brw, &key); + + if (!brw_search_cache(&brw->cache, BRW_CS_PROG, + &key, sizeof(key), + &brw->cs.base.prog_offset, &brw->cs.prog_data)) { + bool success = do_cs_prog(brw, ctx->_Shader->_CurrentFragmentProgram, fp, + &key); + (void) success; + assert(success); + } + brw->cs.base.prog_data = &brw->cs.prog_data->base; +} + + +void brw_dispatch_compute(struct gl_context *ctx, + GLuint num_groups_x, + GLuint num_groups_y, + GLuint num_groups_z) +{ + struct brw_context *brw = brw_context(ctx); + + if (ctx->NewState) + _mesa_update_state(ctx); + + if (brw->state.dirty.brw) { + brw->no_batch_wrap = true; + brw_upload_state(brw); + } + + 
ASSERT(!"TODO!"); +} + +void brw_dispatch_compute_indirect(struct gl_context *ctx, + GLintptr indirect) +{ + ASSERT(!"TODO!"); +} + +const struct brw_tracked_state brw_cs_prog = { + .dirty = { + .mesa = (_NEW_BUFFERS | + _NEW_TEXTURE), + .brw = (BRW_NEW_COMPUTE_PROGRAM) + }, + .emit = brw_upload_cs_prog +}; + + diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h new file mode 100644 index 0000000000..6e891222ba --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#pragma once + +#include "brw_fs.h" + +// struct brw_cs_prog_key { +// GLbitfield64 attrs; +// }; +#define brw_cs_prog_key brw_wm_prog_key + +const unsigned * +brw_cs_emit(struct brw_context *brw, + void *mem_ctx, + const struct brw_wm_prog_key *key, + struct brw_cs_prog_data *prog_data, + struct gl_fragment_program *fp, + struct gl_shader_program *prog, + unsigned *final_assembly_size); + +bool brw_cs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index d782b4fdaf..bc7ebabca1 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -70,6 +70,9 @@ static void brwBindProgram( struct gl_context *ctx, case GL_FRAGMENT_PROGRAM_ARB: brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; break; + case GL_COMPUTE_PROGRAM_NV: + brw->state.dirty.brw |= BRW_NEW_COMPUTE_PROGRAM; + break; } } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index abead1807a..3260c0bfc4 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -77,6 +77,7 @@ extern const struct brw_tracked_state brw_gs_abo_surfaces; extern const struct brw_tracked_state brw_vs_unit; extern const struct brw_tracked_state brw_gs_prog; extern const struct brw_tracked_state brw_wm_prog; +extern const struct brw_tracked_state brw_cs_prog; extern const struct brw_tracked_state brw_renderbuffer_surfaces; extern const struct brw_tracked_state brw_texture_surfaces; extern const struct brw_tracked_state brw_wm_binding_table; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 3a452c3a58..b10b14f952 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -179,6 +179,7 @@ static const struct brw_tracked_state 
*gen7_atoms[] = &brw_vs_prog, &brw_gs_prog, &brw_wm_prog, + &brw_cs_prog, /* Command packets: */ @@ -480,6 +481,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM), DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), + DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM), DEFINE_BIT(BRW_NEW_CURBE_OFFSETS), DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), DEFINE_BIT(BRW_NEW_PRIMITIVE), @@ -530,6 +532,7 @@ static struct dirty_bit_map cache_bits[] = { DEFINE_BIT(CACHE_NEW_CLIP_VP), DEFINE_BIT(CACHE_NEW_CLIP_UNIT), DEFINE_BIT(CACHE_NEW_CLIP_PROG), + DEFINE_BIT(CACHE_NEW_CS_PROG), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 19b1d3b1a1..7806d40649 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -413,6 +413,10 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct gl_program *prog = (struct gl_program *) brw->vertex_program; int i; + /* The vertex program may be empty if using a compute shader */ + if (prog->Id == 0) + return; + memset(&key, 0, sizeof(key)); /* Just upload the program verbatim for now. 
Always send it all diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index c72fce2581..e0318bb481 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -66,6 +66,7 @@ static const struct dri_debug_control debug_control[] = { { "nodualobj", DEBUG_NO_DUAL_OBJECT_GS }, { "optimizer", DEBUG_OPTIMIZER }, { "noann", DEBUG_NO_ANNOTATION }, + { "cs", DEBUG_CS }, { NULL, 0 } }; diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index 37dc34a261..2b5cb61f87 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -62,6 +62,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_NO_DUAL_OBJECT_GS 0x80000000 #define DEBUG_OPTIMIZER 0x100000000 #define DEBUG_NO_ANNOTATION 0x200000000 +#define DEBUG_CS 0x400000000 #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c index 8ca7458b7a..33a0e29f0b 100644 --- a/src/mesa/main/compute.c +++ b/src/mesa/main/compute.c @@ -51,7 +51,7 @@ _mesa_DispatchComputeIndirect(GLintptr indirect) if (ctx->Extensions.ARB_compute_shader) { assert(ctx->Driver.DispatchComputeIndirect); - ctx->Driver.DispatchCompute(ctx, indirect); + ctx->Driver.DispatchComputeIndirect(ctx, indirect); } else { _mesa_error(ctx, GL_INVALID_OPERATION, "unsupported function (glDispatchComputeIndirect) called"); diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 7f7f9a39b3..e838be1686 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -141,6 +141,12 @@ typedef void *GLeglImageOES; */ #define MESA_GEOMETRY_PROGRAM 0x8c26 +/** + * Internal token for compute programs. + * Use the value for GL_COMPUTE_PROGRAM_NV for now. + */ +#define MESA_COMPUTE_PROGRAM 0x90FB + /* Several fields of struct gl_config can take these as values. 
Since * GLX header files may not be available everywhere they need to be used, * redefine them here. diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 3f60a55308..de399ba6a6 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2328,6 +2328,23 @@ struct gl_fragment_program_state struct gl_program_cache *Cache; }; +/** + * Context state for compute programs. + */ +struct gl_compute_program_state +{ + GLboolean Enabled; /**< GL_ARB_compute_shader */ + GLboolean _Enabled; /**< Enabled and valid program? */ + struct gl_compute_program *Current; /**< user-bound compute program */ + + /** Currently enabled and valid program (including internal programs + * and compiled shader programs). + */ + struct gl_compute_program *_Current; + + GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ +}; + /** * ATI_fragment_shader runtime state @@ -2988,6 +3005,7 @@ struct gl_shared_state struct gl_vertex_program *DefaultVertexProgram; struct gl_fragment_program *DefaultFragmentProgram; struct gl_geometry_program *DefaultGeometryProgram; + struct gl_compute_program *DefaultComputeProgram; /*@}*/ /* GL_ATI_fragment_shader */ @@ -4126,6 +4144,7 @@ struct gl_context struct gl_vertex_program_state VertexProgram; struct gl_fragment_program_state FragmentProgram; struct gl_geometry_program_state GeometryProgram; + struct gl_compute_program_state ComputeProgram; struct gl_ati_fragment_shader_state ATIFragmentShader; struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */ diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index c122c16aae..570ff649d8 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -100,9 +100,12 @@ update_program(struct gl_context *ctx) ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; struct gl_shader_program *fsProg = ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; + struct gl_shader_program *csProg = + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; const struct 
gl_vertex_program *prevVP = ctx->VertexProgram._Current; const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current; const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current; + const struct gl_compute_program *prevCP = ctx->ComputeProgram._Current; GLbitfield new_state = 0x0; /* @@ -198,6 +201,16 @@ update_program(struct gl_context *ctx) _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); } + if (csProg && csProg->LinkStatus + && csProg->_LinkedShaders[MESA_SHADER_COMPUTE]) { + /* Use GLSL compute shader */ + _mesa_reference_compprog(ctx, &ctx->ComputeProgram._Current, + gl_compute_program(csProg->_LinkedShaders[MESA_SHADER_COMPUTE]->Program)); + } else { + /* No compute program */ + _mesa_reference_compprog(ctx, &ctx->ComputeProgram._Current, NULL); + } + /* Let the driver know what's happening: */ if (ctx->FragmentProgram._Current != prevFP) { @@ -224,6 +237,14 @@ update_program(struct gl_context *ctx) } } + if (ctx->ComputeProgram._Current != prevCP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, MESA_COMPUTE_PROGRAM, + (struct gl_program *) ctx->ComputeProgram._Current); + } + } + return new_state; } diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h index ef698242ff..ab3cab330f 100644 --- a/src/mesa/program/program.h +++ b/src/mesa/program/program.h @@ -138,6 +138,15 @@ _mesa_reference_geomprog(struct gl_context *ctx, (struct gl_program *) prog); } +static inline void +_mesa_reference_compprog(struct gl_context *ctx, + struct gl_compute_program **ptr, + struct gl_compute_program *prog) +{ + _mesa_reference_program(ctx, (struct gl_program **) ptr, + (struct gl_program *) prog); +} + extern struct gl_program * _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog); @@ -162,6 +171,13 @@ _mesa_clone_fragment_program(struct gl_context *ctx, return (struct gl_fragment_program *) _mesa_clone_program(ctx, &prog->Base); } +static inline 
struct gl_compute_program * +_mesa_clone_compute_program(struct gl_context *ctx, + const struct gl_compute_program *prog) +{ + return (struct gl_compute_program *) _mesa_clone_program(ctx, &prog->Base); +} + extern GLboolean _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count); @@ -276,6 +292,19 @@ gl_geometry_program_const(const struct gl_program *prog) } +static inline struct gl_compute_program * +gl_compute_program(struct gl_program *prog) +{ + return (struct gl_compute_program *) prog; +} + +static inline const struct gl_compute_program * +gl_compute_program_const(const struct gl_program *prog) +{ + return (const struct gl_compute_program *) prog; +} + + #ifdef __cplusplus } /* extern "C" */ #endif |