diff options
-rw-r--r-- | src/gallium/drivers/softpipe/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_compute.c | 254 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_context.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_context.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_screen.c | 55 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_state.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_state_shader.c | 75 |
7 files changed, 398 insertions, 3 deletions
diff --git a/src/gallium/drivers/softpipe/Makefile.sources b/src/gallium/drivers/softpipe/Makefile.sources
index 1d42351f975..d72266f270f 100644
--- a/src/gallium/drivers/softpipe/Makefile.sources
+++ b/src/gallium/drivers/softpipe/Makefile.sources
@@ -4,6 +4,7 @@ C_SOURCES := \
 	sp_clear.h \
 	sp_context.c \
 	sp_context.h \
+	sp_compute.c \
 	sp_draw_arrays.c \
 	sp_fence.c \
 	sp_fence.h \
diff --git a/src/gallium/drivers/softpipe/sp_compute.c b/src/gallium/drivers/softpipe/sp_compute.c
new file mode 100644
index 00000000000..88298fb4a63
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_compute.c
@@ -0,0 +1,254 @@
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pstipple.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "sp_context.h"
+#include "sp_screen.h"
+#include "sp_state.h"
+#include "sp_texture.h"
+#include "sp_tex_sample.h"
+#include "sp_tex_tile_cache.h"
+#include "tgsi/tgsi_parse.h"
+
+/**
+ * Write (x, y, z) into every channel of the system value that the bound
+ * shader declares with the given semantic.  Does nothing when the shader
+ * does not declare that semantic.
+ */
+static void
+cs_set_system_value(struct tgsi_exec_machine *machine, unsigned semantic,
+                    int x, int y, int z)
+{
+   unsigned i;
+   int j;
+
+   if (machine->SysSemanticToIndex[semantic] == -1)
+      return;
+
+   i = machine->SysSemanticToIndex[semantic];
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      machine->SystemValue[i].xyzw[0].i[j] = x;
+      machine->SystemValue[i].xyzw[1].i[j] = y;
+      machine->SystemValue[i].xyzw[2].i[j] = z;
+   }
+}
+
+/**
+ * Bind the compute shader to one per-thread machine and set the system
+ * values that do not change between workgroups.
+ *
+ * \param w, h, d        thread position inside the block (THREAD_ID)
+ * \param g_w, g_h, g_d  grid dimensions (GRID_SIZE)
+ * \param b_w, b_h, b_d  block dimensions (BLOCK_SIZE)
+ */
+static void
+cs_prepare(const struct sp_compute_shader *cs,
+           struct tgsi_exec_machine *machine,
+           int w, int h, int d,
+           int g_w, int g_h, int g_d,
+           int b_w, int b_h, int b_d,
+           struct tgsi_sampler *sampler,
+           struct tgsi_image *image,
+           struct tgsi_buffer *buffer )
+{
+   /*
+    * Bind tokens/shader to the interpreter's machine state.
+    */
+   tgsi_exec_machine_bind_shader(machine,
+                                 cs->tokens,
+                                 sampler, image, buffer);
+
+   cs_set_system_value(machine, TGSI_SEMANTIC_THREAD_ID, w, h, d);
+   cs_set_system_value(machine, TGSI_SEMANTIC_GRID_SIZE, g_w, g_h, g_d);
+   cs_set_system_value(machine, TGSI_SEMANTIC_BLOCK_SIZE, b_w, b_h, b_d);
+}
+
+/**
+ * Start or resume one thread of a workgroup.
+ *
+ * \param g_w, g_h, g_d  position of the workgroup in the grid (BLOCK_ID)
+ * \param restart        resume at machine->pc instead of starting at 0
+ * \return true if the interpreter stopped with machine->pc != -1, which
+ *         the caller takes to mean the thread must be resumed
+ */
+static bool
+cs_run(const struct sp_compute_shader *cs,
+       int g_w, int g_h, int g_d,
+       struct tgsi_exec_machine *machine, bool restart)
+{
+   if (!restart) {
+      cs_set_system_value(machine, TGSI_SEMANTIC_BLOCK_ID, g_w, g_h, g_d);
+      /* Mask value 0x1: only channel 0 is flagged as a non-helper. */
+      machine->NonHelperMask = (1 << 1) - 1;
+   }
+
+   tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
+
+   return machine->pc != -1;
+}
+
+/**
+ * Execute every thread of one workgroup.  Threads are run one after the
+ * other; as long as any of them reports that it has to be resumed, all
+ * threads are run again in resume mode.
+ */
+static void
+run_workgroup(const struct sp_compute_shader *cs,
+              int g_w, int g_h, int g_d, int num_threads,
+              struct tgsi_exec_machine **machines)
+{
+   bool grp_hit_barrier, restart_threads = false;
+   int i;
+
+   do {
+      grp_hit_barrier = false;
+      for (i = 0; i < num_threads; i++) {
+         grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i],
+                                   restart_threads);
+      }
+      restart_threads = grp_hit_barrier;
+   } while (restart_threads);
+}
+
+/**
+ * Unbind the shader from a machine if it is still the one bound to it.
+ */
+static void
+cs_delete(const struct sp_compute_shader *cs,
+          struct tgsi_exec_machine *machine)
+{
+   if (machine->Tokens == cs->tokens) {
+      tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
+   }
+}
+
+/**
+ * Work out the grid dimensions of a launch.
+ *
+ * For a direct launch they are copied from info->grid; for an indirect
+ * launch three uint32_t values are read from info->indirect at
+ * info->indirect_offset.  If the indirect buffer cannot be mapped the grid
+ * is reported as 0x0x0 so that the caller dispatches nothing.
+ */
+static void
+fill_grid_size(struct pipe_context *context,
+               const struct pipe_grid_info *info,
+               uint32_t grid_size[3])
+{
+   struct pipe_transfer *transfer = NULL;
+   uint32_t *params;
+
+   if (!info->indirect) {
+      grid_size[0] = info->grid[0];
+      grid_size[1] = info->grid[1];
+      grid_size[2] = info->grid[2];
+      return;
+   }
+
+   params = pipe_buffer_map_range(context, info->indirect,
+                                  info->indirect_offset,
+                                  3 * sizeof(uint32_t),
+                                  PIPE_TRANSFER_READ,
+                                  &transfer);
+
+   if (!params || !transfer) {
+      grid_size[0] = 0;
+      grid_size[1] = 0;
+      grid_size[2] = 0;
+      return;
+   }
+
+   grid_size[0] = params[0];
+   grid_size[1] = params[1];
+   grid_size[2] = params[2];
+   pipe_buffer_unmap(context, transfer);
+}
+
+/**
+ * pipe_context::launch_grid hook: run the bound compute shader over the
+ * whole grid with the TGSI interpreter.
+ *
+ * One interpreter machine is created per thread of a block and reused for
+ * every workgroup.  If any allocation fails the launch is dropped.
+ */
+void
+softpipe_launch_grid(struct pipe_context *context,
+                     const struct pipe_grid_info *info)
+{
+   struct softpipe_context *softpipe = softpipe_context(context);
+   struct sp_compute_shader *cs = softpipe->cs;
+   int num_threads_in_group;
+   struct tgsi_exec_machine **machines;
+   int bwidth, bheight, bdepth;
+   int w, h, d, i;
+   unsigned g_w, g_h, g_d;
+   uint32_t grid_size[3];
+   void *local_mem = NULL;
+
+   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
+   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
+   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
+   num_threads_in_group = bwidth * bheight * bdepth;
+
+   fill_grid_size(context, info, grid_size);
+
+   machines = CALLOC(num_threads_in_group, sizeof(*machines));
+   if (!machines)
+      return;
+
+   if (cs->shader.req_local_mem) {
+      local_mem = CALLOC(1, cs->shader.req_local_mem);
+      if (!local_mem)
+         goto out;
+   }
+
+   /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */
+   for (d = 0; d < bdepth; d++) {
+      for (h = 0; h < bheight; h++) {
+         for (w = 0; w < bwidth; w++) {
+            int idx = w + (h * bwidth) + (d * bheight * bwidth);
+
+            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
+            if (!machines[idx])
+               goto out;
+
+            /* every thread of the block shares one local memory area */
+            machines[idx]->LocalMem = local_mem;
+            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
+            cs_prepare(cs, machines[idx],
+                       w, h, d,
+                       grid_size[0], grid_size[1], grid_size[2],
+                       bwidth, bheight, bdepth,
+                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
+                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
+                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
+            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
+                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
+                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
+         }
+      }
+   }
+
+   for (g_d = 0; g_d < grid_size[2]; g_d++) {
+      for (g_h = 0; g_h < grid_size[1]; g_h++) {
+         for (g_w = 0; g_w < grid_size[0]; g_w++) {
+            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
+         }
+      }
+   }
+
+out:
+   /* slots that were never created are still NULL from CALLOC */
+   for (i = 0; i < num_threads_in_group; i++) {
+      if (!machines[i])
+         continue;
+      cs_delete(cs, machines[i]);
+      tgsi_exec_machine_destroy(machines[i]);
+   }
+
+   FREE(local_mem);
+   FREE(machines);
+}
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index e3ec52462a8..1690e38f1ca 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -212,6 +212,7 @@ softpipe_create_context(struct pipe_screen *screen,
 
    softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
    softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
+   softpipe->dump_cs = debug_get_bool_option( "SOFTPIPE_DUMP_CS", FALSE );
 
    softpipe->pipe.screen = screen;
    softpipe->pipe.destroy = softpipe_destroy;
@@ -233,6 +234,8 @@ softpipe_create_context(struct pipe_screen *screen,
 
    softpipe->pipe.draw_vbo = softpipe_draw_vbo;
 
+   softpipe->pipe.launch_grid = softpipe_launch_grid;
+
    softpipe->pipe.clear = softpipe_clear;
    softpipe->pipe.flush = softpipe_flush_wrapped;
    softpipe->pipe.texture_barrier = softpipe_texture_barrier;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 70d00c88b6e..a57f5875537 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -71,6 +71,7 @@ struct softpipe_context {
    struct sp_geometry_shader *gs;
    struct sp_velems_state *velems;
    struct sp_so_state *so;
+   struct sp_compute_shader *cs;
 
    /** Other rendering state */
    struct pipe_blend_color blend_color;
@@ -205,10 +206,11 @@ struct softpipe_context {
     * XXX wouldn't it make more sense for the tile cache to just be part
     * of sp_sampler_view?
     */
-   struct softpipe_tex_tile_cache *tex_cache[PIPE_SHADER_GEOMETRY+1][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   struct softpipe_tex_tile_cache *tex_cache[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
    unsigned dump_fs : 1;
    unsigned dump_gs : 1;
+   unsigned dump_cs : 1;
    unsigned no_rast : 1;
 };
 
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index d89d95c884c..4beeb801b90 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -157,7 +157,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
       return 0;
    case PIPE_CAP_COMPUTE:
-      return 0;
+      return 1;
    case PIPE_CAP_USER_VERTEX_BUFFERS:
    case PIPE_CAP_USER_INDEX_BUFFERS:
    case PIPE_CAP_USER_CONSTANT_BUFFERS:
@@ -289,6 +289,8 @@ softpipe_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe
    {
    case PIPE_SHADER_FRAGMENT:
       return tgsi_exec_get_shader_param(param);
+   case PIPE_SHADER_COMPUTE:
+      return tgsi_exec_get_shader_param(param);
    case PIPE_SHADER_VERTEX:
    case PIPE_SHADER_GEOMETRY:
       if (sp_screen->use_llvm)
@@ -447,6 +449,55 @@ softpipe_get_timestamp(struct pipe_screen *_screen)
    return os_time_get_nano();
 }
 
+/**
+ * pipe_screen::get_compute_param hook.
+ *
+ * When \p ret is non-NULL the value of the cap is stored through it.
+ * \return the size in bytes of the cap's value; 0 for
+ *         PIPE_COMPUTE_CAP_IR_TARGET and for any cap not listed below
+ */
+static int
+softpipe_get_compute_param(struct pipe_screen *_screen,
+                           enum pipe_shader_ir ir_type,
+                           enum pipe_compute_cap param,
+                           void *ret)
+{
+   switch (param) {
+   case PIPE_COMPUTE_CAP_IR_TARGET:
+      return 0;
+   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+      if (ret) {
+         uint64_t *grid_size = ret;
+         grid_size[0] = 65535;
+         grid_size[1] = 65535;
+         grid_size[2] = 65535;
+      }
+      return 3 * sizeof(uint64_t);
+   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+      if (ret) {
+         uint64_t *block_size = ret;
+         block_size[0] = 1024;
+         block_size[1] = 1024;
+         block_size[2] = 1024;
+      }
+      return 3 * sizeof(uint64_t);
+   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+      if (ret) {
+         uint64_t *max_threads_per_block = ret;
+         *max_threads_per_block = 2048;
+      }
+      return sizeof(uint64_t);
+   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+      if (ret) {
+         uint64_t *max_local_size = ret;
+         /* Value reported by the closed source driver. */
+         *max_local_size = 32768;
+      }
+      return sizeof(uint64_t);
+   }
+   return 0;
+}
+
 /**
  * Create a new pipe_screen object
  * Note: we're not presently subclassing pipe_screen (no softpipe_screen).
@@ -473,7 +524,7 @@ softpipe_create_screen(struct sw_winsys *winsys)
    screen->base.is_format_supported = softpipe_is_format_supported;
    screen->base.context_create = softpipe_create_context;
    screen->base.flush_frontbuffer = softpipe_flush_frontbuffer;
-
+   screen->base.get_compute_param = softpipe_get_compute_param;
    screen->use_llvm = debug_get_option_use_llvm();
 
    util_format_s3tc_init();
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 2fc48ab13d8..ad07895c4f6 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -131,6 +131,12 @@ struct sp_so_state {
    struct pipe_stream_output_info base;
 };
 
+/** Subclass of pipe_compute_state */
+struct sp_compute_shader {
+   struct pipe_compute_state shader;
+   struct tgsi_token *tokens;
+   struct tgsi_shader_info info;
+};
 
 void
 softpipe_init_blend_funcs(struct pipe_context *pipe);
@@ -213,4 +219,7 @@ void
 softpipe_cleanup_geometry_sampling(struct softpipe_context *ctx);
 
 
+void
+softpipe_launch_grid(struct pipe_context *context,
+                     const struct pipe_grid_info *info);
 #endif
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index f0d66a53ec6..c871beeffc8 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -378,6 +378,77 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
    }
 }
 
+/**
+ * pipe_context::create_compute_state hook.
+ *
+ * Only PIPE_SHADER_IR_TGSI is accepted.  The tokens are duplicated into the
+ * new state and scanned to fill in state->info.
+ *
+ * \return the new sp_compute_shader, or NULL if the IR type is not TGSI or
+ *         an allocation fails
+ */
+static void *
+softpipe_create_compute_state(struct pipe_context *pipe,
+                              const struct pipe_compute_state *templ)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   const struct tgsi_token *tokens;
+   struct sp_compute_shader *state;
+
+   if (templ->ir_type != PIPE_SHADER_IR_TGSI)
+      return NULL;
+
+   tokens = templ->prog;
+   /* debug */
+   if (softpipe->dump_cs)
+      tgsi_dump(tokens, 0);
+
+   state = CALLOC_STRUCT(sp_compute_shader);
+   if (!state)
+      return NULL;
+
+   state->shader = *templ;
+   state->tokens = tgsi_dup_tokens(tokens);
+   if (!state->tokens) {
+      FREE(state);
+      return NULL;
+   }
+
+   tgsi_scan_shader(state->tokens, &state->info);
+   return state;
+}
+
+/**
+ * pipe_context::bind_compute_state hook: remember \p cs as the compute
+ * shader used by subsequent launches.
+ */
+static void
+softpipe_bind_compute_state(struct pipe_context *pipe,
+                            void *cs)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_compute_shader *state = (struct sp_compute_shader *)cs;
+   if (softpipe->cs == state)
+      return;
+
+   softpipe->cs = state;
+}
+
+/**
+ * pipe_context::delete_compute_state hook: free the duplicated tokens and
+ * the state itself.  The shader must not be the currently bound one.
+ */
+static void
+softpipe_delete_compute_state(struct pipe_context *pipe,
+                              void *cs)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_compute_shader *state = (struct sp_compute_shader *)cs;
+
+   assert(softpipe->cs != state);
+   tgsi_free_tokens(state->tokens);
+   FREE(state);
+}
 
 void
 softpipe_init_shader_funcs(struct pipe_context *pipe)
@@ -395,4 +466,8 @@ softpipe_init_shader_funcs(struct pipe_context *pipe)
    pipe->delete_gs_state = softpipe_delete_gs_state;
 
    pipe->set_constant_buffer = softpipe_set_constant_buffer;
+
+   pipe->create_compute_state = softpipe_create_compute_state;
+   pipe->bind_compute_state = softpipe_bind_compute_state;
+   pipe->delete_compute_state = softpipe_delete_compute_state;
 }
|