glsl,i965: Initial implementation of GLSL shader cache

This uses the recently-added glsl/cache.c to write out two different kinds of things to the disk: 1. The final, compiled, linked binary for a shader program 2. A serialization of various state that's required in order to successfully load a binary written out in (1), this is referred to as "metadata" throughout the implementation. The hash key for these objects is a block of data including the hash of the program, (which in turn is a hash of the hashes of each GLSL source string), as well as the hash of each of the relevant per-stage keys, (to capture variations due to state-specific recompilation). FIXME: It's trivial to add anything we need to this block of data. We should do things we know we want like Mesa version now, before pushing this out to upstream. Additionally, this commit uses the put_key/get_key support in the cache to put the SHA-1 hash of the source string for each successfully compiled shader into the cache. This allows for early, optimistic returns from glCompileShader (if the identical source string had been successfully compiled in the past), in the hope that the final, linked shader will be found in the cache. FIXME: In the cases of this optimism being misplaced, (such as a state change requiring deferred compilation), the subsequent support for later compiling is not present in this commit. This should be fixed. Many thanks to Kristian Høgsberg <krh@bitplanet.net> who provided the initial proof-of-concept on which this implementation is based. Some of the improvements on top of his implementation include: * Storing the size of all cached binaries in the cached program metadata. (This eliminates a "should be big enough" hack in the proof-of-concept). * Add support for serializing and deserializing uniforms of type sampler, (original proof-of-concept serialized only uniforms of type uint, int, float, and bool). * Add TexturesUsed and SamplersUsed to cached metadata. 
* Rename load_from_cache to read_program_metadata, add a shader_cache_ prefix to both this function and write_program_metadata. * Implement all of the metadata serialization/deserialization code using the util/blob API as opposed to meticulous pre-computation of sizes and single-malloc allocation. The latter style will not be maintainable as we start serializing more arbitrary data structures such as hash tables. * Fix broken serialization and deserialization of params pointers. The original proof-of-concept assumed all params should have storage in prog->UniformDataSlots. But that assumption is only correct for uniforms appearing explicitly in the GLSL source, (and not for implicit uniforms such as the matrix values resulting from ftransform()).
author: Carl Worth <cworth@cworth.org> 2015-03-16 11:18:53 -0700
committer: Carl Worth <cworth@cworth.org> 2015-07-10 10:25:16 -0700
commit: 44fb1d29d096aeff2b3e1a106976dc77d244c898 (patch)
tree: 5b774b6a81a3c05e67d5086d798b73b8e80c34d6
parent: 0b1599f054b8866df03317d1970921d096332d68 (diff)
20 files changed, 946 insertions, 7 deletions
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 6b1b075b1d..f97ef7ce70 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -196,7 +196,9 @@ LIBGLSL_FILES = \
 
 LIBGLSL_SHADER_CACHE_FILES = \
 	cache.c \
-	cache.h
+	cache.h \
+	shader_cache.cpp \
+	shader_cache.h
 
 # glsl_compiler
 
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 046d5d7b5b..01bd1697da 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -35,6 +35,8 @@
 #include "glsl_parser.h"
 #include "ir_optimization.h"
 #include "loop_analysis.h"
+#include "cache.h"
+#include "util/mesa-sha1.h"
 
 /**
  * Format a short human-readable description of the given GLSL version.
@@ -1503,6 +1505,16 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
    state->error = glcpp_preprocess(state, &source, &state->info_log,
                              &ctx->Extensions, ctx);
 
+   char buf[41];
+   _mesa_sha1_compute(source, strlen(source), shader->sha1);
+   if (ctx->Cache && cache_has_key(ctx->Cache, shader->sha1)) {
+      /* We've seen this shader before and know it compiles */
+      printf("deferring compile of shader: %s\n",
+             _mesa_sha1_format(buf, shader->sha1));
+      shader->CompileStatus = true;
+      return;
+   }
+
    if (!state->error) {
      _mesa_glsl_lexer_ctor(state, source);
      _mesa_glsl_parse(state);
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index f54a9393e7..712890d0bb 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -106,6 +106,7 @@ enum glsl_matrix_layout {
 #include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
 
 struct glsl_type {
+
    GLenum gl_type;
    glsl_base_type base_type;
 
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 11ae06f9bf..ad4da89267 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -1062,6 +1062,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
    prog->NumUniformStorage = num_uniforms;
    prog->NumHiddenUniforms = hidden_uniforms;
    prog->UniformStorage = uniforms;
+   prog->NumUniformDataSlots = num_data_slots;
+   prog->UniformDataSlots = data;
 
    link_set_image_access_qualifiers(prog);
    link_set_uniform_initializers(prog, boolean_true);
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index b7a783c098..25530a243c 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -76,6 +76,9 @@
 #include "ir_optimization.h"
 #include "ir_rvalue_visitor.h"
 #include "ir_uniform.h"
+#include "util/mesa-sha1.h"
+#include "cache.h"
+#include "shader_cache.h"
 
 #include "main/shaderobj.h"
 #include "main/enums.h"
@@ -2839,6 +2842,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    tfeedback_decl *tfeedback_decls = NULL;
    unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
 
+   if (shader_cache_read_program_metadata(ctx, prog))
+      return;
+
    void *mem_ctx = ralloc_context(NULL); // temporary linker context
 
    prog->LinkStatus = true; /* All error paths will set this to false */
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index ce3dc32329..8fe63d705f 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -201,3 +201,5 @@ void
 linker_warning(gl_shader_program *prog, const char *fmt, ...);
 
 #endif /* GLSL_LINKER_H */
+
+
diff --git a/src/glsl/shader_cache.cpp b/src/glsl/shader_cache.cpp
new file mode 100644
index 0000000000..ff92b69b9b
--- /dev/null
+++ b/src/glsl/shader_cache.cpp
@@ -0,0 +1,443 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file shader_cache.cpp
+ *
+ * GLSL shader cache implementation
+ *
+ * This uses the generic cache in cache.c to implement a cache of linked
+ * shader programs.
+ *
+ * \author Tapani Pälli <tapani.palli@intel.com>
+ * \author Kristian Høgsberg <kristian.h.kristensen@intel.com>
+ * \author Carl Worth <carl.d.worth@intel.com>
+ */
+
+#include "main/core.h"
+#include "glsl_symbol_table.h"
+#include "glsl_parser_extras.h"
+#include "ir.h"
+#include "program.h"
+#include "program/hash_table.h"
+#include "linker.h"
+#include "link_varyings.h"
+#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_uniform.h"
+#include "util/mesa-sha1.h"
+#include "blob.h"
+#include "cache.h"
+
+extern "C" {
+#include "main/shaderobj.h"
+#include "main/enums.h"
+}
+
+/**
+ * Fixed-layout summary of a cached program: uniform and remap-table counts
+ * and offsets, plus per-stage (vs, fs) input/output/texture/sampler state.
+ *
+ * NOTE(review): nothing else in this file references this struct; the
+ * metadata is written and read field by field through the blob API, so this
+ * looks like a leftover from an earlier design -- confirm before relying on
+ * it.
+ */
+struct cache_program {
+   uint32_t num_uniforms;
+   uint32_t samplers_validated;
+   uint32_t uniforms_offset;
+   uint32_t num_data_slots;
+   uint32_t num_remap_entries;
+   uint32_t remap_offset;
+   /* One copy of the per-stage state for the vertex shader and one for the
+    * fragment shader. */
+   struct {
+      uint64_t inputs_read;
+      uint64_t outputs_written;
+      GLbitfield textures_used[MAX_COMBINED_TEXTURE_IMAGE_UNITS];
+      uint64_t samplers_used;
+   } vs, fs;
+};
+
+/**
+ * Fixed-layout description of one cached uniform as three 32-bit fields.
+ *
+ * NOTE(review): not referenced anywhere else in this file; write_uniforms()
+ * and read_uniforms() stream the equivalent data through the blob API
+ * instead.  Presumably name/storage were meant to be offsets -- confirm.
+ */
+struct cache_uniform {
+   uint32_t name;
+   uint32_t storage;
+   uint32_t type;
+};
+
+/**
+ * Pack a GLSL type into one 32-bit word and append it to \p blob.
+ *
+ * The base type always occupies the top byte.  For uint/int/float/bool the
+ * vector element count is stored at bit 4 and the matrix column count in the
+ * low bits.  For samplers the dimensionality is stored at bit 4, the shadow
+ * flag at bit 3, the array flag at bit 2 and the sampled type in the low
+ * bits.  Every other base type is reported on stdout and written as 0.
+ */
+static void
+encode_type_to_blob(struct blob *blob, const glsl_type *type)
+{
+   const glsl_base_type base = type->base_type;
+   uint32_t packed;
+
+   /* FIXME: Only scalar/vector/matrix and sampler types are handled.  Those
+    * fit directly in the uint32_t; aggregate types will need the lower 24
+    * bits to become an index into a pool of type descriptions. */
+   if (base == GLSL_TYPE_UINT || base == GLSL_TYPE_INT ||
+       base == GLSL_TYPE_FLOAT || base == GLSL_TYPE_BOOL) {
+      packed = (base << 24) |
+         (type->vector_elements << 4) |
+         (type->matrix_columns);
+   } else if (base == GLSL_TYPE_SAMPLER) {
+      packed = (base << 24) |
+         (type->sampler_dimensionality << 4) |
+         (type->sampler_shadow << 3) |
+         (type->sampler_array << 2) |
+         (type->sampler_type);
+   } else {
+      /* Image, atomic, struct, interface, array, void, error, ... */
+      printf ("FIXME: Do not yet know how to correctly serialize type %d (%s)\n", type->base_type, type->name);
+      /* FIXME: Serialize these...*/
+      packed = 0;
+   }
+
+   blob_write_uint32(blob, packed);
+}
+
+/**
+ * Read one 32-bit type word from \p blob and look up the matching glsl_type.
+ *
+ * This is the inverse of encode_type_to_blob() for uint/int/float/bool and
+ * sampler types.
+ *
+ * \return the type instance, or NULL when the base type in the top byte is
+ *         one that is not serialized yet.
+ */
+static const glsl_type *
+decode_type_from_blob(struct blob_reader *blob)
+{
+   const uint32_t packed = blob_read_uint32(blob);
+   const glsl_base_type base = (glsl_base_type) (packed >> 24);
+
+   if (base == GLSL_TYPE_UINT || base == GLSL_TYPE_INT ||
+       base == GLSL_TYPE_FLOAT || base == GLSL_TYPE_BOOL) {
+      const uint32_t rows = (packed >> 4) & 0x0f;
+      const uint32_t columns = packed & 0x0f;
+
+      return glsl_type::get_instance(base, rows, columns);
+   }
+
+   if (base == GLSL_TYPE_SAMPLER) {
+      const enum glsl_sampler_dim dim =
+         (enum glsl_sampler_dim) ((packed >> 4) & 0x07);
+      const uint32_t shadow = (packed >> 3) & 0x01;
+      const uint32_t array = (packed >> 2) & 0x01;
+      const glsl_base_type sampled = (glsl_base_type) ((packed >> 0) & 0x03);
+
+      return glsl_type::get_sampler_instance(dim, shadow, array, sampled);
+   }
+
+   /* FIXME: Serialize image, atomic, struct, interface, array, void and
+    * error types...*/
+   return NULL;
+}
+
+/**
+ * Serialize the program's uniform storage into \p metadata.
+ *
+ * A three-word header (SamplersValidated, NumUniformStorage,
+ * NumUniformDataSlots) is followed by one record per uniform: its name, the
+ * index of its storage within prog->UniformDataSlots, its remap location and
+ * its encoded type.  Each uniform is also echoed to stdout.
+ */
+static void
+write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
+{
+   blob_write_uint32(metadata, prog->SamplersValidated);
+   blob_write_uint32(metadata, prog->NumUniformStorage);
+   blob_write_uint32(metadata, prog->NumUniformDataSlots);
+
+   for (uint32_t idx = 0; idx < prog->NumUniformStorage; ++idx) {
+      struct gl_uniform_storage *uniform = &prog->UniformStorage[idx];
+
+      printf("uniform %s %s\n",
+             uniform->type->name,
+             uniform->name);
+
+      blob_write_string(metadata, uniform->name);
+      /* Storage pointers are saved as offsets so they can be rebased on
+       * a freshly allocated slot array when read back. */
+      blob_write_uint32(metadata, uniform->storage - prog->UniformDataSlots);
+      blob_write_uint32(metadata, uniform->remap_location);
+      encode_type_to_blob(metadata, uniform->type);
+   }
+}
+
+/**
+ * Rebuild the program's uniform storage from cached metadata.
+ *
+ * Reads the header and per-uniform records produced by write_uniforms(),
+ * allocates prog->UniformStorage and prog->UniformDataSlots, rebases each
+ * uniform's storage pointer onto the new slot array and fills
+ * prog->UniformHash with name -> index entries.  block_index and
+ * atomic_buffer_index are set to -1 for every uniform.
+ *
+ * decode_type_from_blob() returns NULL for types that are not serialized
+ * yet, so the type is checked before it is dereferenced for logging.
+ */
+static void
+read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
+{
+   struct gl_uniform_storage *uniforms;
+   union gl_constant_value *data;
+   uint32_t i;
+
+   prog->SamplersValidated = blob_read_uint32(metadata);
+   prog->NumUniformStorage = blob_read_uint32(metadata);
+   prog->NumUniformDataSlots = blob_read_uint32(metadata);
+
+   uniforms = rzalloc_array(prog, struct gl_uniform_storage,
+                            prog->NumUniformStorage);
+   prog->UniformStorage = uniforms;
+
+   /* The data slots are parented to the uniform array. */
+   data = rzalloc_array(uniforms, union gl_constant_value,
+                        prog->NumUniformDataSlots);
+   prog->UniformDataSlots = data;
+
+   prog->UniformHash = new string_to_uint_map;
+
+   for (i = 0; i < prog->NumUniformStorage; i++) {
+      uniforms[i].name = ralloc_strdup(prog, blob_read_string (metadata));
+      uniforms[i].storage = data + blob_read_uint32(metadata);
+      uniforms[i].remap_location = blob_read_uint32(metadata);
+      uniforms[i].block_index = -1;
+      uniforms[i].atomic_buffer_index = -1;
+      uniforms[i].type = decode_type_from_blob(metadata);
+
+      /* NOTE(review): the name is presumably NULL when the blob is
+       * truncated or the copy fails; never hand a NULL key to the hash. */
+      if (uniforms[i].name != NULL)
+         prog->UniformHash->put(i, uniforms[i].name);
+
+      printf("uniform %d: %s %s\n",
+             i,
+             uniforms[i].type ? uniforms[i].type->name : "(unknown type)",
+             uniforms[i].name ? uniforms[i].name : "(unnamed)");
+   }
+}
+
+
+/**
+ * Serialize the uniform remap table: an entry count followed by, for each
+ * entry, the index of the pointed-to uniform within prog->UniformStorage.
+ *
+ * NOTE(review): an entry that does not point into prog->UniformStorage
+ * (for example a NULL slot, if those can occur) would be written as a
+ * meaningless index -- confirm whether such entries exist.
+ */
+static void
+write_uniform_remap_table(struct blob *metadata, struct gl_shader_program *prog)
+{
+   const unsigned count = prog->NumUniformRemapTable;
+
+   blob_write_uint32(metadata, count);
+
+   for (unsigned slot = 0; slot < count; ++slot) {
+      struct gl_uniform_storage *target = prog->UniformRemapTable[slot];
+
+      blob_write_uint32(metadata, target - prog->UniformStorage);
+   }
+}
+
+/**
+ * Rebuild prog->UniformRemapTable from cached metadata.
+ *
+ * Reads the entry count, allocates the table and turns every stored index
+ * back into a pointer into prog->UniformStorage, which therefore has to be
+ * populated before this is called.
+ */
+static void
+read_uniform_remap_table(struct blob_reader *metadata,
+                         struct gl_shader_program *prog)
+{
+   prog->NumUniformRemapTable = blob_read_uint32(metadata);
+
+   prog->UniformRemapTable = rzalloc_array(prog, struct gl_uniform_storage *,
+                                           prog->NumUniformRemapTable);
+
+   for (unsigned slot = 0; slot < prog->NumUniformRemapTable; ++slot) {
+      const uint32_t storage_index = blob_read_uint32(metadata);
+
+      prog->UniformRemapTable[slot] = prog->UniformStorage + storage_index;
+   }
+}
+
+/**
+ * Serialize a program parameter list.
+ *
+ * Writes the parameter count, then for each parameter its Type, Name, Size,
+ * DataType and raw StateIndexes array, and finally the list's StateFlags.
+ * Parameter values themselves are not written.
+ */
+static void
+write_shader_parameters(struct blob *metadata,
+                        struct gl_program_parameter_list *params)
+{
+   blob_write_uint32(metadata, params->NumParameters);
+
+   for (unsigned idx = 0; idx < params->NumParameters; ++idx) {
+      struct gl_program_parameter *entry = &params->Parameters[idx];
+
+      blob_write_uint32(metadata, entry->Type);
+      blob_write_string(metadata, entry->Name);
+      blob_write_uint32(metadata, entry->Size);
+      blob_write_uint32(metadata, entry->DataType);
+      blob_write_bytes(metadata, entry->StateIndexes,
+                       sizeof(entry->StateIndexes));
+   }
+
+   blob_write_uint32(metadata, params->StateFlags);
+}
+
+/**
+ * Rebuild a program parameter list from cached metadata.
+ *
+ * Mirrors write_shader_parameters(): reads the count, re-adds each
+ * parameter through _mesa_add_parameter() with no initial values, then
+ * restores StateFlags.
+ */
+static void
+read_shader_parameters(struct blob_reader *metadata,
+                       struct gl_program_parameter_list *params)
+{
+   /* Kept outside the loop, as in the writer's counterpart, so the buffer
+    * is shared by every iteration. */
+   gl_state_index indexes[STATE_LENGTH];
+   const uint32_t count = blob_read_uint32(metadata);
+
+   for (uint32_t n = 0; n < count; ++n) {
+      const gl_register_file file =
+         (gl_register_file) blob_read_uint32(metadata);
+      const char *label = blob_read_string(metadata);
+      const GLuint width = blob_read_uint32(metadata);
+      const GLenum datatype = blob_read_uint32(metadata);
+
+      blob_copy_bytes(metadata, (uint8_t *) indexes,
+                      sizeof(indexes));
+
+      _mesa_add_parameter(params, file, label, width, datatype,
+                          NULL, indexes);
+   }
+
+   params->StateFlags = blob_read_uint32(metadata);
+}
+
+/**
+ * Serialize the per-stage state of one linked shader.
+ *
+ * When the shader has no gl_program, a single 64-bit zero is written.
+ * Otherwise the first word is InputsRead shifted left by one with the low
+ * bit set as a "present" marker, followed by OutputsWritten, the raw
+ * TexturesUsed array, SamplersUsed, num_samplers and the parameter list.
+ */
+static void
+write_shader_metadata(struct blob *metadata, gl_shader *shader)
+{
+   struct gl_program *const glprog = shader->Program;
+
+   if (!glprog) {
+      /* An initial value of 0 indicates that this shader is not present. */
+      blob_write_uint64(metadata, 0);
+      return;
+   }
+
+   /* Use the lowest bit to indicate that there is shader_metadata here. */
+   blob_write_uint64(metadata, glprog->InputsRead << 1 | 1);
+   blob_write_uint64(metadata, glprog->OutputsWritten);
+   blob_write_bytes(metadata, glprog->TexturesUsed,
+                    sizeof(glprog->TexturesUsed));
+   blob_write_uint64(metadata, glprog->SamplersUsed);
+   blob_write_uint64(metadata, shader->num_samplers);
+
+   write_shader_parameters(metadata, glprog->Parameters);
+}
+
+/**
+ * Restore the per-stage state written by write_shader_metadata().
+ *
+ * \param metadata  reader positioned at the start of a stage record
+ * \param glprog    program object that receives the state
+ * \param linked    shader whose Program pointer and num_samplers are set
+ *
+ * A leading word of 0 means the stage was absent; linked->Program is then
+ * set to NULL and \p glprog is left untouched.  Otherwise the low bit of the
+ * first word is the "present" marker and the remaining bits are InputsRead.
+ *
+ * TexturesUsed is fetched with blob_copy_bytes(), the same helper
+ * read_shader_parameters() uses, rather than memcpy() from the unchecked
+ * return value of blob_read_bytes().
+ */
+static void
+read_shader_metadata(struct blob_reader *metadata,
+                     struct gl_program *glprog,
+                     gl_shader *linked)
+{
+   uint64_t has_shader;
+
+   has_shader = blob_read_uint64(metadata);
+
+   if (has_shader) {
+      glprog->InputsRead = has_shader >> 1;
+      glprog->OutputsWritten = blob_read_uint64(metadata);
+      blob_copy_bytes(metadata, (uint8_t *) glprog->TexturesUsed,
+                      sizeof(glprog->TexturesUsed));
+      glprog->SamplersUsed = blob_read_uint64(metadata);
+      linked->num_samplers = blob_read_uint64(metadata);
+
+      glprog->Parameters = _mesa_new_parameter_list();
+
+      read_shader_parameters(metadata, glprog->Parameters);
+
+      linked->Program = glprog;
+   } else {
+      linked->Program = NULL;
+   }
+}
+
+/**
+ * Store the link-time metadata of \p prog in the context's program cache.
+ *
+ * Does nothing when the context has no cache, or when the program is not
+ * exactly a vertex + fragment pair (a geometry stage disqualifies it).
+ * Otherwise the uniforms, the uniform remap table and the VS/FS stage state
+ * are serialized into a blob stored under prog->sha1, and the SHA-1 of every
+ * attached shader is recorded with cache_put_key().  Progress is logged to
+ * stdout.
+ */
+void
+shader_cache_write_program_metadata(struct gl_context *ctx,
+                                    struct gl_shader_program *prog)
+{
+   char hex[41];
+   struct program_cache *const cache = ctx->Cache;
+
+   if (cache == NULL)
+      return;
+
+   /* We should be able to serialize any valid combinations of shaders, but
+    * for now we only support vs+fs. */
+   gl_shader *const vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   gl_shader *const fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+   const bool vs_fs_only =
+      vs && fs && !prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+
+   if (!vs_fs_only)
+      return;
+
+   struct blob *const blob = blob_create(NULL);
+
+   write_uniforms(blob, prog);
+   write_uniform_remap_table(blob, prog);
+   write_shader_metadata(blob, vs);
+   write_shader_metadata(blob, fs);
+
+   /* Remember each source hash so a later compile of the same source can
+    * be recognised. */
+   for (unsigned n = 0; n < prog->NumShaders; ++n) {
+      cache_put_key(cache, prog->Shaders[n]->sha1);
+      printf("marking shader: %s\n",
+             _mesa_sha1_format(hex, prog->Shaders[n]->sha1));
+   }
+
+   cache_put(cache, prog->sha1, blob->data, blob->size);
+
+   ralloc_free(blob);
+
+   printf("putting program metadata in cache: %s\n",
+          _mesa_sha1_format(hex, prog->sha1));
+}
+
+/**
+ * Try to satisfy a link request from the program cache.
+ *
+ * A manifest with one "<stage>: <sha1>\n" line per attached shader is
+ * hashed into prog->sha1, which is the cache key.  On a hit the uniforms,
+ * the uniform remap table and the VS/FS stage state are restored, fresh
+ * linked shader objects are attached to \p prog and LinkStatus is set.
+ *
+ * The manifest buffer is sized from prog->NumShaders, so any number of
+ * attached shaders is handled; the hashed bytes are unchanged for programs
+ * that fitted the previous fixed 256-byte buffer.
+ *
+ * \return true when the program was loaded from the cache.  false when
+ *         there is no cache, a shader has no source or an unknown stage,
+ *         memory is exhausted, or the key is not in the cache; the caller
+ *         must then link normally.
+ */
+bool
+shader_cache_read_program_metadata(struct gl_context *ctx,
+                                   struct gl_shader_program *prog)
+{
+   const char *stage_name[] = { "vs", "gs", "fs", "cs" };
+   const size_t num_stage_names = sizeof(stage_name) / sizeof(stage_name[0]);
+   /* Longest manifest line: two-letter stage name, ": ", 40 hex digits and
+    * a newline. */
+   const size_t max_line = 2 + 2 + 40 + 1;
+   char sha1buf[41];
+   char *manifest;
+   size_t manifest_size, offset = 0;
+   uint8_t *buffer;
+   struct program_cache *cache;
+   size_t size;
+   struct blob_reader metadata;
+   struct gl_program *glprog;
+   gl_shader *linked;
+
+   cache = ctx->Cache;
+   if (!cache)
+      return false;
+
+   manifest_size = (size_t) prog->NumShaders * max_line + 1;
+   manifest = (char *) malloc(manifest_size);
+   if (manifest == NULL)
+      return false;
+
+   for (unsigned i = 0; i < prog->NumShaders; i++) {
+      gl_shader *shader = prog->Shaders[i];
+      int len;
+
+      if (shader->Source == NULL ||
+          (size_t) shader->Stage >= num_stage_names) {
+         free(manifest);
+         return false;
+      }
+
+      len = snprintf(manifest + offset, manifest_size - offset,
+                     "%s: %s\n",
+                     stage_name[shader->Stage],
+                     _mesa_sha1_format(sha1buf, shader->sha1));
+      /* snprintf reports the length it wanted to write; refuse to advance
+       * past the end of the buffer. */
+      if (len < 0 || (size_t) len >= manifest_size - offset) {
+         free(manifest);
+         return false;
+      }
+      offset += len;
+   }
+
+   _mesa_sha1_compute(manifest, offset, prog->sha1);
+   free(manifest);
+
+   buffer = (uint8_t *) cache_get(cache, prog->sha1, &size);
+   if (buffer == NULL) {
+      /* FIXME: Fall back and link shaders here, if necessary, compile any
+       * shaders we didn't compile earlier. */
+      return false;
+   }
+
+   printf("loading shader program meta data from cache: %s\n",
+          _mesa_sha1_format(sha1buf, prog->sha1));
+
+   blob_reader_init(&metadata, buffer, size);
+
+   assert(prog->UniformStorage == NULL);
+
+   read_uniforms(&metadata, prog);
+
+   read_uniform_remap_table(&metadata, prog);
+
+   /* The reference on each gl_program is taken by bumping RefCount directly
+    * rather than through _mesa_reference_program(). */
+   linked = ctx->Driver.NewShader(NULL, 0, GL_VERTEX_SHADER);
+   glprog = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, prog->Name);
+   read_shader_metadata(&metadata, glprog, linked);
+   glprog->RefCount++;
+   _mesa_reference_shader(ctx, &prog->_LinkedShaders[MESA_SHADER_VERTEX], linked);
+
+   linked = ctx->Driver.NewShader(NULL, 0, GL_FRAGMENT_SHADER);
+   glprog = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, prog->Name);
+   read_shader_metadata(&metadata, glprog, linked);
+   glprog->RefCount++;
+   _mesa_reference_shader(ctx, &prog->_LinkedShaders[MESA_SHADER_FRAGMENT], linked);
+
+   /* Every byte must have been consumed, and no read may have run past the
+    * end, for the metadata to be trusted. */
+   if (metadata.current != metadata.end || metadata.overrun) {
+      printf ("Error reading shader metadata. FIXME At this point, we should discard the item from the cache and rebuild from source.\n");
+   }
+
+   prog->LinkStatus = true;
+
+   free (buffer);
+
+   return true;
+}
diff --git a/src/glsl/shader_cache.h b/src/glsl/shader_cache.h
new file mode 100644
index 0000000000..ffcf4f8279
--- /dev/null
+++ b/src/glsl/shader_cache.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef SHADER_CACHE
+#define SHADER_CACHE
+
+#include "cache.h"
+
+/**
+ * Serialize the link-time metadata of \p prog and store it in the cache
+ * attached to \p ctx.  Does nothing when the context has no cache.
+ */
+void
+shader_cache_write_program_metadata(struct gl_context *ctx,
+                                    struct gl_shader_program *prog);
+
+/**
+ * Try to restore the link-time metadata of \p prog from the cache attached
+ * to \p ctx.
+ *
+ * \return true when the program was loaded from the cache and needs no
+ *         further linking, false when the caller must link it normally.
+ */
+bool
+shader_cache_read_program_metadata(struct gl_context *ctx,
+                                   struct gl_shader_program *prog);
+
+#endif /* SHADER_CACHE */
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 5a33aacbc2..e2476d1d77 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -101,6 +101,7 @@ i965_FILES = \
 	brw_sf_state.c \
 	brw_shader.cpp \
 	brw_shader.h \
+	brw_shader_cache.cpp \
 	brw_state_batch.c \
 	brw_state_cache.c \
 	brw_state_dump.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 65f34c368d..9851ff45be 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -430,6 +430,8 @@ struct brw_wm_prog_data {
     * For varying slots that are not used by the FS, the value is -1.
     */
    int urb_setup[VARYING_SLOT_MAX];
+
+   GLuint program_size;
 };
 
 /* Note: brw_cs_prog_data_compare() must be updated when adding fields to this
@@ -643,6 +645,8 @@ struct brw_vs_prog_data {
 
    bool uses_vertexid;
    bool uses_instanceid;
+
+   GLuint program_size;
 };
 
 /** Number of texture sampler units */
@@ -1227,6 +1231,9 @@ struct brw_context
    const struct gl_fragment_program *fragment_program;
    const struct gl_compute_program *compute_program;
 
+   bool program_written_to_cache;
+   unsigned char binary_sha1[20];
+
    /**
     * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
     * that we don't have to reemit that state every time we change FBOs.
diff --git a/src/mesa/drivers/dri/i965/brw_shader_cache.c b/src/mesa/drivers/dri/i965/brw_shader_cache.c
new file mode 100644
index 0000000000..81fc055a55
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_shader_cache.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <util/macros.h>
+#include <util/mesa-sha1.h>
+#include <main/mtypes.h>
+#include <glsl/glsl_parser_extras.h>
+#include <glsl/ir_uniform.h>
+#include <glsl/cache.h>
+#include <glsl/blob.h>
+
+#include "brw_state.h"
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_context.h"
+
+/* Hack to avoid repeatedly loading a binary from the disk cache.
+ *
+ * upload_cached_program() indexes this table with the low six bits of the
+ * first 32-bit word of brw->binary_sha1 and returns early when the slot is
+ * already set.
+ *
+ * NOTE(review): distinct binaries whose hashes share those six bits map to
+ * the same slot, so the later one is skipped as well, and nothing in the
+ * visible code ever clears a slot.
+ */
+static int been_there[64];
+
+void
+upload_cached_program(struct brw_context *brw)
+{
+   char sha1_buf[41];
+   size_t size;
+   uint8_t *buffer;
+   struct blob_reader binary;
+   struct gl_shader_program *prog;
+   struct brw_wm_prog_key wm_key;
+   struct brw_vs_prog_key vs_key;
+   unsigned char sha1[20];
+   char manifest[256];
+   int i, offset = 0;
+   uint32_t *w;
+   struct program_cache *cache;
+   uint8_t *vs_program, *wm_program;
+   size_t vs_program_size, wm_program_size;
+   struct brw_vs_prog_data *vs_prog_data;
+   struct brw_wm_prog_data *wm_prog_data;
+   struct brw_stage_prog_data *prog_data;
+   size_t vs_prog_data_size, wm_prog_data_size;
+   intptr_t parameter_values_base;
+   intptr_t uniform_data_slots_base;
+   void *local = ralloc_context(NULL);
+   uint32_t nr_params, nr_pull_params;
+
+   if (!brw_state_dirty(brw, 0,
+			BRW_NEW_FRAGMENT_PROGRAM |
+			BRW_NEW_GEOMETRY_PROGRAM |
+			BRW_NEW_VERTEX_PROGRAM))
+      return;
+
+   cache = brw->ctx.Cache;
+   if (cache == NULL)
+      return;
+
+   prog = brw->ctx.Shader.ActiveProgram;
+   if (prog == NULL)
+      return;
+
+   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
+                      "program: %s\n", _mesa_sha1_format(sha1_buf, prog->sha1));
+
+   brw_wm_populate_key(brw, &wm_key);
+   _mesa_sha1_compute(&wm_key, sizeof wm_key, sha1);
+   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
+                      "wm_key: %s\n", _mesa_sha1_format(sha1_buf, sha1));
+
+   brw_vs_populate_key(brw, &vs_key);
+   _mesa_sha1_compute(&vs_key, sizeof vs_key, sha1);
+   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
+                      "vs_key: %s\n", _mesa_sha1_format(sha1_buf, sha1));
+
+   _mesa_sha1_compute(manifest, strlen(manifest), brw->binary_sha1);
+
+   w = (uint32_t *) brw->binary_sha1;
+   if (been_there[*w & 63])
+      return;
+   been_there[*w & 63] = 1;
+
+   buffer = cache_get(cache, brw->binary_sha1, &size);
+   if (buffer == NULL)
+      goto FAIL;
+
+   printf("populating bo cache with binary: %s\n",
+          _mesa_sha1_format(sha1_buf, brw->binary_sha1));
+
+   blob_reader_init(&binary, buffer, size);
+
+   /* Read VS program from blob. */
+   vs_program_size = blob_read_uint32(&binary);
+
+   vs_program = blob_read_bytes(&binary, vs_program_size);
+
+   /* Read VS program_data from blob and fixup params pointers. */
+   vs_prog_data_size = blob_read_uint32(&binary);
+   if (vs_prog_data_size != sizeof *vs_prog_data)
+      goto FAIL;
+
+   vs_prog_data = blob_read_bytes(&binary, vs_prog_data_size);
+   prog_data = &vs_prog_data->base.base;
+
+   parameter_values_base = blob_read_intptr(&binary);
+   uniform_data_slots_base = blob_read_intptr(&binary);
+
+   nr_params = blob_read_uint32(&binary);
+   if (nr_params != prog_data->nr_params)
+      goto FAIL;
+
+   prog_data->param = rzalloc_array(local, const gl_constant_value *, nr_params);
+   printf("Allocating %d prog_data->params (%p)\n",
+          prog_data->nr_params, prog_data->param);
+
+   for (i = 0; i < nr_params; i++) {
+      intptr_t param = blob_read_intptr(&binary);
+      ptrdiff_t p_offset, u_offset;
+      struct gl_program_parameter_list *param_list =
+         prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program->Parameters;
+
+      p_offset = (param - parameter_values_base) / sizeof(gl_constant_value);
+      u_offset = (param - uniform_data_slots_base) / sizeof(gl_constant_value);
+      
+      if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) {
+         prog_data->param[i] = ((gl_constant_value *) param_list->ParameterValues) + p_offset;
+      } else if (u_offset >= 0 && u_offset < prog->NumUniformDataSlots) {
+         prog_data->param[i] = prog->UniformDataSlots + u_offset;
+      } else {
+         printf("Error: Failed to fixup pointer value %p\n", (void *) param);
+         goto FAIL;
+      }
+   }
+
+   nr_pull_params = blob_read_uint32(&binary);
+   if (nr_pull_params != prog_data->nr_pull_params)
+      goto FAIL;
+
+   prog_data->pull_param = rzalloc_array(local, const gl_constant_value *,
+                                         nr_pull_params);
+
+
+   for (i = 0; i < nr_pull_params; i++) {
+      intptr_t pull_param = blob_read_intptr(&binary);
+      /* FIXME: We need to fixup pull_params pointers here. */
+   }
+
+   brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
+		    &vs_key, sizeof(struct brw_vs_prog_key),
+		    vs_program, vs_program_size,
+                    vs_prog_data, vs_prog_data_size,
+		    &brw->vs.base.prog_offset, &brw->vs.prog_data);
+
+   /* Read WM program from blob. */
+   wm_program_size = blob_read_uint32(&binary);
+
+   wm_program = blob_read_bytes(&binary, wm_program_size);
+
+   /* Read WM program_data from blob and fixup params pointers. */
+   wm_prog_data_size = blob_read_uint32(&binary);
+   if (wm_prog_data_size != sizeof *wm_prog_data)
+      goto FAIL;
+
+   wm_prog_data = blob_read_bytes(&binary, wm_prog_data_size);
+   prog_data = &wm_prog_data->base;
+
+   parameter_values_base = blob_read_intptr(&binary);
+
+   nr_params = blob_read_uint32(&binary);
+   if (nr_params != prog_data->nr_params)
+      goto FAIL;
+
+   prog_data->param = rzalloc_array(local, const gl_constant_value *, nr_params);
+   printf("Allocating %d prog_data->params (%p)\n",
+          prog_data->nr_params, prog_data->param);
+
+   for (i = 0; i < nr_params; i++) {
+      intptr_t param = blob_read_intptr(&binary);
+      ptrdiff_t p_offset, u_offset;
+      struct gl_program_parameter_list *param_list =
+         prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->Parameters;
+
+      p_offset = (param - parameter_values_base) / sizeof(gl_constant_value);
+      u_offset = (param - uniform_data_slots_base) / sizeof(gl_constant_value);
+      
+      if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) {
+         prog_data->param[i] = ((gl_constant_value *) param_list->ParameterValues) + p_offset;
+      } else if (u_offset >= 0 && u_offset < prog->NumUniformDataSlots) {
+         prog_data->param[i] = prog->UniformDataSlots + u_offset;
+      } else {
+         printf("Error: Failed to fixup pointer value %p\n", (void *) param);
+         goto FAIL;
+      }
+   }
+
+   nr_pull_params = blob_read_uint32(&binary);
+   if (nr_pull_params != prog_data->nr_pull_params)
+      goto FAIL;
+
+   prog_data->pull_param = rzalloc_array(local, const gl_constant_value *,
+                                         nr_pull_params);
+
+
+   for (i = 0; i < nr_pull_params; i++) {
+      intptr_t pull_param = blob_read_intptr(&binary);
+      /* FIXME: We need to fixup pull_params pointers here. */
+   }
+
+   brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
+		    &wm_key, sizeof(struct brw_wm_prog_key),
+                    wm_program, wm_program_size,
+                    wm_prog_data, wm_prog_data_size,
+		    &brw->wm.base.prog_offset, &brw->wm.prog_data);
+
+   if (binary.current != binary.end || binary.overrun) {
+      printf ("Error reading program from cache (did not read every byte written)\n");
+      goto FAIL;
+   }
+
+   printf ("%s: Successfully read every byte written!\n", __FUNCTION__);
+   brw->program_written_to_cache = true;
+
+/* FIXME: I'm currently leaking anything allocated off of this local
+ * context. What we really want here is a context that lives across both
+ * upload_cached_program and write_cached_program. To implement this, Ken
+ * suggests rewriting brw_state_upload.c:brw_upload_state() to pull the code
+ * called for the common atoms out of the loop, and then explicitly call these
+ * caching functions around those, (rather than hooking into the atoms to call
+ * our cache functions).
+
+   ralloc_free(local);
+*/
+   free(buffer);
+   return;
+
+FAIL:
+   /* Fall back and compile from source here. */
+   brw->program_written_to_cache = false;
+   local = NULL;
+   printf("FIXME: May need to fallback to compile from source here...\n");
+   free(buffer);
+}
+
+/**
+ * Serialize the currently uploaded VS and WM programs into the shader cache.
+ *
+ * Nothing is written unless one of the vertex/geometry/fragment program
+ * dirty bits is set, the context has a cache and an active shader program
+ * with both a linked vertex and a linked fragment stage, and
+ * brw->program_written_to_cache is still false.
+ *
+ * The blob stored under brw->binary_sha1 is laid out as follows; the code
+ * that loads a program back from the cache has to read it in this order:
+ *
+ *   VS: uint32 program size, program bytes,
+ *       uint32 sizeof(prog_data), prog_data bytes,
+ *       intptr address of the vertex ParameterValues storage,
+ *       intptr address of prog->UniformDataSlots,
+ *       uint32 nr_params, then nr_params intptr param pointers,
+ *       uint32 nr_pull_params, then nr_pull_params intptr pull_param pointers
+ *   WM: the same sequence, using the fragment ParameterValues storage and
+ *       without a second copy of the UniformDataSlots address.
+ *
+ * The param/pull_param entries are raw pointer values from this process.
+ * They are only meaningful relative to the two base addresses written in
+ * front of them.
+ *
+ * \param brw  The context whose VS/WM programs should be written out.
+ */
+void
+write_cached_program(struct brw_context *brw)
+{
+   struct blob *binary;
+   uint8_t *blob_cursor;
+   size_t vs_program_size, wm_program_size;
+   uint32_t nr_params, nr_pull_params;
+   struct gl_shader_program *prog;
+   struct program_cache *cache;
+   char buf[41];
+   unsigned i;
+
+   if (!brw_state_dirty(brw, 0,
+                        BRW_NEW_FRAGMENT_PROGRAM |
+                        BRW_NEW_GEOMETRY_PROGRAM |
+                        BRW_NEW_VERTEX_PROGRAM))
+      return;
+
+   cache = brw->ctx.Cache;
+   if (cache == NULL)
+      return;
+
+   prog = brw->ctx.Shader.ActiveProgram;
+   if (prog == NULL)
+      return;
+
+   if (brw->program_written_to_cache)
+      return;
+
+   /* Everything below dereferences both linked stages and both prog_data
+    * pointers unconditionally. Skip caching, rather than crash, when any of
+    * them is missing (for example a program without a fragment shader).
+    */
+   if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL ||
+       prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL ||
+       brw->vs.prog_data == NULL ||
+       brw->wm.prog_data == NULL)
+      return;
+
+   binary = blob_create (NULL);
+   if (binary == NULL)
+      return;
+
+   /* Write VS program to blob. */
+   vs_program_size = brw->vs.prog_data->program_size;
+
+   blob_write_uint32(binary, vs_program_size);
+
+   /* The kernel is copied out of the program cache BO directly into space
+    * reserved in the blob. Never hand a NULL destination to the BO read if
+    * that space could not be reserved.
+    */
+   blob_cursor = blob_reserve_bytes(binary, vs_program_size);
+   if (blob_cursor == NULL)
+      goto FAIL;
+
+   drm_intel_bo_get_subdata(brw->cache.bo, brw->vs.base.prog_offset,
+                            vs_program_size, blob_cursor);
+
+   /* Write VS program_data to blob. */
+   blob_write_uint32(binary, sizeof *brw->vs.prog_data);
+   blob_write_bytes(binary, brw->vs.prog_data, sizeof *brw->vs.prog_data);
+
+   /* Include variable-length params from end of brw_stage_prog_data as well.
+    *
+    * Before writing either of the params or pull_params arrays, we first
+    * write out the addresses of the ParameterValues and UniformDataSlots
+    * storage. The pointers within params will be pointers to within one of
+    * these blocks of storage. So we can use the addresses of this storage
+    * together with the pointer values to correctly construct pointers to the
+    * actual storage when the program data is loaded from the cache.
+    */
+
+   blob_write_intptr(binary,
+                     (intptr_t) prog->_LinkedShaders[MESA_SHADER_VERTEX]->
+                      Program->Parameters->ParameterValues);
+   blob_write_intptr(binary, (intptr_t) prog->UniformDataSlots);
+
+   nr_params = brw->vs.prog_data->base.base.nr_params;
+   blob_write_uint32(binary, nr_params);
+
+   for (i = 0; i < nr_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->vs.prog_data->base.base.param[i]);
+   }
+
+   nr_pull_params = brw->vs.prog_data->base.base.nr_pull_params;
+   blob_write_uint32(binary, nr_pull_params);
+
+   for (i = 0; i < nr_pull_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->vs.prog_data->base.base.pull_param[i]);
+   }
+
+   /* Write WM program to blob. */
+   wm_program_size = brw->wm.prog_data->program_size;
+
+   blob_write_uint32(binary, wm_program_size);
+
+   blob_cursor = blob_reserve_bytes(binary, wm_program_size);
+   if (blob_cursor == NULL)
+      goto FAIL;
+
+   drm_intel_bo_get_subdata(brw->cache.bo, brw->wm.base.prog_offset,
+                            wm_program_size, blob_cursor);
+
+   /* Write WM program_data to blob. */
+   blob_write_uint32(binary, sizeof *brw->wm.prog_data);
+   blob_write_bytes(binary, brw->wm.prog_data, sizeof *brw->wm.prog_data);
+
+   /* Include variable-length params, (don't need to rewrite UniformDataSlots
+    * pointer). */
+
+   blob_write_intptr(binary,
+                     (intptr_t) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->
+                     Program->Parameters->ParameterValues);
+
+   nr_params = brw->wm.prog_data->base.nr_params;
+   blob_write_uint32(binary, nr_params);
+
+   for (i = 0; i < nr_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->wm.prog_data->base.param[i]);
+   }
+
+   nr_pull_params = brw->wm.prog_data->base.nr_pull_params;
+   blob_write_uint32(binary, nr_pull_params);
+
+   for (i = 0; i < nr_pull_params; i++) {
+      blob_write_intptr(binary,
+                        (intptr_t) brw->wm.prog_data->base.pull_param[i]);
+   }
+
+   printf("putting binary in cache: %s\n",
+          _mesa_sha1_format(buf, brw->binary_sha1));
+
+   cache_put(cache, brw->binary_sha1, binary->data, binary->size);
+
+   ralloc_free (binary);
+
+   brw->program_written_to_cache = true;
+   return;
+
+FAIL:
+   /* Nothing was handed to the cache; release the partial blob and leave
+    * program_written_to_cache false.
+    */
+   ralloc_free (binary);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 987672f881..716b291af3 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -176,6 +176,12 @@ brw_depthbuffer_format(struct brw_context *brw);
 /* gen8_misc_state.c */
 void gen8_upload_state_base_address(struct brw_context *brw);
 
+/* brw_shader_cache.c */
+void
+upload_cached_program(struct brw_context *brw);
+
+void
+write_cached_program(struct brw_context *brw);
 
 /***********************************************************************
  * brw_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7662c3b580..a284fc487c 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -30,7 +30,6 @@
   */
 
 
-
 #include "brw_context.h"
 #include "brw_state.h"
 #include "drivers/common/meta.h"
@@ -621,6 +620,8 @@ brw_upload_programs(struct brw_context *brw,
                     enum brw_pipeline pipeline)
 {
    if (pipeline == BRW_RENDER_PIPELINE) {
+      upload_cached_program(brw);
+
       brw_upload_vs_prog(brw);
 
       if (brw->gen < 6)
@@ -629,6 +630,9 @@ brw_upload_programs(struct brw_context *brw,
          brw_upload_gs_prog(brw);
 
       brw_upload_wm_prog(brw);
+
+      write_cached_program(brw);
+
    } else if (pipeline == BRW_COMPUTE_PIPELINE) {
       brw_upload_cs_prog(brw);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 6e9848fb1e..1a1bc9b6d2 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -213,6 +213,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
 		    program, program_size,
 		    &prog_data, sizeof(prog_data),
 		    &brw->vs.base.prog_offset, &brw->vs.prog_data);
+
+   brw->vs.prog_data->program_size = program_size;
+
    ralloc_free(mem_ctx);
 
    return true;
@@ -315,12 +318,11 @@ brw_vs_state_dirty(struct brw_context *brw)
                           BRW_NEW_VS_ATTRIB_WORKAROUNDS);
 }
 
-static void
+void
 brw_vs_populate_key(struct brw_context *brw,
                     struct brw_vs_prog_key *key)
 {
    struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_VERTEX_PROGRAM */
    struct brw_vertex_program *vp =
       (struct brw_vertex_program *)brw->vertex_program;
    struct gl_program *prog = (struct gl_program *) brw->vertex_program;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 61f9b006a5..22b2f6b80f 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -81,6 +81,10 @@ brw_codegen_vs_prog(struct brw_context *brw,
                     struct brw_vertex_program *vp,
                     struct brw_vs_prog_key *key);
 
+void
+brw_vs_populate_key(struct brw_context *brw,
+                    struct brw_vs_prog_key *key);
+
 #ifdef __cplusplus
 } /* extern "C" */
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 592a72927c..b36999e67a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -232,6 +232,8 @@ brw_codegen_wm_prog(struct brw_context *brw,
 		    &prog_data, sizeof(prog_data),
 		    &brw->wm.base.prog_offset, &brw->wm.prog_data);
 
+   brw->wm.prog_data->program_size = program_size;
+
    ralloc_free(mem_ctx);
 
    return true;
@@ -452,8 +454,9 @@ brw_wm_state_dirty (struct brw_context *brw)
                           BRW_NEW_VUE_MAP_GEOM_OUT);
 }
 
-static void brw_wm_populate_key( struct brw_context *brw,
-				 struct brw_wm_prog_key *key )
+void
+brw_wm_populate_key( struct brw_context *brw,
+                     struct brw_wm_prog_key *key )
 {
    struct gl_context *ctx = &brw->ctx;
    /* BRW_NEW_FRAGMENT_PROGRAM */
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 0a8a97b2f5..51f1ae8e8d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -90,6 +90,10 @@ bool brw_wm_prog_data_compare(const void *a, const void *b);
 void
 brw_upload_wm_prog(struct brw_context *brw);
 
+void
+brw_wm_populate_key(struct brw_context *brw,
+                    struct brw_wm_prog_key *key);
+
 #ifdef __cplusplus
 } // extern "C"
 #endif
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index faa1de739d..c10cefdc5e 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -86,6 +86,7 @@
 #include "blend.h"
 #include "buffers.h"
 #include "bufferobj.h"
+#include "cache.h"
 #include "context.h"
 #include "cpuinfo.h"
 #include "debug.h"
@@ -1204,6 +1205,8 @@ _mesa_initialize_context(struct gl_context *ctx,
    memset(&ctx->TextureFormatSupported, GL_TRUE,
 	  sizeof(ctx->TextureFormatSupported));
 
+   ctx->Cache = cache_create();
+
    switch (ctx->API) {
    case API_OPENGL_COMPAT:
       ctx->BeginEnd = create_beginend_table(ctx);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 7b55677de3..470fc86abd 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2373,6 +2373,7 @@ struct gl_shader
    GLuint Name;  /**< AKA the handle */
    GLint RefCount;  /**< Reference count */
    GLchar *Label;   /**< GL_KHR_debug */
+   unsigned char sha1[20]; /**< SHA1 hash of pre-processed source */
    GLboolean DeletePending;
    GLboolean CompileStatus;
    bool IsES;              /**< True if this shader uses GLSL ES */
@@ -2619,6 +2620,7 @@ struct gl_shader_program
     * Is the application intending to glGetProgramBinary this program?
     */
    GLboolean BinaryRetreivableHint;
+   unsigned char sha1[20]; /**< SHA1 hash of linked program */
 
    /**
     * Indicates whether program can be bound for individual pipeline stages
@@ -2719,7 +2721,8 @@ struct gl_shader_program
    unsigned NumUniformStorage;
    unsigned NumHiddenUniforms;
    struct gl_uniform_storage *UniformStorage;
-
+   unsigned NumUniformDataSlots;
+   union gl_constant_value *UniformDataSlots;
    /**
     * Mapping from GL uniform locations returned by \c glUniformLocation to
     * UniformStorage entries. Arrays will have multiple contiguous slots
@@ -4408,6 +4411,8 @@ struct gl_context
     * Once this field becomes true, it is never reset to false.
     */
    GLboolean ShareGroupReset;
+
+   struct program_cache *Cache;
 };
 
 
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 0b2eb12236..8f6ca15089 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -48,6 +48,9 @@
 #include "main/uniforms.h"
 
 #include "program/hash_table.h"
+#include "shader_cache.h"
+
+#include "program/hash_table.h"
 #include "program/prog_instruction.h"
 #include "program/prog_optimize.h"
 #include "program/prog_print.h"
@@ -2972,6 +2975,13 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       link_shaders(ctx, prog);
    }
 
+   /* FIXME: We look at prog->Version to determine whether we actually linked
+    * the program or just loaded the uniform metadata from cache.  We
+    * probably want to turn prog->LinkStatus into an enum that captures the
+    * different states. */
+   if (prog->LinkStatus && prog->Version == 0)
+      return;
+
    if (prog->LinkStatus) {
       if (!ctx->Driver.LinkShader(ctx, prog)) {
 	 prog->LinkStatus = GL_FALSE;
@@ -2990,6 +3000,8 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 	 fprintf(stderr, "%s\n", prog->InfoLog);
       }
    }
+
+   shader_cache_write_program_metadata(ctx, prog);
 }
 
 } /* extern "C" */
author	Carl Worth <cworth@cworth.org>	2015-03-16 11:18:53 -0700
committer	Carl Worth <cworth@cworth.org>	2015-07-10 10:25:16 -0700
commit	44fb1d29d096aeff2b3e1a106976dc77d244c898 (patch)
tree	5b774b6a81a3c05e67d5086d798b73b8e80c34d6
parent	0b1599f054b8866df03317d1970921d096332d68 (diff)