gallivm: fix alignment issue for vertex data fetch

We cannot guarantee that vertex buffers have the necessary alignment for fetching all AoS members at once (for instance 4x32bit XYZW data). We can however guarantee that for textures. This did not cause errors for older llvm versions but it now matters and will cause segfaults if the data happens to not be aligned. Thus we need to set alignment manually. (Note that we can't actually really guarantee data to be even element aligned due to offsets in vertex buffers being bytes and OpenGL allowing this, but it does not matter for x86 as alignment is only required for sse vectors - not sure what happens on other archs, however.) This fixes https://bugs.freedesktop.org/show_bug.cgi?id=85467.
author: Roland Scheidegger <sroland@vmware.com> 2014-11-18 15:22:29 +0100
committer: Roland Scheidegger <sroland@vmware.com> 2014-11-18 15:26:59 +0100
commit: 74f505fa73eda0c9b5b1984bebb44cedac8e8794 (patch)
tree: f85c9c2403c74ae006e0744e65145ae1e6ff081f
parent: 3958378abb31164443f888f09f644100d5c34dae (diff)
9 files changed, 50 insertions, 14 deletions
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 3a1b057964..1c26560366 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -742,6 +742,7 @@ generate_fetch(struct gallivm_state *gallivm,
       val = lp_build_fetch_rgba_aos(gallivm,
                                     format_desc,
                                     lp_float32_vec4_type(),
+                                    FALSE,
                                     map_ptr,
                                     zero, zero, zero);
       LLVMBuildStore(builder, val, temp_ptr);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 1177fb224d..969f1f6cc9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -62,6 +62,7 @@ LLVMValueRef
 lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
+                        boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef i,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index af755d460c..3c25c329ed 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -356,6 +356,7 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
  * Fetch a pixel into a 4 float AoS.
  *
  * \param format_desc  describes format of the image we're fetching from
+ * \param aligned  whether the data is guaranteed to be aligned
  * \param ptr  address of the pixel block (or the texel if uncompressed)
  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
  *              these will always be (0, 0).
@@ -365,6 +366,7 @@ LLVMValueRef
 lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
+                        boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef i,
@@ -400,7 +402,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
 
       packed = lp_build_gather(gallivm, type.length/4,
                                format_desc->block.bits, type.width*4,
-                               base_ptr, offset, TRUE);
+                               aligned, base_ptr, offset, TRUE);
 
       assert(format_desc->block.bits <= vec_len);
 
@@ -437,7 +439,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
          LLVMValueRef packed;
 
          packed = lp_build_gather_elem(gallivm, num_pixels,
-                                       format_desc->block.bits, 32,
+                                       format_desc->block.bits, 32, aligned,
                                        base_ptr, offset, k, FALSE);
 
          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index ff2887ee8f..afaabc0879 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -386,6 +386,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                                type.length,
                                format_desc->block.bits,
                                type.width,
+                               TRUE,
                                base_ptr, offset, FALSE);
 
       /*
@@ -411,8 +412,8 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 
       packed = lp_build_gather(gallivm, type.length,
                                format_desc->block.bits,
-                               type.width, base_ptr, offset,
-                               FALSE);
+                               type.width, TRUE,
+                               base_ptr, offset, FALSE);
       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
       }
@@ -438,15 +439,15 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
          unsigned mask = (1 << 8) - 1;
          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
          offset = LLVMBuildAdd(builder, offset, s_offset, "");
-         packed = lp_build_gather(gallivm, type.length,
-                                  32, type.width, base_ptr, offset, FALSE);
+         packed = lp_build_gather(gallivm, type.length, 32, type.width,
+                                  TRUE, base_ptr, offset, FALSE);
          packed = LLVMBuildAnd(builder, packed,
                                lp_build_const_int_vec(gallivm, type, mask), "");
       }
       else {
          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
-         packed = lp_build_gather(gallivm, type.length,
-                                  32, type.width, base_ptr, offset, TRUE);
+         packed = lp_build_gather(gallivm, type.length, 32, type.width,
+                                  TRUE, base_ptr, offset, TRUE);
          packed = LLVMBuildBitCast(builder, packed,
                                    lp_build_vec_type(gallivm, type), "");
       }
@@ -472,7 +473,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       tmp_type.norm = TRUE;
 
       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                    base_ptr, offset, i, j);
+                                    TRUE, base_ptr, offset, i, j);
 
       lp_build_rgba8_to_fi32_soa(gallivm,
                                 type,
@@ -522,7 +523,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 
          /* Get a single float[4]={R,G,B,A} pixel */
          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                       base_ptr, offset_elem,
+                                       TRUE, base_ptr, offset_elem,
                                        i_elem, j_elem);
 
          /*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
index 873f354c04..4f5a45c6a3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -497,7 +497,7 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
    assert(format_desc->block.width == 2);
    assert(format_desc->block.height == 1);
 
-   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset, FALSE);
+   packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE);
 
    (void)j;
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
index 9155d811c0..d02602041c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -76,6 +76,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
                      unsigned length,
                      unsigned src_width,
                      unsigned dst_width,
+                     boolean aligned,
                      LLVMValueRef base_ptr,
                      LLVMValueRef offsets,
                      unsigned i,
@@ -93,6 +94,27 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
    ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
    res = LLVMBuildLoad(gallivm->builder, ptr, "");
 
+   /* XXX
+    * On some archs we probably really want to avoid having to deal
+    * with alignments lower than 4 bytes (if fetch size is a power of
+    * two >= 32). On x86 it doesn't matter, however.
+    * We should be able to guarantee full alignment for any kind of texture
+    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
+    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
+    * but I don't think that's quite what we wanted).
+    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
+    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
+    * enforcing what we want (which is what d3d10 does, the offset needs to
+    * be aligned to element size, but GL has bytes regardless of element
+    * size which would only leave us with minimum alignment restriction of 16
+    * which doesn't make much sense if the type isn't 4x32bit). Due to
+    * translation of offsets to first_elem in sampler_views it actually seems
+    * gallium could not do anything else except 16 no matter what...
+    */
+  if (!aligned) {
+      lp_set_load_alignment(res, 1);
+   }
+
    assert(src_width <= dst_width);
    if (src_width > dst_width) {
       res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
@@ -126,6 +148,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
  * @param length length of the offsets
  * @param src_width src element width in bits
  * @param dst_width result element width in bits (src will be expanded to fit)
+ * @param aligned whether the data is guaranteed to be aligned (to src_width)
  * @param base_ptr base pointer, should be a i8 pointer type.
  * @param offsets vector with offsets
  * @param vector_justify select vector rather than integer justification
@@ -135,6 +158,7 @@ lp_build_gather(struct gallivm_state *gallivm,
                 unsigned length,
                 unsigned src_width,
                 unsigned dst_width,
+                boolean aligned,
                 LLVMValueRef base_ptr,
                 LLVMValueRef offsets,
                 boolean vector_justify)
@@ -144,7 +168,7 @@ lp_build_gather(struct gallivm_state *gallivm,
    if (length == 1) {
       /* Scalar */
       return lp_build_gather_elem(gallivm, length,
-                                  src_width, dst_width,
+                                  src_width, dst_width, aligned,
                                   base_ptr, offsets, 0, vector_justify);
    } else {
       /* Vector */
@@ -158,7 +182,7 @@ lp_build_gather(struct gallivm_state *gallivm,
          LLVMValueRef index = lp_build_const_int32(gallivm, i);
          LLVMValueRef elem;
          elem = lp_build_gather_elem(gallivm, length,
-                                     src_width, dst_width,
+                                     src_width, dst_width, aligned,
                                      base_ptr, offsets, i, vector_justify);
          res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
       }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
index ee694732d3..3ede4763a7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
@@ -45,6 +45,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
                      unsigned length,
                      unsigned src_width,
                      unsigned dst_width,
+                     boolean aligned,
                      LLVMValueRef base_ptr,
                      LLVMValueRef offsets,
                      unsigned i,
@@ -55,6 +56,7 @@ lp_build_gather(struct gallivm_state *gallivm,
                 unsigned length,
                 unsigned src_width,
                 unsigned dst_width,
+                boolean aligned,
                 LLVMValueRef base_ptr,
                 LLVMValueRef offsets,
                 boolean vector_justify);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 394521d382..d7fde810a7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -581,6 +581,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                               bld->texel_type.length,
                               bld->format_desc->block.bits,
                               bld->texel_type.width,
+                              TRUE,
                               data_ptr, offset, TRUE);
 
       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
@@ -589,6 +590,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                       bld->format_desc,
                                       u8n.type,
+                                      TRUE,
                                       data_ptr, offset,
                                       x_subcoord,
                                       y_subcoord);
@@ -919,6 +921,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                        bld->texel_type.length,
                                        bld->format_desc->block.bits,
                                        bld->texel_type.width,
+                                       TRUE,
                                        data_ptr, offset[k][j][i], TRUE);
 
                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
@@ -927,6 +930,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                                bld->format_desc,
                                                u8n.type,
+                                               TRUE,
                                                data_ptr, offset[k][j][i],
                                                x_subcoord[i],
                                                y_subcoord[j]);
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 48bf06e3c4..d9abd1ae37 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -112,7 +112,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
    block = LLVMAppendBasicBlockInContext(context, func, "entry");
    LLVMPositionBuilderAtEnd(builder, block);
 
-   rgba = lp_build_fetch_rgba_aos(gallivm, desc, type,
+   rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
                                   packed_ptr, offset, i, j);
 
    LLVMBuildStore(builder, rgba, rgba_ptr);
@@ -252,6 +252,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
          }
 
          /* To ensure it's 16-byte aligned */
+         /* Could skip this and use unaligned lp_build_fetch_rgba_aos */
          memcpy(packed, test->packed, sizeof packed);
 
          for (i = 0; i < desc->block.height; ++i) {
author	Roland Scheidegger <sroland@vmware.com>	2014-11-18 15:22:29 +0100
committer	Roland Scheidegger <sroland@vmware.com>	2014-11-18 15:26:59 +0100
commit	74f505fa73eda0c9b5b1984bebb44cedac8e8794 (patch)
tree	f85c9c2403c74ae006e0744e65145ae1e6ff081f
parent	3958378abb31164443f888f09f644100d5c34dae (diff)