i965/vec4: split VEC4_OPCODE_FROM_DOUBLE into one opcode per destination's type

This way we can set the destination type as double to all these new opcodes, avoiding any optimizer's confusion that was happening before. Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> [ Francisco Jerez: Drop no_spill workaround originally needed due to the bogus destination type of VEC4_OPCODE_FROM_DOUBLE. ] Reviewed-by: Francisco Jerez <currojerez@riseup.net>
author: Samuel Iglesias Gonsálvez <siglesias@igalia.com> 2017-03-24 08:46:13 +0100
committer: Francisco Jerez <currojerez@riseup.net> 2017-04-14 14:56:08 -0700
commit: 6e3265eae533a1bff4f23a4508c5d8e9ab23164d (patch)
tree: f6ce30b033ad42e549eac7acd636de9b14dc0aaa /src
parent: 50a5217637636f066feabefd7fe46d0ff7778a64 (diff)
7 files changed, 60 insertions, 27 deletions
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index f0b0d5c2a0..13a70f6f6a 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -447,7 +447,9 @@ enum opcode {
    VEC4_OPCODE_MOV_BYTES,
    VEC4_OPCODE_PACK_BYTES,
    VEC4_OPCODE_UNPACK_UNIFORM,
-   VEC4_OPCODE_FROM_DOUBLE,
+   VEC4_OPCODE_DOUBLE_TO_F32,
+   VEC4_OPCODE_DOUBLE_TO_D32,
+   VEC4_OPCODE_DOUBLE_TO_U32,
    VEC4_OPCODE_TO_DOUBLE,
    VEC4_OPCODE_PICK_LOW_32BIT,
    VEC4_OPCODE_PICK_HIGH_32BIT,
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index 73bbc93135..304b4ecf4f 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -326,8 +326,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
       return "pack_bytes";
    case VEC4_OPCODE_UNPACK_UNIFORM:
       return "unpack_uniform";
-   case VEC4_OPCODE_FROM_DOUBLE:
-      return "double_to_single";
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+      return "double_to_f32";
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+      return "double_to_d32";
+   case VEC4_OPCODE_DOUBLE_TO_U32:
+      return "double_to_u32";
    case VEC4_OPCODE_TO_DOUBLE:
       return "single_to_double";
    case VEC4_OPCODE_PICK_LOW_32BIT:
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 386057e3e3..0b92ba704e 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -260,7 +260,9 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
 {
    switch (opcode) {
    case SHADER_OPCODE_GEN4_SCRATCH_READ:
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
    case VEC4_OPCODE_TO_DOUBLE:
    case VEC4_OPCODE_PICK_LOW_32BIT:
    case VEC4_OPCODE_PICK_HIGH_32BIT:
@@ -521,7 +523,9 @@ vec4_visitor::opt_reduce_swizzle()
          break;
 
       case VEC4_OPCODE_TO_DOUBLE:
-      case VEC4_OPCODE_FROM_DOUBLE:
+      case VEC4_OPCODE_DOUBLE_TO_F32:
+      case VEC4_OPCODE_DOUBLE_TO_D32:
+      case VEC4_OPCODE_DOUBLE_TO_U32:
       case VEC4_OPCODE_PICK_LOW_32BIT:
       case VEC4_OPCODE_PICK_HIGH_32BIT:
       case VEC4_OPCODE_SET_LOW_32BIT:
@@ -2255,7 +2259,9 @@ static bool
 is_align1_df(vec4_instruction *inst)
 {
    switch (inst->opcode) {
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
    case VEC4_OPCODE_TO_DOUBLE:
    case VEC4_OPCODE_PICK_LOW_32BIT:
    case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp
index e7f6f93f8b..c1ae32a293 100644
--- a/src/intel/compiler/brw_vec4_copy_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp
@@ -293,7 +293,9 @@ static bool
 is_align1_opcode(unsigned opcode)
 {
    switch (opcode) {
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
    case VEC4_OPCODE_TO_DOUBLE:
    case VEC4_OPCODE_PICK_LOW_32BIT:
    case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 65f3a9a9f0..5be4ef7fd4 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1940,9 +1940,28 @@ generate_code(struct brw_codegen *p,
          break;
       }
 
-      case VEC4_OPCODE_FROM_DOUBLE: {
+      case VEC4_OPCODE_DOUBLE_TO_F32:
+      case VEC4_OPCODE_DOUBLE_TO_D32:
+      case VEC4_OPCODE_DOUBLE_TO_U32: {
          assert(type_sz(src[0].type) == 8);
-         assert(type_sz(dst.type) == 4);
+         assert(type_sz(dst.type) == 8);
+
+         brw_reg_type dst_type;
+
+         switch (inst->opcode) {
+         case VEC4_OPCODE_DOUBLE_TO_F32:
+            dst_type = BRW_REGISTER_TYPE_F;
+            break;
+         case VEC4_OPCODE_DOUBLE_TO_D32:
+            dst_type = BRW_REGISTER_TYPE_D;
+            break;
+         case VEC4_OPCODE_DOUBLE_TO_U32:
+            dst_type = BRW_REGISTER_TYPE_UD;
+            break;
+         default:
+            unreachable("Not supported conversion");
+         }
+         dst = retype(dst, dst_type);
 
          brw_set_default_access_mode(p, BRW_ALIGN_1);
 
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 64371a16de..9d9ded2b96 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1183,16 +1183,28 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src,
       return;
    }
 
+   enum opcode op;
+   switch (dst.type) {
+   case BRW_REGISTER_TYPE_D:
+      op = VEC4_OPCODE_DOUBLE_TO_D32;
+      break;
+   case BRW_REGISTER_TYPE_UD:
+      op = VEC4_OPCODE_DOUBLE_TO_U32;
+      break;
+   case BRW_REGISTER_TYPE_F:
+      op = VEC4_OPCODE_DOUBLE_TO_F32;
+      break;
+   default:
+      unreachable("Unknown conversion");
+   }
+
    dst_reg temp = dst_reg(this, glsl_type::dvec4_type);
    emit(MOV(temp, src));
-
    dst_reg temp2 = dst_reg(this, glsl_type::dvec4_type);
-   temp2 = retype(temp2, dst.type);
-   emit(VEC4_OPCODE_FROM_DOUBLE, temp2, src_reg(temp))
-      ->size_written = 2 * REG_SIZE;
+   emit(op, temp2, src_reg(temp));
 
-   emit(VEC4_OPCODE_PICK_LOW_32BIT, temp2, src_reg(retype(temp2, BRW_REGISTER_TYPE_DF)));
-   vec4_instruction *inst = emit(MOV(dst, src_reg(temp2)));
+   emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2));
+   vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type))));
    inst->saturate = saturate;
 }
 
diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp
index e3b46cc2f7..a0ba77b867 100644
--- a/src/intel/compiler/brw_vec4_reg_allocate.cpp
+++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp
@@ -447,18 +447,6 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
          if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
             no_spill[inst->dst.nr] = true;
 
-         /* FROM_DOUBLE opcodes are setup so that they use a dst register
-          * with a size of 2 even if they only produce a single-precison
-          * result (this is so that the opcode can use the larger register to
-          * produce a 64-bit aligned intermediary result as required by the
-          * hardware during the conversion process). This creates a problem for
-          * spilling though, because when we attempt to emit a spill for the
-          * dst we see a 32-bit destination and emit a scratch write that
-          * allocates a single spill register.
-          */
-         if (inst->opcode == VEC4_OPCODE_FROM_DOUBLE)
-            no_spill[inst->dst.nr] = true;
-
          /* We can't spill registers that mix 32-bit and 64-bit access (that
           * contain 64-bit data that is operated on via 32-bit instructions)
           */
author	Samuel Iglesias Gonsálvez <siglesias@igalia.com>	2017-03-24 08:46:13 +0100
committer	Francisco Jerez <currojerez@riseup.net>	2017-04-14 14:56:08 -0700
commit	6e3265eae533a1bff4f23a4508c5d8e9ab23164d (patch)
tree	f6ce30b033ad42e549eac7acd636de9b14dc0aaa /src
parent	50a5217637636f066feabefd7fe46d0ff7778a64 (diff)