diff options
author | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-01-16 16:39:06 +0100 |
---|---|---|
committer | Nicolai Hähnle <nicolai.haehnle@amd.com> | 2017-01-19 14:51:06 +0100 |
commit | 6895bb0dac1d3b3200b0c89e405e68e122a0ba66 (patch) | |
tree | 72b49a032aac43623b6dae12f5507191006919d7 | |
parent | f3dc5bddb02df2eb1d5ea4293655efd5ecc55ff8 (diff) |
glsl: split DIV_TO_MUL_RCP into single- and double-precision flags
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-rw-r--r-- | src/compiler/glsl/ir_optimization.h | 4 | ||||
-rw-r--r-- | src/compiler/glsl/lower_instructions.cpp | 19 |
2 files changed, 14 insertions, 9 deletions
```diff
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 0d6c4e6a66..01e5270211 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -30,7 +30,7 @@
 
 /* Operations for lower_instructions() */
 #define SUB_TO_ADD_NEG 0x01
-#define DIV_TO_MUL_RCP 0x02
+#define FDIV_TO_MUL_RCP 0x02
 #define EXP_TO_EXP2 0x04
 #define POW_TO_EXP2 0x08
 #define LOG_TO_LOG2 0x10
@@ -49,6 +49,8 @@
 #define FIND_LSB_TO_FLOAT_CAST 0x20000
 #define FIND_MSB_TO_FLOAT_CAST 0x40000
 #define IMUL_HIGH_TO_MUL 0x80000
+#define DDIV_TO_MUL_RCP 0x100000
+#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 9fc83d1583..729cb13f84 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -54,8 +54,8 @@
  * want to recognize add(op0, neg(op1)) or the other way around to
  * produce a subtract anyway.
  *
- * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
- * --------------------------------------
+ * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
+ * ---------------------------------------------------------
  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  *
  * Many GPUs don't have a divide instruction (945 and 965 included),
@@ -63,9 +63,11 @@
  * reciprocal. By breaking the operation down, constant reciprocals
  * can get constant folded.
  *
- * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
- * handles the integer case, converting to and from floating point so that
- * RCP is possible.
+ * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
+ * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
+ * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
+ * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
+ * point so that RCP is possible.
  *
  * EXP_TO_EXP2 and LOG_TO_LOG2:
  * ----------------------------
@@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
    /* Don't generate new IR that would need to be lowered in an additional
     * pass.
     */
-   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
+   if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
+       (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
       div_to_mul_rcp(div_expr);
 
    ir_expression *const floor_expr =
@@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
    case ir_binop_div:
       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
          int_div_to_mul_rcp(ir);
-      else if ((ir->operands[1]->type->is_float() ||
-                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
+      else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
+               (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
          div_to_mul_rcp(ir);
       break;
 
```