summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2010-11-02 09:11:17 -0700
committer: Eric Anholt <eric@anholt.net> 2011-05-18 13:57:17 -0700
commit: b126a0c0cb30b1e2f2df1953fe14d8596d1cf4f7 (patch)
tree: f0b8d9ffebd25ab6775a5de8e8280eca82766b6b
parent: 7592f005608e6c03d53c18d27d9af84bde802014 (diff)
i965: Add support for correct GL_CLAMP behavior by clamping coordinates.
This removes the stupid strict-conformance fallback code I broke when adding ARB_sampler_objects.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36572
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c 36
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 45
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_state.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.c 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_emit.c 5
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c 38
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_sampler_state.c 12
9 files changed, 90 insertions, 69 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 44ede608b7..bcfd678a92 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -325,42 +325,6 @@ static GLboolean check_fallbacks( struct brw_context *brw,
return GL_TRUE;
}
- /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
- * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
- * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
- * we want strict conformance, force the fallback.
- * Right now, we only do this for 2D textures.
- */
- {
- int u;
- for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
-
- if (texUnit->Enabled) {
- struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u);
-
- if (texUnit->Enabled & TEXTURE_1D_BIT) {
- if (sampler->WrapS == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_2D_BIT) {
- if (sampler->WrapS == GL_CLAMP ||
- sampler->WrapT == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_3D_BIT) {
- if (sampler->WrapS == GL_CLAMP ||
- sampler->WrapT == GL_CLAMP ||
- sampler->WrapR == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- }
- }
- }
-
/* Nothing stopping us from the fast path now */
return GL_FALSE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2157c93571..1943ab6021 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1172,7 +1172,8 @@ fs_visitor::visit(ir_assignment *ir)
}
fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler)
{
int mlen;
int base_mrf = 1;
@@ -1184,7 +1185,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
if (ir->shadow_comparitor) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+ fs_inst *inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, base_mrf + mlen + i), coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
+
coordinate.reg_offset++;
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@@ -1212,7 +1217,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
mlen++;
} else if (ir->op == ir_tex) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+ fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
coordinate.reg_offset++;
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@@ -1226,7 +1234,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
assert(ir->op == ir_txb || ir->op == ir_txl);
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate);
+ fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
+ base_mrf + mlen + i * 2),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
coordinate.reg_offset++;
}
@@ -1298,15 +1310,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
* surprising in the disassembly.
*/
fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler)
{
int mlen = 1; /* g0 header always present. */
int base_mrf = 1;
int reg_width = c->dispatch_width / 8;
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width),
- coordinate);
+ fs_inst *inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, base_mrf + mlen + i * reg_width),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
coordinate.reg_offset++;
}
mlen += ir->coordinate->type->vector_elements * reg_width;
@@ -1357,7 +1373,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
}
fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler)
{
int mlen = 1; /* g0 header always present. */
int base_mrf = 1;
@@ -1391,8 +1408,10 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate)
/* Set up the coordinate */
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
- coordinate);
+ fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
coordinate.reg_offset++;
mlen += reg_width;
}
@@ -1517,11 +1536,11 @@ fs_visitor::visit(ir_texture *ir)
fs_reg dst = fs_reg(this, glsl_type::vec4_type);
if (intel->gen >= 7) {
- inst = emit_texture_gen7(ir, dst, coordinate);
+ inst = emit_texture_gen7(ir, dst, coordinate, sampler);
} else if (intel->gen >= 5) {
- inst = emit_texture_gen5(ir, dst, coordinate);
+ inst = emit_texture_gen5(ir, dst, coordinate, sampler);
} else {
- inst = emit_texture_gen4(ir, dst, coordinate);
+ inst = emit_texture_gen4(ir, dst, coordinate, sampler);
}
/* If there's an offset, we already set up m1. To avoid the implied move,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 1b37ef5d08..4b355c979e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -512,9 +512,12 @@ public:
fs_reg *emit_general_interpolation(ir_variable *ir);
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
- fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
- fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
- fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate);
+ fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler);
+ fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler);
+ fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler);
fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 11a8732054..3aaa7c6d79 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -192,7 +192,7 @@ GLuint translate_tex_format(gl_format mesa_format,
GLenum srgb_decode);
/* brw_wm_sampler_state.c */
-GLuint translate_wrap_mode(GLenum wrap);
+uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
void upload_default_color(struct brw_context *brw,
struct gl_sampler_object *sampler,
int unit);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 40589b0d2e..907976295a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -410,6 +410,16 @@ static void brw_wm_populate_key( struct brw_context *brw,
swizzles[GET_SWZ(t->_Swizzle, 1)],
swizzles[GET_SWZ(t->_Swizzle, 2)],
swizzles[GET_SWZ(t->_Swizzle, 3)]);
+
+ if (sampler->MinFilter != GL_NEAREST &&
+ sampler->MagFilter != GL_NEAREST) {
+ if (sampler->WrapS == GL_CLAMP)
+ key->gl_clamp_mask[0] |= 1 << i;
+ if (sampler->WrapT == GL_CLAMP)
+ key->gl_clamp_mask[1] |= 1 << i;
+ if (sampler->WrapR == GL_CLAMP)
+ key->gl_clamp_mask[2] |= 1 << i;
+ }
}
else {
key->tex_swizzles[i] = SWIZZLE_NOOP;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index a5f99a0a65..8ab531bdf8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -71,9 +71,9 @@ struct brw_wm_prog_key {
GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
GLuint yuvtex_swap_mask:16; /* UV swaped */
+ uint16_t gl_clamp_mask[3];
GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
-
GLushort drawable_height;
GLbitfield64 vp_outputs_written;
GLuint iz_lookup;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index fd4cd892f4..f61757a8ca 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1100,11 +1100,16 @@ void emit_tex(struct brw_wm_compile *c,
/* Emit the texcoords. */
for (i = 0; i < nr_texcoords; i++) {
+ if (c->key.gl_clamp_mask[i] & (1 << sampler))
+ brw_set_saturate(p, true);
+
if (emit & (1<<i))
brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
else
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
cur_mrf += mrf_per_channel;
+
+ brw_set_saturate(p, false);
}
/* Fill in the shadow comparison reference value. */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 918c1d6243..5de39aa457 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -44,20 +44,28 @@
-/* The brw (and related graphics cores) do not support GL_CLAMP. The
- * Intel drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- */
-GLuint
-translate_wrap_mode(GLenum wrap)
+uint32_t
+translate_wrap_mode(GLenum wrap, bool using_nearest)
{
switch( wrap ) {
case GL_REPEAT:
return BRW_TEXCOORDMODE_WRAP;
- case GL_CLAMP:
- return BRW_TEXCOORDMODE_CLAMP;
+ case GL_CLAMP:
+ /* GL_CLAMP is the weird mode where coordinates are clamped to
+ * [0.0, 1.0], so linear filtering of coordinates outside of
+ * [0.0, 1.0] give you half edge texel value and half border
+ * color. The fragment shader will clamp the coordinates, and
+ * we set clamp_border here, which gets the result desired. We
+ * just use clamp(_to_edge) for nearest, because for nearest
+ * clamping to 1.0 gives border color instead of the desired
+ * edge texels.
+ */
+ if (using_nearest)
+ return BRW_TEXCOORDMODE_CLAMP;
+ else
+ return BRW_TEXCOORDMODE_CLAMP_BORDER;
case GL_CLAMP_TO_EDGE:
- return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+ return BRW_TEXCOORDMODE_CLAMP;
case GL_CLAMP_TO_BORDER:
return BRW_TEXCOORDMODE_CLAMP_BORDER;
case GL_MIRRORED_REPEAT:
@@ -155,11 +163,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+ bool using_nearest = false;
switch (gl_sampler->MinFilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+ using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -200,6 +210,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
switch (gl_sampler->MagFilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@@ -209,9 +220,12 @@ static void brw_update_sampler_state(struct brw_context *brw,
}
}
- sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
- sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
- sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+ using_nearest);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+ using_nearest);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+ using_nearest);
if (intel->gen >= 6 &&
sampler->ss0.min_filter != sampler->ss0.mag_filter)
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index 8487a8fa4b..95f6fbf741 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -41,11 +41,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+ bool using_nearest = false;
switch (gl_sampler->MinFilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+ using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -85,6 +87,7 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
switch (gl_sampler->MagFilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@@ -94,9 +97,12 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
}
}
- sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
- sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
- sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+ sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+ using_nearest);
+ sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+ using_nearest);
+ sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+ using_nearest);
/* Cube-maps on 965 and later must use the same wrap mode for all 3
* coordinate dimensions. Futher, only CUBE and CLAMP are valid.