diff options
author | David Li <davidxli@google.com> | 2011-01-27 10:43:09 -0800 |
---|---|---|
committer | David Li <davidxli@google.com> | 2011-01-27 10:43:09 -0800 |
commit | 13fea0fc797fa0d4c236db5aa2e6a23fc0e450db (patch) | |
tree | 721e1c37cb9ebc97acc26214557241c204a0f563 | |
parent | 25e19a4ebb63113cb70681e0e946740344690821 (diff) |
Start merging pixelflinger2
Checkpoint on merging pixelflinger2 into mesa.
Partially implemented LLVM texture sampling.
Signed-off-by: David Li <davidxli@google.com>
-rw-r--r-- | Android.mk | 9 | ||||
-rw-r--r-- | include/pixelflinger2/pixelflinger2_constants.h | 42 | ||||
-rw-r--r-- | include/pixelflinger2/pixelflinger2_format.h | 66 | ||||
-rw-r--r-- | include/pixelflinger2/pixelflinger2_interface.h | 199 | ||||
-rw-r--r-- | include/pixelflinger2/pixelflinger2_vector4.h | 199 | ||||
-rw-r--r-- | src/glsl/glsl_types.h | 2 | ||||
-rw-r--r-- | src/glsl/ir_to_llvm.cpp | 25 | ||||
-rw-r--r-- | src/glsl/ir_to_llvm.h | 2 | ||||
-rw-r--r-- | src/glsl/ir_to_llvm_helper.cpp | 619 | ||||
-rw-r--r-- | src/glsl/linker.cpp | 5 | ||||
-rw-r--r-- | src/glsl/main.cpp | 209 | ||||
-rw-r--r-- | src/mesa/main/glheader.h | 59 | ||||
-rw-r--r-- | src/pixelflinger2/buffer.cpp | 225 | ||||
-rw-r--r-- | src/pixelflinger2/pixelflinger2.cpp | 238 | ||||
-rw-r--r-- | src/pixelflinger2/pixelflinger2.h | 160 | ||||
-rw-r--r-- | src/pixelflinger2/raster.cpp | 358 | ||||
-rw-r--r-- | src/pixelflinger2/scanline.cpp | 535 | ||||
-rw-r--r-- | src/pixelflinger2/shader.cpp | 436 | ||||
-rw-r--r-- | src/pixelflinger2/texture.cpp | 426 | ||||
-rw-r--r-- | src/pixelflinger2/texture.h | 43 | ||||
-rw-r--r-- | src/talloc/hieralloc.c | 36 | ||||
-rw-r--r-- | src/talloc/hieralloc.h | 2 |
22 files changed, 3807 insertions, 88 deletions
@@ -101,8 +101,15 @@ mesa_SRC_FILES := \ src/glsl/s_expression.cpp \ src/glsl/strtod.c \ src/glsl/ir_to_llvm.cpp \ + src/glsl/ir_to_llvm_helper.cpp \ src/mesa/program/hash_table.c \ src/mesa/program/symbol_table.c \ + src/pixelflinger2/buffer.cpp \ + src/pixelflinger2/pixelflinger2.cpp \ + src/pixelflinger2/raster.cpp \ + src/pixelflinger2/scanline.cpp \ + src/pixelflinger2/shader.cpp \ + src/pixelflinger2/texture.cpp \ src/talloc/hieralloc.c # Executable for host @@ -165,7 +172,7 @@ LOCAL_SRC_FILES += egl.cpp LOCAL_SHARED_LIBRARIES += libutils libhardware libsurfaceflinger_client libpixelflinger LOCAL_CPPFLAGS += -DDRAW_TO_SCREEN=1 LOCAL_CFLAGS += -fvisibility=hidden -LOCAL_CFLAGS += -fstrict-aliasing +LOCAL_CPPFLAGS += -fvisibility=hidden ifeq ($(USE_LLVM_EXECUTIONENGINE),true) LOCAL_CPPFLAGS += -DUSE_LLVM_EXECUTIONENGINE diff --git a/include/pixelflinger2/pixelflinger2_constants.h b/include/pixelflinger2/pixelflinger2_constants.h new file mode 100644 index 0000000..5a90e65 --- /dev/null +++ b/include/pixelflinger2/pixelflinger2_constants.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef _PIXELFLINGER2_CONSTANTS_H_
+#define _PIXELFLINGER2_CONSTANTS_H_
+
+#define GGL_MAXVERTEXATTRIBS 8
+#define GGL_MAXVERTEXUNIFORMVECTORS 128
+#define GGL_MAXVARYINGVECTORS 8
+#define GGL_MAXVERTEXTEXTUREIMAGEUNITS 8
+#define GGL_MAXCOMBINEDTEXTUREIMAGEUNITS 16 /* samplers used in vertex + fragment */
+#define GGL_MAXTEXTUREIMAGEUNITS 8 /* samplers used in fragment only */
+#define GGL_MAXFRAGMENTUNIFORMVECTORS 16
+#define GGL_MAXDRAWBUFFERS 2
+
+// these describe the layout of VertexOutput when fed to fs, as Vector4 indices;
+// it must NOT change and must match struct VertexOutput in pixelflinger2_interface.h
+#define GGL_VS_OUTPUT_OFFSET 0
+#define GGL_VS_OUTPUT_POSITION_INDEX 1 // VertexOutput::position
+
+#define GGL_FS_INPUT_OFFSET 1 // vector4 index of first fs input in VertexOutput
+#define GGL_FS_INPUT_FRAGCOORD_INDEX 0 // fs input indices are relative to GGL_FS_INPUT_OFFSET
+#define GGL_FS_INPUT_VARYINGS_INDEX (GGL_FS_INPUT_FRAGCOORD_INDEX + 1)
+#define GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX (GGL_FS_INPUT_VARYINGS_INDEX + GGL_MAXVARYINGVECTORS)
+
+#define GGL_FS_OUTPUT_OFFSET (GGL_FS_INPUT_OFFSET + GGL_FS_INPUT_FRONTFACINGPOINTCOORD_INDEX + 1) // vector4 index of VertexOutput::fragColor
+#define GGL_FS_OUTPUT_FRAGCOLOR_INDEX 0
+
+#endif // _PIXELFLINGER2_CONSTANTS_H_
diff --git a/include/pixelflinger2/pixelflinger2_format.h b/include/pixelflinger2/pixelflinger2_format.h
new file mode 100644
index 0000000..f582cb5
--- /dev/null
+++ b/include/pixelflinger2/pixelflinger2_format.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _PIXELFLINGER2_FORMAT_H_
+#define _PIXELFLINGER2_FORMAT_H_
+
+enum GGLPixelFormat {
+    // these constants need to match those
+    // in graphics/PixelFormat.java, ui/PixelFormat.h, BlitHardware.h
+    GGL_PIXEL_FORMAT_UNKNOWN = 0,
+    GGL_PIXEL_FORMAT_NONE = 0,
+
+    GGL_PIXEL_FORMAT_RGBA_8888 = 1, // 4x8-bit RGBA (pointSample reads r from bits 0-7, a from bits 24-31)
+    GGL_PIXEL_FORMAT_RGBX_8888 = 2, // 3x8-bit RGB stored in 32-bit chunks
+// GGL_PIXEL_FORMAT_RGB_888 = 3, // 3x8-bit RGB
+    GGL_PIXEL_FORMAT_RGB_565 = 4, // 16-bit RGB
+// GGL_PIXEL_FORMAT_BGRA_8888 = 5, // 4x8-bit BGRA
+// GGL_PIXEL_FORMAT_RGBA_5551 = 6, // 16-bit RGBA
+// GGL_PIXEL_FORMAT_RGBA_4444 = 7, // 16-bit RGBA
+
+// GGL_PIXEL_FORMAT_A_8 = 8, // 8-bit A
+// GGL_PIXEL_FORMAT_L_8 = 9, // 8-bit L (R=G=B = L)
+// GGL_PIXEL_FORMAT_LA_88 = 0xA, // 16-bit LA
+// GGL_PIXEL_FORMAT_RGB_332 = 0xB, // 8-bit RGB (non paletted)
+
+    // reserved range. don't use.
+// GGL_PIXEL_FORMAT_RESERVED_10 = 0x10,
+// GGL_PIXEL_FORMAT_RESERVED_11 = 0x11,
+// GGL_PIXEL_FORMAT_RESERVED_12 = 0x12,
+// GGL_PIXEL_FORMAT_RESERVED_13 = 0x13,
+// GGL_PIXEL_FORMAT_RESERVED_14 = 0x14,
+// GGL_PIXEL_FORMAT_RESERVED_15 = 0x15,
+// GGL_PIXEL_FORMAT_RESERVED_16 = 0x16,
+// GGL_PIXEL_FORMAT_RESERVED_17 = 0x17,
+
+    // reserved/special formats
+// GGL_PIXEL_FORMAT_Z_16 = 0x18,
+    GGL_PIXEL_FORMAT_S_8 = 0x19,
+// GGL_PIXEL_FORMAT_SZ_24 = 0x1A,
+// GGL_PIXEL_FORMAT_SZ_8 = 0x1B,
+
+    GGL_PIXEL_FORMAT_Z_32 = 0x1C,
+
+    // reserved range. don't use.
+// GGL_PIXEL_FORMAT_RESERVED_20 = 0x20,
+// GGL_PIXEL_FORMAT_RESERVED_21 = 0x21,
+
+
+    // must be last; a sentinel value (0xFF), not the number of enumerators
+    GGL_PIXEL_FORMAT_COUNT = 0xFF
+};
+
+#endif // _PIXELFLINGER2_FORMAT_H_
diff --git a/include/pixelflinger2/pixelflinger2_interface.h b/include/pixelflinger2/pixelflinger2_interface.h
new file mode 100644
index 0000000..ee4cdca
--- /dev/null
+++ b/include/pixelflinger2/pixelflinger2_interface.h
@@ -0,0 +1,199 @@
+/**
+ **
+ ** Copyright 2010, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#ifndef _PIXELFLINGER2_INTERFACE_H_
+#define _PIXELFLINGER2_INTERFACE_H_
+
+#include "GLES2/gl2.h"
+#include "pixelflinger2/pixelflinger2_format.h"
+#include "pixelflinger2/pixelflinger2_constants.h"
+#include "pixelflinger2/pixelflinger2_vector4.h"
+
+typedef struct gl_shader gl_shader_t;
+typedef struct gl_shader_program gl_shader_program_t;
+
+typedef struct VertexInput {
+    Vector4 attributes[GGL_MAXVERTEXATTRIBS]; // vert input
+}
+#ifndef __arm__
+__attribute__ ((aligned (16))) // LLVM generates movaps on X86, needs 16 bytes align
+#endif
+VertexInput_t;
+
+// the layout must NOT change, and must match the #defines in pixelflinger2_constants.h
+typedef struct VertexOutput {
+    Vector4 pointSize; // vert output
+    Vector4 position; // vert output and frag input gl_FragCoord
+    Vector4 varyings[GGL_MAXVARYINGVECTORS];
+    Vector4 frontFacingPointCoord; // frag input, gl_FrontFacing gl_PointCoord yzw
+    Vector4 fragColor[GGL_MAXDRAWBUFFERS]; // frag output, gl_FragData
+}
+#ifndef __arm__
+__attribute__ ((aligned (16)))
+#endif
+VertexOutput_t ;
+
+typedef struct GGLSurface {
+    unsigned width, height;
+    enum GGLPixelFormat format;
+    void * data;
+    unsigned stride, version;
+} GGLSurface_t;
+
+typedef struct GGLTexture {
+    unsigned type; // GL_TEXTURE_2D, or GL_TEXTURE_CUBE_MAP
+
+    // currently only support RGBA_8888, RGBX_8888 and RGB_565
+    // storage uses either int or short
+    enum GGLPixelFormat format; // affects vs/fs jit
+
+    unsigned width, height; // base level dimension
+    unsigned levelCount; // mipmapped texture requires power-of-2 width and height
+
+    // data layout is level 0 of first surface (cubemap +x), level 0 of second surface (for cube map, -x),
+    // level 0 of 3rd surface (cubemap +y), cubemap level 0 -y, cubemap level 0 +z,
+    // cubemap level 0 -z, level 1 of first surface,
+    // levels[1] is level 1 of 1st surface, level 1 of 2nd surface ....
+    // levels[n] is max level of first surface...
+    // levels[n] + 5 * width * height is last level of last cube map face
+    void ** levels;
+
+    // the following affects vs/fs jit; must fit in byte; size used in GetShaderKey
+unsigned wrapS :
+2, wrapT :
+    2; // GL_REPEAT = 0, GL_CLAMP_TO_EDGE = 1, GL_MIRRORED_REPEAT = 2
+
+    // GL_NEAREST = 0, GL_LINEAR, GL_NEAREST_MIPMAP_NEAREST = 2,
+    // GL_LINEAR_MIPMAP_NEAREST, GL_NEAREST_MIPMAP_LINEAR, GL_LINEAR_MIPMAP_LINEAR = 5
+unsigned minFilter :
+    3;
+
+unsigned magFilter :
+    1; // GL_NEAREST = 0, GL_LINEAR
+} GGLTexture_t;
+
+// most functions follow the GL ES 2.0 spec and use GLenum values
+// there is some error checking for invalid GLenum
+typedef struct GGLInterface GGLInterface_t;
+struct GGLInterface {
+    // these 5 should be moved into libAgl2
+    void (* CullFace)(GGLInterface_t * iface, GLenum mode);
+    void (* FrontFace)(GGLInterface_t * iface, GLenum mode);
+    void (* DepthRangef)(GGLInterface_t * iface, GLclampf zNear, GLclampf zFar);
+    void (* Viewport)(GGLInterface_t * iface, GLint x, GLint y, GLsizei width, GLsizei height);
+    void (* ViewportTransform)(const GGLInterface_t * iface, Vector4 * v);
+
+
+    void (* BlendColor)(GGLInterface_t * iface, GLclampf red, GLclampf green,
+                        GLclampf blue, GLclampf alpha);
+    void (* BlendEquationSeparate)(GGLInterface_t * iface, GLenum modeRGB, GLenum modeAlpha);
+    void (* BlendFuncSeparate)(GGLInterface_t * iface, GLenum srcRGB, GLenum dstRGB,
+                               GLenum srcAlpha, GLenum dstAlpha);
+    void (* EnableDisable)(GGLInterface_t * iface, GLenum cap, GLboolean enable);
+
+    void (* DepthFunc)(GGLInterface_t * iface, GLenum func);
+    void (* StencilFuncSeparate)(GGLInterface_t * iface, GLenum face, GLenum func,
+                                 GLint ref, GLuint mask);
+    void (* StencilOpSeparate)(GGLInterface_t * iface, GLenum face, GLenum sfail,
+                               GLenum dpfail, GLenum dppass);
+    // select GL_FRONT or GL_BACK stencil state before raster/scanline
+    void (* StencilSelect)(const GGLInterface_t * iface, GLenum face);
+    void (* ClearStencil)(GGLInterface_t * iface, GLint s);
+    void (* ClearColor)(GGLInterface_t * iface, GLclampf r, GLclampf g, GLclampf b, GLclampf a);
+    void (* ClearDepthf)(GGLInterface_t * iface, GLclampf d);
+    void (* Clear)(const GGLInterface_t * iface, GLbitfield buf);
+
+    // shallow copy, surface data pointed to must be valid until texture is set to another texture
+    // libAgl2 needs to check ret of ShaderUniform to detect assigning to sampler unit
+    void (* SetSampler)(GGLInterface_t * iface, const unsigned sampler, GGLTexture_t * texture);
+
+    // shallow copy, surface data must remain valid; use GL_COLOR_BUFFER_BIT,
+    // GL_DEPTH_BUFFER_BIT, GL_STENCIL_BUFFER_BIT; format must be RGBA_8888, Z_32 or S_8
+    void (* SetBuffer)(GGLInterface_t * iface, const GLenum type, GGLSurface_t * surface);
+
+
+    // runs active vertex shader using currently set program; no error checking
+    void (* ProcessVertex)(const GGLInterface_t * iface, const VertexInput_t * input,
+                           VertexOutput_t * output);
+    // draws a triangle given 3 unprocessed vertices; should be moved into libAgl2
+    void (* DrawTriangle)(const GGLInterface_t * iface, const VertexInput_t * v0,
+                          const VertexInput_t * v1, const VertexInput_t * v2);
+    // rasters a vertex processed triangle using active program; scissors to frame surface
+    void (* RasterTriangle)(const GGLInterface_t * iface, const VertexOutput_t * v1,
+                            const VertexOutput_t * v2, const VertexOutput_t * v3);
+    // rasters a vertex processed trapezoid using active program; scissors to frame surface
+    void (* RasterTrapezoid)(const GGLInterface_t * iface, const VertexOutput_t * tl,
+                             const VertexOutput_t * tr, const VertexOutput_t * bl,
+                             const VertexOutput_t * br);
+
+    // scan line given left and right processed and scissored vertices
+    void (* ScanLine)(const GGLInterface_t * iface, const VertexOutput_t * v1,
+                      const VertexOutput_t * v2);
+
+    // creates empty shader
+    gl_shader_t * (* ShaderCreate)(const GGLInterface_t * iface, GLenum type);
+    // compiles a shader given glsl; returns GL_TRUE on success; glsl only used during call; use infoLog to retrieve status
+    GLboolean (* ShaderCompile)(const GGLInterface_t * iface, gl_shader_t * shader,
+                                const char * glsl, char ** infoLog);
+    // could be used after link if original shaders will not be linked in another program
+    void (* ShaderFree)(const GGLInterface_t * iface, gl_shader_t * shader);
+
+    // creates empty program
+    gl_shader_program_t * (* ShaderProgramCreate)(const GGLInterface_t * iface);
+    // duplicates shaders to program, and links varyings / attributes; can link 1 shader
+    GLboolean (* ShaderProgramLink)(const GGLInterface_t * iface, gl_shader_program_t * program,
+                                    const unsigned count, gl_shader_t ** shaders, char ** infoLog);
+    // frees program
+    void (* ShaderProgramFree)(const GGLInterface_t * iface, gl_shader_program_t * program);
+
+    // LLVM JIT and set as active program
+    void (* ShaderUse)(GGLInterface_t * iface, gl_shader_program_t * program);
+    // bind attribute location before linking
+    void (* ShaderAttributeBind)(const GGLInterface_t * iface, const gl_shader_program_t * program,
+                                 GLuint index, const GLchar * name);
+    GLint (* ShaderAttributeLocation)(const GGLInterface_t * iface, const gl_shader_program_t * program,
+                                      const char * name);
+    // gets uniform location for linked program
+    GLint (* ShaderUniformLocation)(const GGLInterface_t * iface, const gl_shader_program_t * program,
+                                    const char * name);
+    void (* ShaderUniformGetfv)(const GGLInterface_t * iface, gl_shader_program_t * program,
+                                GLint location, GLfloat * params);
+    void (* ShaderUniformGetiv)(const GGLInterface_t * iface, gl_shader_program_t * program,
+                                GLint location, GLint * params);
+    // updates linked program uniform value by location; return >= 0 indicates sampler assigned
+    GLint (* ShaderUniform)(const GGLInterface_t * iface, gl_shader_program_t * program,
+                            GLint location, GLsizei count, const GLvoid *values, GLenum type);
+    // updates linked program uniform matrix value by location
+    void (* ShaderUniformMatrix)(const GGLInterface_t * iface, gl_shader_program_t * program,
+                                 GLint cols, GLint rows, GLint location, GLsizei count,
+                                 GLboolean transpose, const GLfloat *values);
+};
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    GGLInterface_t * CreateGGLInterface();
+
+    void DestroyGGLInterface(GGLInterface_t * interface);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // #ifndef _PIXELFLINGER2_INTERFACE_H_
diff --git a/include/pixelflinger2/pixelflinger2_vector4.h b/include/pixelflinger2/pixelflinger2_vector4.h
new file mode 100644
index 0000000..a77bda8
--- /dev/null
+++ b/include/pixelflinger2/pixelflinger2_vector4.h
@@ -0,0 +1,199 @@
+/**
+ **
+ ** Copyright 2010, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */ + +#ifndef _PIXELFLINGER2_VECTOR4_H_ +#define _PIXELFLINGER2_VECTOR4_H_ + +#ifdef __cplusplus + +template <typename Type> struct Vec4 { + union { + struct { Type x, y, z, w; }; + struct { Type r, g, b, a; }; + struct { Type S, T, R, Q; }; +#if !USE_FIXED_POINT + float f[4]; + unsigned u[4]; + int i[4]; +#endif +#if defined(__ARM_HAVE_NEON) && USE_NEON + float32x4_t f4; +#endif + }; + + //Vec4() : x(0), y(0), z(0), w(0) {} + Vec4() {} + Vec4(Type X, Type Y, Type Z, Type W) : x(X), y(Y), z(Z), w(W) {} + Vec4(Type X) : x(X), y(X), z(X), w(X) {} + +#define VECTOR4_OP_UNARY(op,rhs) { \ +x op rhs.x; \ +y op rhs.y; \ +z op rhs.z; \ +w op rhs.w; } + +#define VECTOR4_OP_UNARY_SCALAR(op,rhs) { \ +x op rhs; \ +y op rhs; \ +z op rhs; \ +w op rhs; } + + inline void operator += (const Vec4<Type> & rhs) __attribute__((always_inline)) + VECTOR4_OP_UNARY(+=,rhs) + inline void operator -= (const Vec4<Type> & rhs) __attribute__((always_inline)) + VECTOR4_OP_UNARY(-=,rhs) + inline void operator *= (const Vec4<Type> & rhs) __attribute__((always_inline)) + VECTOR4_OP_UNARY(*=,rhs) + inline void operator /= (const Vec4<Type> & rhs) __attribute__((always_inline)) + VECTOR4_OP_UNARY(/=,rhs) + inline void operator *= (Type rhs) __attribute__((always_inline)) + VECTOR4_OP_UNARY_SCALAR(*=,rhs) + inline void operator /= (Type rhs) __attribute__((always_inline)) + VECTOR4_OP_UNARY_SCALAR(/=,rhs) + + inline Vec4 operator+(const Vec4 & rhs) const + { Vec4 res = *this; res += rhs; return res; } + +#undef VECTOR4_OP_UNARY +#undef VECTOR4_OP_UNARY_SCALAR + + void CrossProduct3(const Vec4<Type> & lhs, const Vec4<Type> & rhs) + { + x = lhs.y * rhs.z - lhs.z * rhs.y; + y = lhs.z * rhs.x - lhs.x * rhs.z; + z = lhs.y * rhs.x - lhs.x * rhs.y; + w = 0; + } + + void LShr(const unsigned shift) { u[0] >>= shift; u[1] >>= shift; u[2] >>= shift; u[3] >>= shift; } + void AShr(const unsigned shift) { i[0] >>= shift; i[1] >>= shift; i[2] >>= shift; i[3] >>= shift; } + + bool operator==(const Vec4 & rhs) const 
{ return u[0] == rhs.u[0] && u[1] == rhs.u[1] && u[2] == rhs.u[2] && u[3] == rhs.u[3]; } + bool operator!=(const Vec4 & rhs) const { return !(*this == rhs); } +}; + +#if defined(__ARM_HAVE_NEON) && USE_NEON +template <> inline void Vec4<float>::operator += (const Vec4<float> & rhs) __attribute__((always_inline)); +template <> inline void Vec4<float>::operator += (const Vec4<float> & rhs) +{ f4 = vaddq_f32(f4, rhs.f4); } +template <> inline void Vec4<float>::operator -= (const Vec4<float> & rhs) __attribute__((always_inline)); +template <> inline void Vec4<float>::operator -= (const Vec4<float> & rhs) +{ f4 = vsubq_f32(f4, rhs.f4); } +template <> inline void Vec4<float>::operator *= (float rhs) __attribute__((always_inline)); +template <> inline void Vec4<float>::operator *= (float rhs) +{ f4 = vmulq_n_f32(f4, rhs); } +template <> inline void Vec4<float>::operator /= (float rhs) __attribute__((always_inline)); +template <> inline void Vec4<float>::operator /= (float rhs) +{ f4 = vmulq_n_f32(f4, 1 / rhs); } +#endif // #if defined(__ARM_HAVE_NEON) && USE_NEON + +#if USE_FIXED_POINT +deprecated, should be removed +/*#define FIXED_POINT_ONE 0x10000 +#define FIXED_POINT_SHIFT 16 +struct FixedPoint +{ + int val; + //FixedPoint() {} + //explicit FixedPoint(int v) : val(v << FIXED_POINT_SHIFT) {} + //explicit FixedPoint(float v) : val(v * (2 << FIXED_POINT_SHIFT)) {} + //explicit FixedPoint(double v) : val(v * (2 << FIXED_POINT_SHIFT)) {} + static FixedPoint From(int v) { FixedPoint x; x.val = v << FIXED_POINT_SHIFT; return x; } + static FixedPoint From(unsigned v) { FixedPoint x; x.val = v << FIXED_POINT_SHIFT; return x; } + static FixedPoint From(float v) { FixedPoint x; x.val = v * (2 << FIXED_POINT_SHIFT); return x; } + static FixedPoint One() { FixedPoint x; x.val = FIXED_POINT_ONE; return x; } + static FixedPoint Zero() { FixedPoint x; x.val = 0; return x; } + FixedPoint operator-() const + { + FixedPoint res; + res.val = -val; + return res; + } + FixedPoint 
operator+(const FixedPoint & rhs) const + { + FixedPoint res; + res.val = val + rhs.val; + return res; + } + FixedPoint operator-(const FixedPoint & rhs) const + { + FixedPoint res; + res.val = val - rhs.val; + return res; + } + FixedPoint operator*(const FixedPoint & rhs) const + { + FixedPoint res; + res.val = (val >> 8) * (rhs.val >> 8); + return res; + } + FixedPoint operator/(const FixedPoint & rhs) const + { + FixedPoint res; + + long long lh = (long long)val << 32, rh = rhs.val | 1; + lh /= rh; + rh = (lh >> 16) & 0xffffffffL; + res.val = rh; + return res; + + //res.val = ((val << 2) / (rhs.val >> 6 | 1)) << 8; + //return res; + } + void operator+=(const FixedPoint & rhs) { val += rhs.val; } + void operator-=(const FixedPoint & rhs) { val += rhs.val; } + void operator*=(const FixedPoint & rhs) { *this = *this * rhs; } + void operator/=(const FixedPoint & rhs) { *this = *this / rhs; } + + bool operator<(const FixedPoint & rhs) const { return val < rhs.val; } + bool operator>(const FixedPoint & rhs) const { return val > rhs.val; } + bool operator<=(const FixedPoint & rhs) const { return val <= rhs.val; } + bool operator>=(const FixedPoint & rhs) const { return val >= rhs.val; } + bool operator==(const FixedPoint & rhs) const { return val == rhs.val; } + bool operator!=(const FixedPoint & rhs) const { return val != rhs.val; } + + operator int() const { return val >> FIXED_POINT_SHIFT; } + operator unsigned() const { return val >> FIXED_POINT_SHIFT; } + operator float() const { return (float)val / FIXED_POINT_ONE; } +}; + +typedef FixedPoint VectorComp_t; +typedef Vec4<VectorComp_t> Vector4; +#define Vector4_CTR(x,y,z,w) Vector4(FixedPoint::From(x), FixedPoint::From(y), \ + FixedPoint::From(z), FixedPoint::From(w)) +#define VectorComp_t_CTR(x) FixedPoint::From(x) +#define VectorComp_t_Zero FixedPoint::Zero() +#define VectorComp_t_One FixedPoint::One()*/ + +#else // if USE_FIXED_POINT + +typedef float VectorComp_t; +typedef struct Vec4<VectorComp_t> Vector4; 
+#define Vector4_CTR(x,y,z,w) Vector4(x,y,z,w) +#define VectorComp_t_CTR(x) (float)(x) +#define VectorComp_t_Zero 0 +#define VectorComp_t_One 1 + +#endif // if USE_FIXED_POINT + +#else // #ifdef __cplusplus + +typedef float Vector4 [4]; + +#endif // #ifdef __cplusplus + +#endif // #ifndef _PIXELFLINGER2_VECTOR4_H_
\ No newline at end of file diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index e4312ae..f5e6855 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -30,7 +30,7 @@ #include <cassert> extern "C" { -#include "GL/gl.h" +#include "GLES2/gl2.h" #include <hieralloc.h> } diff --git a/src/glsl/ir_to_llvm.cpp b/src/glsl/ir_to_llvm.cpp index f457c82..e09e3d4 100644 --- a/src/glsl/ir_to_llvm.cpp +++ b/src/glsl/ir_to_llvm.cpp @@ -62,9 +62,17 @@ using namespace tr1; #include "ir_visitor.h" #include "glsl_types.h" +struct GGLContext; + +llvm::Value * tex2D(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler, + const GGLContext * gglCtx); +llvm::Value * texCube(llvm::IRBuilder<> & builder, llvm::Value * in1, const unsigned sampler, + const GGLContext * gglCtx); + class ir_to_llvm_visitor : public ir_visitor { -public: ir_to_llvm_visitor(); +public: + llvm::LLVMContext& ctx; llvm::Module* mod; @@ -74,9 +82,11 @@ public: llvm::BasicBlock* bb; llvm::Value* result; llvm::IRBuilder<> bld; + + const GGLContext * gglCtx; - ir_to_llvm_visitor(llvm::LLVMContext& p_ctx, llvm::Module* p_mod) - : ctx(p_ctx), mod(p_mod), fun(0), loop(std::make_pair((llvm::BasicBlock*)0, (llvm::BasicBlock*)0)), bb(0), bld(ctx) + ir_to_llvm_visitor(llvm::LLVMContext& p_ctx, llvm::Module* p_mod, const GGLContext * GGLCtx) + : ctx(p_ctx), mod(p_mod), fun(0), loop(std::make_pair((llvm::BasicBlock*)0, (llvm::BasicBlock*)0)), bb(0), bld(ctx), gglCtx(GGLCtx) { } @@ -809,7 +819,10 @@ public: virtual void visit(class ir_texture * ir) { - // TODO + assert(ir_tex == ir->op); + llvm::Value * coordinate = llvm_value(ir->coordinate); + result = tex2D(bld, coordinate, 0, gglCtx); + assert(result); } virtual void visit(class ir_discard * ir) @@ -1198,11 +1211,11 @@ public: }; struct llvm::Module * -glsl_ir_to_llvm_module(struct exec_list *ir) +glsl_ir_to_llvm_module(struct exec_list *ir, const GGLContext * gglCtx) { llvm::LLVMContext& ctx = llvm::getGlobalContext(); llvm::Module* 
mod = new llvm::Module("glsl", ctx); - ir_to_llvm_visitor v(ctx, mod); + ir_to_llvm_visitor v(ctx, mod, gglCtx); visit_exec_list(ir, &v); diff --git a/src/glsl/ir_to_llvm.h b/src/glsl/ir_to_llvm.h index 64acf19..fa2154c 100644 --- a/src/glsl/ir_to_llvm.h +++ b/src/glsl/ir_to_llvm.h @@ -4,6 +4,6 @@ #include "llvm/Module.h" #include "ir.h" -struct llvm::Module * glsl_ir_to_llvm_module(struct exec_list *ir); +struct llvm::Module * glsl_ir_to_llvm_module(struct exec_list *ir, const struct GGLContext * gglCtx); #endif /* IR_TO_LLVM_H_ */ diff --git a/src/glsl/ir_to_llvm_helper.cpp b/src/glsl/ir_to_llvm_helper.cpp new file mode 100644 index 0000000..5fd4045 --- /dev/null +++ b/src/glsl/ir_to_llvm_helper.cpp @@ -0,0 +1,619 @@ +#include <stack> +#include <stdio.h> + +#include "src/pixelflinger2/pixelflinger2.h" + +#include <llvm/Support/IRBuilder.h> +#include <llvm/Module.h> + +using namespace llvm; + +static const char * name(const char * str) +{ + return str; +} + +static Value * minIntScalar(IRBuilder<> &builder, Value * in1, Value * in2) +{ + Value * cmp = builder.CreateICmpSLT(in1, in2); + return builder.CreateSelect(cmp, in1, in2); +} + +static Value * maxIntScalar(IRBuilder<> &builder, Value * in1, Value * in2) +{ + Value * cmp = builder.CreateICmpSGT(in1, in2); + return builder.CreateSelect(cmp, in1, in2); +} + +static Constant * constFloat(IRBuilder<> & builder, float x) +{ + return ConstantFP::get(builder.getContext(), APFloat(x)); +} + +static VectorType * intVecType(IRBuilder<> & builder) +{ + return VectorType::get(Type::getInt32Ty(builder.getContext()), 4); +} + +static VectorType * floatVecType(IRBuilder<> & builder) +{ + return VectorType::get(Type::getFloatTy(builder.getContext()), 4); +} + +static Value * constIntVec(IRBuilder<> & builder, int x, int y, int z, int w) +{ + std::vector<Constant *> vec(4); + vec[0] = builder.getInt32(x); + vec[1] = builder.getInt32(y); + vec[2] = builder.getInt32(z); + vec[3] = builder.getInt32(w); + return 
ConstantVector::get(intVecType(builder), vec);
+}
+
+static Value * intVec(IRBuilder<> & builder, Value * x, Value * y, Value * z, Value * w) // builds <4 x i32> from scalars; a NULL w leaves lane 3 at zero
+{
+    Value * res = Constant::getNullValue(intVecType(builder));
+    res = builder.CreateInsertElement(res, x, builder.getInt32(0), name("vecx"));
+    res = builder.CreateInsertElement(res, y, builder.getInt32(1), name("vecy"));
+    res = builder.CreateInsertElement(res, z, builder.getInt32(2), name("vecz"));
+    if (w)
+        res = builder.CreateInsertElement(res, w, builder.getInt32(3), name("vecw"));
+    return res;
+}
+
+static Value * constFloatVec(IRBuilder<> & builder, float x, float y, float z, float w) // constant <4 x float> (x, y, z, w)
+{
+    std::vector<Constant *> vec(4);
+    vec[0] = constFloat(builder, x);
+    vec[1] = constFloat(builder, y);
+    vec[2] = constFloat(builder, z);
+    vec[3] = constFloat(builder, w);
+    return ConstantVector::get(floatVecType(builder), vec);
+}
+
+std::vector<Value *> extractVector(IRBuilder<> & builder, Value *vec) // emits four extractelement instructions, lanes 0..3 named x, y, z, w
+{
+    std::vector<Value*> elems(4);
+    elems[0] = builder.CreateExtractElement(vec, builder.getInt32(0), name("x"));
+    elems[1] = builder.CreateExtractElement(vec, builder.getInt32(1), name("y"));
+    elems[2] = builder.CreateExtractElement(vec, builder.getInt32(2), name("z"));
+    elems[3] = builder.CreateExtractElement(vec, builder.getInt32(3), name("w"));
+    return elems;
+}
+
+// <4 x i32> [0, 255] to <4 x float> [0.0, 1.0]
+static Value * intColorVecToFloatColorVec(IRBuilder<> & builder, Value * vec)
+{
+// return builder.CreateBitCast(vec, floatVecType(builder));
+    vec = builder.CreateUIToFP(vec, floatVecType(builder));
+    return builder.CreateFMul(vec, constFloatVec(builder, 1 / 255.0f, 1 / 255.0f,
+                              1 / 255.0f, 1 / 255.0f));
+}
+
+// texture data is int pointer to surface (will cast to short for 16bpp), index is linear texel index,
+// format is GGLPixelFormat for surface, return type is <4 x i32> rgba (each lane masked to [0, 255])
+Value * pointSample(IRBuilder<> & builder, Value * textureData, Value * index, const GGLPixelFormat format)
+{
+    Value * texel = NULL;
+    switch (format) {
+    case GGL_PIXEL_FORMAT_RGBA_8888:
+        textureData = builder.CreateGEP(textureData, index);
+        texel = builder.CreateLoad(textureData, "texel");
+        break;
+    case GGL_PIXEL_FORMAT_RGBX_8888:
+        textureData = builder.CreateGEP(textureData, index);
+        texel = builder.CreateLoad(textureData, "texel");
+        texel = builder.CreateOr(texel, builder.getInt32(0xff000000)); // force the top byte (alpha lane) to 0xff
+        break;
+    case GGL_PIXEL_FORMAT_RGB_565: {
+        textureData = builder.CreateBitCast(textureData, PointerType::get(
+                          Type::getInt16Ty(builder.getContext()),0));
+        textureData = builder.CreateGEP(textureData, index);
+        texel = builder.CreateLoad(textureData, "texel565");
+        texel = builder.CreateZExt(texel, Type::getInt32Ty(builder.getContext()));
+
+        Value * r = builder.CreateAnd(texel, builder.getInt32(0x1f)); // low 5 bits -> bits 0-7; NOTE(review): usual RGB565 keeps blue in the low bits, confirm r/b naming
+        r = builder.CreateShl(r, builder.getInt32(3));
+        r = builder.CreateOr(r, builder.CreateLShr(r, builder.getInt32(5))); // replicate high bits into the low 3 bits
+
+        Value * g = builder.CreateAnd(texel, builder.getInt32(0x7e0)); // middle 6 bits -> bits 8-15
+        g = builder.CreateShl(g, builder.getInt32(5));
+        g = builder.CreateOr(g, builder.CreateLShr(g, builder.getInt32(6)));
+        g = builder.CreateAnd(g, builder.getInt32(0xff00));
+
+        Value * b = builder.CreateAnd(texel, builder.getInt32(0xF800)); // high 5 bits -> bits 16-23
+        b = builder.CreateShl(b, builder.getInt32(8));
+        b = builder.CreateOr(b, builder.CreateLShr(b, builder.getInt32(5)));
+        b = builder.CreateAnd(b, builder.getInt32(0xff0000));
+
+        texel = builder.CreateOr(r, builder.CreateOr(g, b));
+        texel = builder.CreateOr(texel, builder.getInt32(0xff000000), name("texel"));
+        break;
+    }
+    case GGL_PIXEL_FORMAT_UNKNOWN: // usually means texture not set yet
+        debug_printf("pointSample: unknown format, default to 0xff0000ff \n");
+        texel = builder.getInt32(0xff0000ff);
+        break;
+    default:
+        assert(0); // NOTE(review): if assert is compiled out (NDEBUG), texel stays NULL and is passed to CreateInsertElement below
+        break;
+    }
+
+    Value * channels = Constant::getNullValue(intVecType(builder));
+
+// if (dstDesc && dstDesc->IsInt32Color()) {
+// channels = builder.CreateInsertElement(channels, texel, builder.getInt32(0));
+// channels = builder.CreateBitCast(channels, floatVecType(builder));
+// return channels;
+// } else if (!dstDesc || dstDesc->IsVectorType()) {
+    channels = builder.CreateInsertElement(channels, texel, builder.getInt32(0)); // copy the packed texel into all four lanes
+    channels = builder.CreateInsertElement(channels, texel, builder.getInt32(1));
+    channels = builder.CreateInsertElement(channels, texel, builder.getInt32(2));
+    channels = builder.CreateInsertElement(channels, texel, builder.getInt32(3));
+// if (dstDesc && dstDesc->IsVectorType(Fixed8)) {
+// channels = builder.CreateLShr(channels, constIntVec(builder, 0, 8, 16, 24));
+// channels = builder.CreateAnd(channels, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
+// channels = builder.CreateBitCast(channels, floatVecType(builder));
+// } else if (dstDesc && dstDesc->IsVectorType(Fixed16)) {
+// channels = builder.CreateShl(channels, constIntVec(builder, 8, 0, 0, 0));
+// channels = builder.CreateLShr(channels, constIntVec(builder, 0, 0, 8, 16));
+// channels = builder.CreateAnd(channels, constIntVec(builder, 0xff00, 0xff00, 0xff00, 0xff00));
+// channels = builder.CreateBitCast(channels, floatVecType(builder));
+// } else if (!dstDesc || dstDesc->IsVectorType(Float)) { // no analysis done in vertex shader, so use default float [0,1] output
+    channels = builder.CreateLShr(channels, constIntVec(builder, 0, 8, 16, 24)); // lane k keeps byte k of the texel
+    channels = builder.CreateAnd(channels, constIntVec(builder, 0xff, 0xff, 0xff, 0xff));
+// channels = builder.CreateUIToFP(channels, floatVecType(builder));
+// channels = builder.CreateFMul(channels, constFloatVec(builder, 1 / 255.0f, 1 / 255.0f,
+// 1 / 255.0f, 1 / 255.0f));
+// } else
+// assert(0);
+// } else
+// assert(0);
+
+    return channels;
+}
+
+static const unsigned SHIFT = 16; // right-shift applied after multiplying by the lerp weights in linearSample
+
+// w = width - 1, h = height - 1; similar to pointSample; returns <4 x i32> rgba
+Value * linearSample(IRBuilder<> & builder, Value * textureData, Value * indexOffset,
+                     Value * x0, Value * y0, Value * xLerp, Value * yLerp,
+                     Value * w, Value * h, Value * width,
Value * height, + const GGLPixelFormat format/*, const RegDesc * dstDesc*/) +{ + // TODO: linear filtering needs to be fixed for texcoord outside of [0,1] + Value * x1 = builder.CreateAdd(x0, builder.getInt32(1)); + x1 = minIntScalar(builder, x1, w); + Value * y1 = builder.CreateAdd(y0, builder.getInt32(1)); + y1 = minIntScalar(builder, y1, h); + +// RegDesc regDesc; +// regDesc.SetVectorType(Fixed8); + + Value * index = builder.CreateMul(y0, width); + index = builder.CreateAdd(index, x0); + index = builder.CreateAdd(index, indexOffset); + Value * s0 = pointSample(builder, textureData, index, format/*, ®Desc*/); +// s0 = builder.CreateBitCast(s0, intVecType(builder)); + + index = builder.CreateMul(y0, width); + index = builder.CreateAdd(index, x1); + index = builder.CreateAdd(index, indexOffset); + Value * s1 = pointSample(builder, textureData, index, format/*, ®Desc*/); +// s1 = builder.CreateBitCast(s1, intVecType(builder)); + + index = builder.CreateMul(y1, width); + index = builder.CreateAdd(index, x1); + index = builder.CreateAdd(index, indexOffset); + Value * s2 = pointSample(builder, textureData, index, format/*, ®Desc*/); +// s2 = builder.CreateBitCast(s2, intVecType(builder)); + + index = builder.CreateMul(y1, width); + index = builder.CreateAdd(index, x0); + index = builder.CreateAdd(index, indexOffset); + Value * s3 = pointSample(builder, textureData, index, format/*, ®Desc*/); +// s3 = builder.CreateBitCast(s3, intVecType(builder)); + + Value * xLerpVec = intVec(builder, xLerp, xLerp, xLerp, xLerp); + + Value * h0 = builder.CreateMul(builder.CreateSub(s1, s0), xLerpVec); + // arithmetic shift right, since it's the result of subtraction, which could be negative + h0 = builder.CreateAShr(h0, constIntVec(builder, SHIFT, SHIFT, SHIFT, SHIFT)); + h0 = builder.CreateAdd(h0, s0); + + Value * h1 = builder.CreateMul(builder.CreateSub(s2, s3), xLerpVec); + h1 = builder.CreateAShr(h1, constIntVec(builder, SHIFT, SHIFT, SHIFT, SHIFT)); + h1 = builder.CreateAdd(h1, 
s3); + + Value * sample = builder.CreateMul(builder.CreateSub(h1, h0), + intVec(builder, yLerp, yLerp, yLerp, yLerp)); + sample = builder.CreateAShr(sample, constIntVec(builder, SHIFT, SHIFT, SHIFT, SHIFT)); + sample = builder.CreateAdd(sample, h0); + + return sample; +// if (!dstDesc || dstDesc->IsVectorType(Float)) { +// sample = builder.CreateUIToFP(sample, floatVecType(builder)); +// return builder.CreateFMul(sample, constFloatVec(builder, 1 / 255.0f, 1 / 255.0f, +// 1 / 255.0f, 1 / 255.0f)); +// } else if (dstDesc && dstDesc->IsVectorType(Fixed16)) { +// sample = builder.CreateShl(sample, constIntVec(builder, 8, 8, 8, 8)); +// return builder.CreateBitCast(sample, floatVecType(builder)); +// } else if (dstDesc && dstDesc->IsVectorType(Fixed8)) +// return builder.CreateBitCast(sample, floatVecType(builder)); +// else if (dstDesc && dstDesc->IsInt32Color()) { +// sample = builder.CreateShl(sample, constIntVec(builder, 0, 8, 16, 24)); +// std::vector<llvm::Value*> samples = extractVector(sample); +// samples[0] = builder.CreateOr(samples[0], samples[1]); +// samples[0] = builder.CreateOr(samples[0], samples[2]); +// samples[0] = builder.CreateOr(samples[0], samples[3]); +// sample = builder.CreateInsertElement(sample, samples[0], builder.getInt32(0)); +// return builder.CreateBitCast(sample, floatVecType(builder)); +// } else +// assert(0); +} + +class CondBranch +{ + IRBuilder<> & m_builder; + std::stack<BasicBlock *> m_ifStack; + +public: + CondBranch(IRBuilder<> & builder) : m_builder(builder) {} + ~CondBranch() { + assert(m_ifStack.empty()); + } + + void ifCond(Value * cmp, const char * trueBlock = "ifT", const char * falseBlock = "ifF") { + Function * function = m_builder.GetInsertBlock()->getParent(); + BasicBlock * ifthen = BasicBlock::Create(m_builder.getContext(), name(trueBlock), function, NULL); + BasicBlock * ifend = BasicBlock::Create(m_builder.getContext(), name(falseBlock), function, NULL); + m_builder.CreateCondBr(cmp, ifthen, ifend); + 
m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); + } + + void elseop() { + assert(!m_ifStack.empty()); + BasicBlock *ifend = BasicBlock::Create(m_builder.getContext(), name("else_end"), m_builder.GetInsertBlock()->getParent(),0); + if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator + m_builder.CreateBr(ifend); // branch is also a block terminator + else { + debug_printf("Instructions::elseop block alread has terminator \n"); + m_builder.GetInsertBlock()->getTerminator()->dump(); + assert(0); + } + m_builder.SetInsertPoint(m_ifStack.top()); + m_builder.GetInsertBlock()->setName(name("else_then")); + m_ifStack.pop(); + m_ifStack.push(ifend); + } + + void endif() { + assert(!m_ifStack.empty()); + if (!m_builder.GetInsertBlock()->getTerminator()) // ret void is a block terminator + m_builder.CreateBr(m_ifStack.top()); // branch is also a block terminator + else { + debug_printf("Instructions::endif block alread has terminator"); + m_builder.GetInsertBlock()->getTerminator()->dump(); + assert(0); + } + m_builder.SetInsertPoint(m_ifStack.top()); + m_ifStack.pop(); + } +}; + +// dim is size - 1, since [0.0f,1.0f]->[0, size - 1] +static Value * texcoordWrap(IRBuilder<> & builder, const unsigned wrap, + /*const ChannelType type,*/ Value * r, Value * size, Value * dim, + Value ** texelLerp) +{ + const Type * intType = Type::getInt32Ty(builder.getContext()); + Value * tc = NULL; + Value * odd = NULL; +// if (Float == type) { + // convert float to fixed16 so that 16LSB are the remainder, and bit 16 is one + // mantissa is the amount between two texels, used for linear interpolation + tc = ConstantFP::get(builder.getContext(), APFloat(float(1 << SHIFT))); + tc = builder.CreateFMul(tc, r); + tc = builder.CreateFPToSI(tc, intType); +// } else if (Fixed16 == type) { +// assert(16 == SHIFT); +// tc = builder.CreateBitCast(r, Type::getInt32Ty(builder.getContext())); +// } else +// assert(0); + + odd = builder.CreateAnd(tc, 
builder.getInt32(1 << SHIFT), name("tc_odd")); + + if (0 == wrap || 2 == wrap) // just the mantissa for wrap and mirrored + tc = builder.CreateAnd(tc, builder.getInt32((1 << SHIFT) - 1)); + + tc = builder.CreateMul(tc, dim); + + *texelLerp = builder.CreateAnd(tc, builder.getInt32((1 << SHIFT) - 1)); + + tc = builder.CreateLShr(tc, builder.getInt32(SHIFT)); + + if (0 == wrap) // GL_REPEAT + { } else if (1 == wrap) { // GL_CLAMP_TO_EDGE + tc = maxIntScalar(builder, tc, builder.getInt32(0)); + tc = minIntScalar(builder, tc, dim); + } else if (2 == wrap) { // GL_MIRRORER_REPEAT + Value * tcPtr = builder.CreateAlloca(intType); + builder.CreateStore(tc, tcPtr); + odd = builder.CreateICmpNE(odd, builder.getInt32(0)); + + CondBranch condBranch(builder); + condBranch.ifCond(odd); + + tc = builder.CreateSub(dim, tc, name("tc_mirrored")); + builder.CreateStore(tc, tcPtr); + + condBranch.endif(); + + tc = builder.CreateLoad(tcPtr); + } else + assert(0); + + return tc; +} + +Value * tex2D(IRBuilder<> & builder, Value * in1, const unsigned sampler, + /*const RegDesc * in1Desc, const RegDesc * dstDesc,*/ + const GGLContext * gglCtx) +{ + const Type * intType = builder.getInt32Ty(); + const PointerType * intPointerType = PointerType::get(intType, 0); + + llvm::Module * module = builder.GetInsertBlock()->getParent()->getParent(); + std::vector<Value * > texcoords = extractVector(builder, in1); + + Value * textureDimensions = module->getGlobalVariable(_PF2_TEXTURE_DIMENSIONS_NAME_); + if (!textureDimensions) + textureDimensions = new GlobalVariable(*module, intType, true, + GlobalValue::ExternalLinkage, + NULL, _PF2_TEXTURE_DIMENSIONS_NAME_); + Value * textureWidth = builder.CreateConstInBoundsGEP1_32(textureDimensions, + sampler * 2); + textureWidth = builder.CreateLoad(textureWidth, name("textureWidth")); + Value * textureHeight = builder.CreateConstInBoundsGEP1_32(textureDimensions, + sampler * 2 + 1); + textureHeight = builder.CreateLoad(textureHeight, name("textureHeight")); + 
Value * textureW = builder.CreateSub(textureWidth, builder.getInt32(1)); + Value * textureH = builder.CreateSub(textureHeight, builder.getInt32(1)); +// ChannelType sType = Float, tType = Float; +// if (in1Desc) { +// sType = in1Desc->channels[0]; +// tType = in1Desc->channels[1]; +// } + + Value * xLerp = NULL, * yLerp = NULL; + Value * x = texcoordWrap(builder, gglCtx->textureState.textures[sampler].wrapS, + /*sType, */texcoords[0], textureWidth, textureW, &xLerp); + Value * y = texcoordWrap(builder, gglCtx->textureState.textures[sampler].wrapT, + /*tType, */texcoords[1], textureHeight, textureH, &yLerp); + + Value * index = builder.CreateMul(y, textureWidth); + index = builder.CreateAdd(index, x); + + Value * textureData = module->getGlobalVariable(_PF2_TEXTURE_DATA_NAME_); + if (!textureData) + textureData = new GlobalVariable(*module, intPointerType, + true, GlobalValue::ExternalLinkage, + NULL, _PF2_TEXTURE_DATA_NAME_); + + textureData = builder.CreateConstInBoundsGEP1_32(textureData, sampler); + textureData = builder.CreateLoad(textureData); + + if (0 == gglCtx->textureState.textures[sampler].minFilter && + 0 == gglCtx->textureState.textures[sampler].magFilter) { // GL_NEAREST + Value * ret = pointSample(builder, textureData, index, + gglCtx->textureState.textures[sampler].format/*, dstDesc*/); + ret->dump(); + return intColorVecToFloatColorVec(builder, ret); + } else if (1 == gglCtx->textureState.textures[sampler].minFilter && + 1 == gglCtx->textureState.textures[sampler].magFilter) { // GL_LINEAR + Value * ret = linearSample(builder, textureData, builder.getInt32(0), x, y, xLerp, yLerp, + textureW, textureH, textureWidth, textureHeight, + gglCtx->textureState.textures[sampler].format/*, dstDesc*/); + ret->dump(); + return intColorVecToFloatColorVec(builder, ret); + } else + assert(!"unsupported texture filter"); + return NULL; +} + +// only positive float; used in cube map since major axis is positive +static Value * FCmpGT(IRBuilder<> & builder, Value * 
lhs, Value * rhs) +{ + const Type * const intType = Type::getInt32Ty(builder.getContext()); + lhs = builder.CreateBitCast(lhs, intType); + rhs = builder.CreateBitCast(rhs, intType); + return builder.CreateICmpUGT(lhs, rhs); +} + +static Value * FPositive(IRBuilder<> & builder, Value * val) +{ + // float cmp faster here + return builder.CreateFCmpOGE(val, Constant::getNullValue(builder.getFloatTy())); + //val = builder.CreateBitCast(val, Type::getInt32Ty(builder.getContext())); + //return builder.CreateICmpSGE(val, storage->constantInt(0)); + //val = builder.CreateAnd(val, storage->constantInt(0x80000000)); + //return builder.CreateICmpNE(val, storage->constantInt(0)); +} + +static Value * Fabs(IRBuilder<> & builder, Value * val) +{ + val = builder.CreateBitCast(val, builder.getInt32Ty()); + val = builder.CreateAnd(val, builder.getInt32(~0x80000000)); + return builder.CreateBitCast(val, builder.getFloatTy()); + //return builder.CreateICmpSGE(val, storage->constantInt(0)); +} + +Value * texCube(IRBuilder<> & builder, Value * in1, const unsigned sampler, + /*const RegDesc * in1Desc, const RegDesc * dstDesc,*/ + const GGLContext * gglCtx) +{ +// if (in1Desc) // the major axis determination code is only float for now +// assert(in1Desc->IsVectorType(Float)); + + const Type * const intType = builder.getInt32Ty(); + const PointerType * const intPointerType = PointerType::get(intType, 0); + const Type * const floatType = builder.getFloatTy(); + + Constant * const float1 = constFloat(builder, 1.0f); + Constant * const float0_5 = constFloat(builder, 0.5f); + + Module * module = builder.GetInsertBlock()->getParent()->getParent(); + std::vector<Value * > texcoords = extractVector(builder, in1); + + Value * textureDimensions = module->getGlobalVariable("textureDimensions"); + if (!textureDimensions) + textureDimensions = new GlobalVariable(*module, intType, true, + GlobalValue::ExternalLinkage, + NULL, "textureDimensions"); + Value * textureWidth = 
builder.CreateConstInBoundsGEP1_32(textureDimensions, + sampler * 2); + textureWidth = builder.CreateLoad(textureWidth, name("textureWidth")); + Value * textureHeight = builder.CreateConstInBoundsGEP1_32(textureDimensions, + sampler * 2 + 1); + textureHeight = builder.CreateLoad(textureHeight, name("textureHeight")); + Value * textureW = builder.CreateSub(textureWidth, builder.getInt32(1)); + Value * textureH = builder.CreateSub(textureHeight, builder.getInt32(1)); + + Value * mx = Fabs(builder, texcoords[0]), * my = Fabs(builder, texcoords[1]); + Value * mz = Fabs(builder, texcoords[2]); + Value * sPtr = builder.CreateAlloca(floatType); + Value * tPtr = builder.CreateAlloca(floatType); + Value * maPtr = builder.CreateAlloca(floatType); + Value * facePtr = builder.CreateAlloca(intType); + + Value * mxGmyCmp = FCmpGT(builder, mx, my); + Value * mxGmzCmp = FCmpGT(builder, mx, mz); + + CondBranch condBranch(builder); + condBranch.ifCond(builder.CreateAnd(mxGmyCmp, mxGmzCmp)); // if (mx > my && mx > mz) +// m_storage->setCurrentBlock(currentBlock(), false); + { + condBranch.ifCond(FPositive(builder, texcoords[0])); +// m_storage->setCurrentBlock(currentBlock(), false); + { + builder.CreateStore(builder.CreateFNeg(texcoords[2]), sPtr); + builder.CreateStore(builder.CreateFNeg(texcoords[1]), tPtr); + builder.CreateStore(builder.getInt32(0), facePtr); + } + condBranch.elseop(); +// m_storage->setCurrentBlock(currentBlock(), false); + { + builder.CreateStore((texcoords[2]), sPtr); + builder.CreateStore(builder.CreateFNeg(texcoords[1]), tPtr); + builder.CreateStore(builder.getInt32(1), facePtr); + } + condBranch.endif(); // end if (x >= 0) +// m_storage->setCurrentBlock(currentBlock(), false); + + builder.CreateStore(mx, maPtr); + } + condBranch.elseop(); // !(mx > my && mx > mz) +// m_storage->setCurrentBlock(currentBlock(), false); + { + Value * myGmxCmp = FCmpGT(builder, my, mx); + Value * myGmzCmp = FCmpGT(builder, my, mz); + 
condBranch.ifCond(builder.CreateAnd(myGmxCmp, myGmzCmp)); // my > mx && my > mz +// m_storage->setCurrentBlock(currentBlock(), false); + { + condBranch.ifCond(FPositive(builder, texcoords[1])); +// m_storage->setCurrentBlock(currentBlock(), false); + { + builder.CreateStore((texcoords[0]), sPtr); + builder.CreateStore((texcoords[2]), tPtr); + builder.CreateStore(builder.getInt32(2), facePtr); + } + condBranch.elseop(); +// m_storage->setCurrentBlock(currentBlock(), false); + { + builder.CreateStore(texcoords[0], sPtr); + builder.CreateStore(builder.CreateFNeg(texcoords[2]), tPtr); + builder.CreateStore(builder.getInt32(3), facePtr); + } + condBranch.endif(); +// m_storage->setCurrentBlock(currentBlock(), false); + + builder.CreateStore(my, maPtr); + } + condBranch.elseop(); // !(my > mx && my > mz) +// m_storage->setCurrentBlock(currentBlock(), false); + { + //ifCond(builder.CreateFCmpOGE(texcoords[2], float0, name("zPositive"))); + condBranch.ifCond(FPositive(builder, texcoords[2])); +// m_storage->setCurrentBlock(currentBlock(), false); + { + builder.CreateStore((texcoords[0]), sPtr); + builder.CreateStore(builder.CreateFNeg(texcoords[1]), tPtr); + builder.CreateStore(builder.getInt32(4), facePtr); + } + condBranch.elseop(); +// m_storage->setCurrentBlock(currentBlock(), false); + { + builder.CreateStore(builder.CreateFNeg(texcoords[0]), sPtr); + builder.CreateStore(builder.CreateFNeg(texcoords[1]), tPtr); + builder.CreateStore(builder.getInt32(5), facePtr); + } + condBranch.endif(); // end if (x >= 0) +// m_storage->setCurrentBlock(currentBlock(), false); + + builder.CreateStore(mz, maPtr); + } + condBranch.endif(); +// m_storage->setCurrentBlock(currentBlock(), false); + } + condBranch.endif(); +// m_storage->setCurrentBlock(currentBlock(), false); + + + Value * s = builder.CreateLoad(sPtr); + Value * t = builder.CreateLoad(tPtr); + Value * ma = builder.CreateLoad(maPtr); + Value * face = builder.CreateLoad(facePtr); + + s = builder.CreateFDiv(s, ma); + s = 
builder.CreateFAdd(s, float1); + s = builder.CreateFMul(s, float0_5); + + t = builder.CreateFDiv(t, ma); + t = builder.CreateFAdd(t, float1); + t = builder.CreateFMul(t, float0_5); + +// ChannelType sType = Float, tType = Float; + Value * xLerp = NULL, * yLerp = NULL; + Value * x = texcoordWrap(builder, gglCtx->textureState.textures[sampler].wrapS, + /*sType, */s, textureWidth, textureW, &xLerp); + Value * y = texcoordWrap(builder, gglCtx->textureState.textures[sampler].wrapT, + /*tType, */t, textureHeight, textureH, &yLerp); + Value * indexOffset = builder.CreateMul(builder.CreateMul(textureHeight, textureWidth), face); + Value * index = builder.CreateAdd(builder.CreateMul(y, textureWidth), x); + + Value * textureData = module->getGlobalVariable("textureData"); + if (!textureData) + textureData = new GlobalVariable(*module, intPointerType, + true, GlobalValue::ExternalLinkage, + NULL, "textureData"); + + textureData = builder.CreateConstInBoundsGEP1_32(textureData, sampler); + textureData = builder.CreateLoad(textureData); + + if (0 == gglCtx->textureState.textures[sampler].minFilter && + 0 == gglCtx->textureState.textures[sampler].magFilter) { // GL_NEAREST + textureData = pointSample(builder, textureData, builder.CreateAdd(indexOffset, index), + gglCtx->textureState.textures[sampler].format/*, dstDesc*/); + return intColorVecToFloatColorVec(builder, textureData); + + } else if (1 == gglCtx->textureState.textures[sampler].minFilter && + 1 == gglCtx->textureState.textures[sampler].magFilter) { // GL_LINEAR + textureData = linearSample(builder, textureData, indexOffset, x, y, xLerp, yLerp, + textureW, textureH, textureWidth, textureHeight, + gglCtx->textureState.textures[sampler].format/*, dstDesc*/); + return intColorVecToFloatColorVec(builder, textureData); + } else + assert(!"unsupported texture filter"); + return NULL; +} diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 80e2213..10b6f04 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ 
-1466,7 +1466,8 @@ assign_varying_locations(struct gl_shader_program *prog, void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) { - void *mem_ctx = hieralloc_init("temporary linker context"); + //void *mem_ctx = hieralloc_init("temporary linker context"); + void * mem_ctx = prog; // need linked & cloned ir to persist prog->LinkStatus = false; prog->Validated = false; @@ -1670,5 +1671,5 @@ done: reparent_ir(prog->_LinkedShaders[i]->ir, prog->_LinkedShaders[i]->ir); } - hieralloc_free(mem_ctx); + //hieralloc_free(mem_ctx); } diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index e99f2c1..175719d 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -39,6 +39,10 @@ #include "ir_to_llvm.h" +#include "src/pixelflinger2/pixelflinger2.h" + +GGLInterface * ggl = NULL; + extern "C" struct gl_shader * _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); @@ -263,7 +267,7 @@ extern "C" void * PresentDrawingSurface(); extern "C" void DisposeDrawingSurface(); #endif -void execute(void (* function)(), float * data) +void execute(void (* function)(), gl_shader * shader) { #if defined __arm__ && DRAW_TO_SCREEN unsigned width = 0, height = 0, bpp = 0; @@ -275,10 +279,30 @@ void execute(void (* function)(), float * data) const unsigned width = 480, height = 800; unsigned * frameSurface = new unsigned [width * height]; #endif - const unsigned scale = 16, portWidth = 80, portHeight = 50; + //const unsigned scale = 16, portWidth = 80, portHeight = 50; + unsigned scale = 1, portWidth = width, portHeight = height; + + float * data = (float *)shader->Source; float * constants = data + 36; float * outputs = data + 0; float * inputs = data + 12; + int glFragColorLocation = 0; + int vTexCoordLocation = -1; + if (shader->symbols->get_variable("vTexCoord")) + vTexCoordLocation = shader->symbols->get_variable("vTexCoord")->location; + int vNormalLocation = -1; + if (shader->symbols->get_variable("vNormal")) + vNormalLocation = 
shader->symbols->get_variable("vNormal")->location; + if (shader->symbols->get_variable("uRotM")) + { + float * matrix = data + 4 * 1 + 4 * shader->symbols->get_variable("uRotM")->location; + memset(matrix, 0, 16 * sizeof(*matrix)); + matrix[0] = matrix[5] = matrix[10] = matrix[15] = 1; + matrix[28] = 0; + matrix[29] = 0; + matrix[30] = 0; + matrix[31] = 0; + } printf("executing... \n function=%p, data=%p \n", function, data); /* @@ -297,8 +321,8 @@ void execute(void (* function)(), float * data) unsigned frames = 1; clock_t c0 = clock(); - while(true) - for (frames = 1; frames <= 100; frames++) + //while(true) + for (frames = 1; frames <= 10; frames++) { inputs[2] = 0; inputs[3] = 1; @@ -309,25 +333,45 @@ void execute(void (* function)(), float * data) for (unsigned x = 0; x < portWidth; x++) { //data[36] = (float)i / 10000; //memset(data, i, sizeof(data)); - inputs[0] = ((float)x) / (portWidth - 1); - inputs[1] = ((float)y) / (portHeight - 1); + //inputs[0] = ((float)x) / (portWidth - 1); + //inputs[1] = ((float)y) / (portHeight - 1); + if (vTexCoordLocation > -1) + { + data[1 * 4 + vTexCoordLocation * 4 + 0] = ((float)x) / (portWidth - 1); + data[1 * 4 + vTexCoordLocation * 4 + 1] = ((float)y) / (portHeight - 1); + data[1 * 4 + vTexCoordLocation * 4 + 2] = 0; + data[1 * 4 + vTexCoordLocation * 4 + 3] = 1; + } + if (vNormalLocation > -1) + { + data[1 * 4 + vNormalLocation * 4 + 0] = 0; + data[1 * 4 + vNormalLocation * 4 + 1] = 1; + data[1 * 4 + vNormalLocation * 4 + 2] = 0; + data[1 * 4 + vNormalLocation * 4 + 3] = 1; + } function(); unsigned r = outputs[0] * 255; unsigned g = outputs[1] * 255; unsigned b = outputs[2] * 255; unsigned a = outputs[3] * 255; +// unsigned r = *(unsigned *)(outputs + 0); +// unsigned g = *(unsigned *)(outputs + 1); +// unsigned b = *(unsigned *)(outputs + 2); +// unsigned a = *(unsigned *)(outputs + 3); frameSurface[y * width + x] = (a << 24) | (b << 16) | (g << 8) | r; +// frameSurface[y * width + x] = *(unsigned *)outputs; } //* - 
for (int y = portHeight - 1; y >= 0; y--) - for (int x = portWidth - 1; x >= 0; x--) - { + if (scale > 1) + for (int y = portHeight - 1; y >= 0; y--) + for (int x = portWidth - 1; x >= 0; x--) + { unsigned pixel = ((unsigned *)frameSurface)[y * width + x]; for (unsigned xx = 0; xx < scale; xx++) for (unsigned yy = 0; yy < scale; yy++) ((unsigned *)frameSurface)[(y * scale + yy) * width + x * scale + xx] = pixel; - } - //*/ + } + //*/ #if defined __arm__ && DRAW_TO_SCREEN frameSurface = (unsigned *)PresentDrawingSurface(); #endif @@ -351,13 +395,12 @@ void execute(void (* function)(), float * data) } -//#def USE_LLVM_EXECUTIONENGINE 1 #if USE_LLVM_EXECUTIONENGINE #include <llvm/ExecutionEngine/JIT.h> #include <llvm/Target/TargetSelect.h> -void jit(llvm::Module * mod) +void jit(llvm::Module * mod, gl_shader * shader) { #ifndef __arm__ __attribute__ ((aligned (16))) // LLVM generates movaps on X86, needs 16 bytes align @@ -413,40 +456,59 @@ void jit(llvm::Module * mod) static void* symbolLookup(void* pContext, const char* name) { - float * data = (float *)pContext; + gl_shader * shader = (gl_shader *)pContext; + const GGLContext * gglCtx = (const GGLContext *)shader->Program; + + float * data = (float *)shader->Source; void * symbol = (void*)dlsym(RTLD_DEFAULT, name); if (NULL == symbol) { - if (0 == strcmp("gl_FragColor", name)) +// if (0 == strcmp("gl_FragColor", name)) +// symbol = data + 0; +// else if (0 == strcmp("gl_FragCoord", name)) +// symbol = data + 4; +// else if (0 == strcmp("gl_FrontFacing", name)) +// symbol = data + 8; +// else if (0 == strcmp("vTexCoord", name)) { +// symbol = data + 12; +// *(data + 12) = 1.1; +// *(data + 13) = 1.2; +// *(data + 14) = 1.3; +// *(data + 15) = 1; +// } else if (0 == strcmp("uRotM", name)) { +// symbol = data + 16; +// memset(data + 16, 0, 16 * sizeof(*data)); +// data[16] = data[21] = data[26] = data[31] = 1; +// data[28] = 11; +// data[29] = 22; +// data[30] = 33; +// //data[31] = 44; +// } else if (0 == 
strcmp("uFragmentColor", name)) { +// symbol = data + 32; +// data[32] = 1.57075f; +// data[33] = 1.57075f; +// data[34] = 1.57075f; +// data[35] = 1.57075f; +// } else if (0 == strcmp("t", name)) { +// symbol = data + 36; +// data[36] = 0.1f; +// } + + if (!strcmp("gl_FragColor", name)) symbol = data + 0; - else if (0 == strcmp("gl_FragCoord", name)) - symbol = data + 4; - else if (0 == strcmp("gl_FrontFacing", name)) - symbol = data + 8; - else if (0 == strcmp("vTexCoord", name)) { - symbol = data + 12; - *(data + 12) = 1.1; - *(data + 13) = 1.2; - *(data + 14) = 1.3; - *(data + 15) = 1; - } else if (0 == strcmp("uRotM", name)) { - symbol = data + 16; - memset(data + 16, 0, 16 * sizeof(*data)); - data[16] = data[21] = data[26] = data[31] = 1; - data[28] = 11; - data[29] = 22; - data[30] = 33; - //data[31] = 44; - } else if (0 == strcmp("uFragmentColor", name)) { - symbol = data + 32; - data[32] = 1.57075f; - data[33] = 1.57075f; - data[34] = 1.57075f; - data[35] = 1.57075f; - } else if (0 == strcmp("t", name)) { - symbol = data + 36; - data[36] = 0.1f; - } - + else if (!strcmp(_PF2_TEXTURE_DATA_NAME_, name)) + symbol = (void *)gglCtx->textureState.textureData; + else if (!strcmp(_PF2_TEXTURE_DIMENSIONS_NAME_, name)) + symbol = (void *)gglCtx->textureState.textureDimensions; + else + { + ir_variable * var = shader->symbols->get_variable(name); + if (-1 == var->location) + var->location = shader->SourceChecksum++; + else + shader->SourceChecksum = MAX2(var->location + var->type->matrix_columns, shader->SourceChecksum); + symbol = data + 4 * 1 + var->location * 4; + printf("'%s' at %d \n", var->name, var->location); + }; } printf("symbolLookup '%s'=%p \n", name, symbol); //getchar(); @@ -454,7 +516,7 @@ static void* symbolLookup(void* pContext, const char* name) return symbol; } -void jit(llvm::Module * mod) +void jit(llvm::Module * mod, gl_shader * shader) { #ifndef __arm__ __attribute__ ((aligned (16))) // LLVM generates movaps on X86, needs 16 bytes align @@ 
-462,11 +524,16 @@ void jit(llvm::Module * mod) float data [64]; memset(data, 0xff, sizeof(data)); + assert(!shader->Source); + shader->Source = (char *)data; // i/o pool + assert(!shader->Program); + shader->Program = (gl_program *)ggl; // pass in context + BCCScriptRef script = bccCreateScript(); bccReadModule(script, "glsl", (LLVMModuleRef)mod, 0); int result = 0; assert(0 == bccGetError(script)); - bccRegisterSymbolCallback(script, symbolLookup, data); + bccRegisterSymbolCallback(script, symbolLookup, shader); assert(0 == bccGetError(script)); bccPrepareExecutable(script, NULL, 0); result = bccGetError(script); @@ -482,27 +549,35 @@ void jit(llvm::Module * mod) fprintf(stderr, "Could not find '%s': %d\n", "main", result); else printf("bcc_compile %s=%p \n", "main", function); - execute(function, data); + + execute(function, shader); + + shader->Source = NULL; + shader->Program = NULL; } #endif -struct _mesa_glsl_parse_state * global_state = NULL; - int main(int argc, char **argv) { + static char texturePath [256] = {0}; + static char shaderPath [256] = {0}; + static const char shaderFile[] = "fs.frag"; + static const char textureFile[] = "android.tga"; + + memcpy(texturePath, argv[0], strlen(argv[0])); + char * slash = texturePath + strlen(texturePath); + while (*slash != '/' && slash >= texturePath) + slash--; + memcpy(slash + 1, textureFile, strlen(textureFile)); + memcpy(shaderPath, texturePath, slash - texturePath + 1); + memcpy(shaderPath + (slash - texturePath) + 1, shaderFile, strlen(shaderFile)); + //* if (1 == argc) { argc = 6; - char shader_file_path[256] = {0}; - memcpy(shader_file_path, argv[0], strlen(argv[0])); - char * slash = shader_file_path + strlen(shader_file_path); - while (*slash != '/') - slash--; - const char shader_file[] = "stress_fs.frag"; - memcpy(slash + 1, shader_file, strlen(shader_file)); - const char * args [] = {argv[0], "--dump-hir", "--do-jit", "--link", "--glsl-es", shader_file_path}; + const char * args [] = {argv[0], 
"--dump-hir", "--do-jit", "--link", "--glsl-es", shaderPath}; argv = (char **)args; } //*/ @@ -576,25 +651,37 @@ main(int argc, char **argv) puts("jit"); + ggl = CreateGGLInterface(); + GGLTexture texture = {0}; + LoadTGA(texturePath, &texture.width, &texture.height, reinterpret_cast<void **>(&texture.levels)); + texture.format = GGL_PIXEL_FORMAT_RGBA_8888; + texture.type = GL_TEXTURE_2D; + texture.levelCount = 1; + texture.wrapS = texture.wrapT = 0; // repeat = 0 fastest, clamp = 1, mirrored = 2 + texture.minFilter = texture.magFilter = 0; // nearest = 0, linear = 1 + ggl->SetSampler(ggl, 0, &texture); + for (unsigned i = 0; do_jit && i < MESA_SHADER_TYPES; i++) { struct gl_shader *shader = whole_program->_LinkedShaders[i]; if (!shader) continue; - global_state = new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader); exec_list * ir = shader->ir; do_mat_op_to_vec(ir); puts("\n *** IR for JIT *** \n"); - _mesa_print_ir(ir, global_state); + _mesa_print_ir(ir, NULL); - llvm::Module * module = glsl_ir_to_llvm_module(ir); + llvm::Module * module = glsl_ir_to_llvm_module(ir, (GGLContext *)ggl); assert(module); puts("\n *** Module for JIT *** \n"); module->dump(); - jit(module); + jit(module, shader); puts("jitted"); } + + free(texture.levels); + DestroyGGLInterface((GGLInterface *)ggl); for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) hieralloc_free(whole_program->_LinkedShaders[i]); diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 08ad5f3..71d07d5 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -50,16 +50,65 @@ #define GL_GLEXT_PROTOTYPES -#include "GL/gl.h" -#include "GL/glext.h" - +#include "GLES2/gl2.h" +#include "GLES2/gl2ext.h" /** * GL_FIXED is defined in glext.h version 64 but these typedefs aren't (yet). 
*/ -typedef int GLfixed; +//typedef int GLfixed; typedef int GLclampx; - +typedef double GLdouble; +typedef unsigned short GLhalfARB; +typedef uint64_t GLuint64; +typedef GLdouble GLclampd; +typedef int GLsizeiptrARB; +typedef GLuint64 GLuint64EXT; +typedef int GLintptrARB; + +#define GLAPIENTRYP * + +#define GL_POLYGON 0x0009 +#define GL_GEOMETRY_SHADER 0x8DD9 +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_SAMPLER_3D 0x8B5F + +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 + +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A + +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 #ifndef GL_OES_EGL_image typedef void *GLeglImageOES; diff --git a/src/pixelflinger2/buffer.cpp b/src/pixelflinger2/buffer.cpp new file mode 100644 index 0000000..1c5c18c --- /dev/null +++ b/src/pixelflinger2/buffer.cpp @@ -0,0 +1,225 @@ +/** + ** + ** Copyright 2010, 
The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#include "src/pixelflinger2/pixelflinger2.h" + +#include <assert.h> +#include <string.h> + +void SetShaderVerifyFunctions(GGLInterface *); + +static void DepthFunc(GGLInterface * iface, GLenum func) +{ + GGL_GET_CONTEXT(ctx, iface); + if (GL_NEVER > func || GL_ALWAYS < func) + return gglError(GL_INVALID_ENUM); + ctx->bufferState.depthFunc = func & 0x7; + SetShaderVerifyFunctions(iface); +} + +static void StencilFuncSeparate(GGLInterface * iface, GLenum face, GLenum func, GLint ref, GLuint mask) +{ + GGL_GET_CONTEXT(ctx, iface); + if (GL_FRONT > face || GL_FRONT_AND_BACK < face) + return gglError(GL_INVALID_ENUM); + if (GL_NEVER > func || GL_ALWAYS < func) + return gglError(GL_INVALID_ENUM); + mask &= 0xff; + ref = MAX2(MIN2(ref, 0xff), 0); + ref &= mask; + if (GL_FRONT == face || GL_FRONT_AND_BACK == face) + { + ctx->frontStencil.ref = ref; + ctx->frontStencil.mask = mask; + ctx->frontStencil.func = func & 0x7; + } + if (GL_BACK == face || GL_FRONT_AND_BACK == face) + { + ctx->backStencil.ref = ref; + ctx->backStencil.mask = mask; + ctx->backStencil.func = func & 0x7; + } + SetShaderVerifyFunctions(iface); +} + +static unsigned StencilOpEnum(GLenum func, unsigned oldValue) +{ + switch (func) + { + case GL_ZERO: return 0; + case GL_KEEP: // fall through + case GL_REPLACE: // fall through + case GL_INCR: // fall through + case GL_DECR: return func - GL_KEEP + 
1; break; + case GL_INVERT: return 5; + case GL_INCR_WRAP: return 6; + case GL_DECR_WRAP: return 7; + default: gglError(GL_INVALID_ENUM); return oldValue; + } +} + +static void StencilOpSeparate(GGLInterface * iface, GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) +{ + GGL_GET_CONTEXT(ctx, iface); + if (GL_FRONT > face || GL_FRONT_AND_BACK < face) + return gglError(GL_INVALID_ENUM); + if (GL_FRONT == face || GL_FRONT_AND_BACK == face) + { + ctx->frontStencil.sFail = StencilOpEnum(sfail, ctx->frontStencil.sFail); + ctx->frontStencil.dFail = StencilOpEnum(dpfail, ctx->frontStencil.dFail); + ctx->frontStencil.dPass = StencilOpEnum(dppass, ctx->frontStencil.dPass); + } + if (GL_BACK == face || GL_FRONT_AND_BACK == face) + { + ctx->backStencil.sFail = StencilOpEnum(sfail, ctx->backStencil.sFail); + ctx->backStencil.dFail = StencilOpEnum(dpfail, ctx->backStencil.dFail); + ctx->backStencil.dPass = StencilOpEnum(dppass, ctx->backStencil.dPass); + } + SetShaderVerifyFunctions(iface); +} + +static void StencilSelect(const GGLInterface * iface, GLenum face) +{ + GGL_GET_CONTEXT(ctx, iface); + if (GL_FRONT == face) + { + ctx->activeStencil.face = 0; + ctx->activeStencil.ref = ctx->frontStencil.ref; + ctx->activeStencil.mask = ctx->frontStencil.mask; + } + else if (GL_BACK == face) + { + ctx->activeStencil.face = 1; + ctx->activeStencil.ref = ctx->backStencil.ref; + ctx->activeStencil.mask = ctx->backStencil.mask; + } +} + +static void ClearStencil(GGLInterface * iface, GLint s) +{ + GGL_GET_CONTEXT(ctx, iface); + ctx->clearState.stencil = 0x01010101 * ((unsigned &)s & 0xff); +} + +static void ClearColor(GGLInterface * iface, GLclampf r, GLclampf g, GLclampf b, GLclampf a) +{ + GGL_GET_CONTEXT(ctx, iface); + r = MAX2(MIN2(r, 1.0f), 0); + g = MAX2(MIN2(g, 1.0f), 0); + b = MAX2(MIN2(b, 1.0f), 0); + a = MAX2(MIN2(a, 1.0f), 0); + ctx->clearState.color = (unsigned(a * 255) << 24) | (unsigned(b * 255) << 16) | + (unsigned(g * 255) << 8) | unsigned(r * 255); +} + +static void 
ClearDepthf(GGLInterface * iface, GLclampf d) +{ + GGL_GET_CONTEXT(ctx, iface); + // assuming ieee 754 32 bit float and 32 bit 2's complement int + assert(sizeof(d) == sizeof(ctx->clearState.depth)); + ctx->clearState.depth = (int &)d; // bit reinterpretation + if (0x80000000 & ctx->clearState.depth) // smaller negative float has bigger int representation, so flip + ctx->clearState.depth ^= 0x7fffffff; // since -FLT_MAX is close to -1 when bitcasted +} + +static void Clear(const GGLInterface * iface, GLbitfield buf) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + + // TODO DXL scissor test + if (GL_COLOR_BUFFER_BIT & buf) + { + assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format); + unsigned * const end = (unsigned *)ctx->frameSurface.data + + ctx->frameSurface.width * ctx->frameSurface.height; + const unsigned color = ctx->clearState.color; + for (unsigned * start = (unsigned *)ctx->frameSurface.data; start < end; start++) + *start = color; + } + if (GL_DEPTH_BUFFER_BIT & buf) + { + assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); + unsigned * const end = (unsigned *)ctx->depthSurface.data + + ctx->depthSurface.width * ctx->depthSurface.height; + const unsigned depth = ctx->clearState.depth; + for (unsigned * start = (unsigned *)ctx->depthSurface.data; start < end; start++) + *start = depth; + } + if (GL_STENCIL_BUFFER_BIT & buf) + { + assert(GGL_PIXEL_FORMAT_S_8 == ctx->stencilSurface.format); + unsigned * const end = (unsigned *)((unsigned char *)ctx->stencilSurface.data + + ctx->stencilSurface.width * ctx->stencilSurface.height); + unsigned * start = (unsigned *)ctx->stencilSurface.data; + const unsigned stencil = ctx->clearState.stencil; + for (start; start < end; start++) + *start = stencil; + start--; + for (unsigned char * i = (unsigned char *)start; i < (unsigned char *)end; i++) + *i = stencil & 0xff; + } +} + +static void SetBuffer(GGLInterface * iface, const GLenum type, GGLSurface * surface) +{ + GGL_GET_CONTEXT(ctx, iface); + if 
(GL_COLOR_BUFFER_BIT == type) + { + if (surface) + { + ctx->frameSurface = *surface; + assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format); + } + else + memset(&ctx->frameSurface, 0, sizeof(ctx->frameSurface)); + } + else if (GL_DEPTH_BUFFER_BIT == type) + { + if (surface) + { + ctx->depthSurface = *surface; + assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); + } + else + memset(&ctx->depthSurface, 0, sizeof(ctx->depthSurface)); + } + else if (GL_STENCIL_BUFFER_BIT == type) + { + if (surface) + { + ctx->stencilSurface = *surface; + assert(GGL_PIXEL_FORMAT_S_8 == ctx->stencilSurface.format); + } + else + memset(&ctx->stencilSurface, 0, sizeof(ctx->stencilSurface)); + } + else + gglError(GL_INVALID_ENUM); +} + +void InitializeBufferFunctions(GGLInterface * iface) +{ + iface->DepthFunc = DepthFunc; + iface->StencilFuncSeparate = StencilFuncSeparate; + iface->StencilOpSeparate = StencilOpSeparate; + iface->StencilSelect = StencilSelect; + iface->ClearStencil = ClearStencil; + iface->ClearColor = ClearColor; + iface->ClearDepthf = ClearDepthf; + iface->Clear = Clear; + iface->SetBuffer = SetBuffer; +}
\ No newline at end of file diff --git a/src/pixelflinger2/pixelflinger2.cpp b/src/pixelflinger2/pixelflinger2.cpp new file mode 100644 index 0000000..06d3443 --- /dev/null +++ b/src/pixelflinger2/pixelflinger2.cpp @@ -0,0 +1,238 @@ +/** + ** + ** Copyright 2010, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#include "pixelflinger2.h" + +//#include "src/pixelflinger2/texture.h" + +//#include "src/mesa/main/context.h" + +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> + +void gglError(unsigned error) +{ + assert(0); +} + +static void DepthRangef(GGLInterface * iface, GLclampf zNear, GLclampf zFar) +{ + GGL_GET_CONTEXT(ctx, iface); + ctx->viewport.n = VectorComp_t_CTR((zNear + zFar) / 2); + ctx->viewport.f = VectorComp_t_CTR((zFar - zNear) / 2); +} + +static void Viewport(GGLInterface * iface, GLint x, GLint y, GLsizei width, GLsizei height) +{ + GGL_GET_CONTEXT(ctx, iface); + ctx->viewport.x = VectorComp_t_CTR(x + width / 2); + ctx->viewport.y = VectorComp_t_CTR(y + height / 2); + ctx->viewport.w = VectorComp_t_CTR(width / 2); + ctx->viewport.h = VectorComp_t_CTR(height / 2); +} + +static void CullFace(GGLInterface * iface, GLenum mode) +{ + GGL_GET_CONTEXT(ctx, iface); + if (GL_FRONT > mode || GL_FRONT_AND_BACK < mode) + gglError(GL_INVALID_ENUM); + else + ctx->cullState.cullFace = mode - GL_FRONT; +} + +static void FrontFace(GGLInterface * iface, GLenum mode) +{ + GGL_GET_CONTEXT(ctx, 
iface); + if (GL_CW > mode || GL_CCW < mode) + gglError(GL_INVALID_ENUM); + else + ctx->cullState.frontFace = mode - GL_CW; +} + +static void BlendColor(GGLInterface * iface, GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha) +{ + GGL_GET_CONTEXT(ctx, iface); +#if USE_LLVM_SCANLINE + ctx->blendState.color[0] = MIN2(MAX2(red * 255, 0.0f), 255.0f); + ctx->blendState.color[1] = MIN2(MAX2(green * 255, 0.0f), 255.0f); + ctx->blendState.color[2] = MIN2(MAX2(blue * 255, 0.0f), 255.0f); + ctx->blendState.color[3] = MIN2(MAX2(alpha * 255, 0.0f), 255.0f); +#else + ctx->blendState.color.r = MIN2(MAX2(red * 255, 0), 255); + ctx->blendState.color.g = MIN2(MAX2(green * 255, 0), 255); + ctx->blendState.color.b = MIN2(MAX2(blue * 255, 0), 255); + ctx->blendState.color.a = MIN2(MAX2(alpha * 255, 0), 255); +#endif + SetShaderVerifyFunctions(iface); +} + +static void BlendEquationSeparate(GGLInterface * iface, GLenum modeRGB, GLenum modeAlpha) +{ + GGL_GET_CONTEXT(ctx, iface); + if (GL_FUNC_ADD != modeRGB && (GL_FUNC_SUBTRACT > modeRGB || + GL_FUNC_REVERSE_SUBTRACT < modeRGB)) + return gglError(GL_INVALID_ENUM); + if (GL_FUNC_ADD != modeRGB && (GL_FUNC_SUBTRACT > modeRGB || + GL_FUNC_REVERSE_SUBTRACT < modeRGB)) + return gglError(GL_INVALID_ENUM); + ctx->blendState.ce = modeRGB - GL_FUNC_ADD; + ctx->blendState.ae = modeAlpha - GL_FUNC_ADD; + SetShaderVerifyFunctions(iface); +} + +static void BlendFuncSeparate(GGLInterface * iface, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha) +{ + GGL_GET_CONTEXT(ctx, iface); + if (GL_ZERO != srcRGB && GL_ONE != srcRGB && + (GL_SRC_COLOR > srcRGB || GL_SRC_ALPHA_SATURATE < srcRGB) && + (GL_CONSTANT_COLOR > srcRGB || GL_ONE_MINUS_CONSTANT_ALPHA < srcRGB)) + return gglError(GL_INVALID_ENUM); + if (GL_ZERO != srcAlpha && GL_ONE != srcAlpha && + (GL_SRC_COLOR > srcAlpha || GL_SRC_ALPHA_SATURATE < srcAlpha) && + (GL_CONSTANT_COLOR > dstRGB || GL_ONE_MINUS_CONSTANT_ALPHA < dstRGB)) + return gglError(GL_INVALID_ENUM); + if 
(GL_ZERO != dstRGB && GL_ONE != dstRGB && + (GL_SRC_COLOR > dstRGB || GL_ONE_MINUS_DST_COLOR < dstRGB) && // GL_SRC_ALPHA_SATURATE only for source + (GL_CONSTANT_COLOR > dstRGB || GL_ONE_MINUS_CONSTANT_ALPHA < dstRGB)) + return gglError(GL_INVALID_ENUM); + if (GL_ZERO != dstAlpha && GL_ONE != dstAlpha && + (GL_SRC_COLOR > dstAlpha || GL_ONE_MINUS_DST_COLOR < dstAlpha) && + (GL_CONSTANT_COLOR > dstRGB || GL_ONE_MINUS_CONSTANT_ALPHA < dstRGB)) + return gglError(GL_INVALID_ENUM); + if (srcAlpha == GL_SRC_ALPHA_SATURATE) // it's just 1 instead of min(sa, 1 - da) for alpha channel + srcAlpha = GL_ONE; + // in c++ it's templated function for color and alpha, + // so it requires setting srcAlpha to GL_ONE to run template again only for alpha + ctx->blendState.scf = srcRGB <= GL_ONE ? srcRGB : + (srcRGB <= GL_SRC_ALPHA_SATURATE ? srcRGB - GL_SRC_COLOR + 2 + : srcRGB - GL_CONSTANT_COLOR + 11); + + ctx->blendState.saf = srcAlpha <= GL_ONE ? srcAlpha : + (srcAlpha <= GL_SRC_ALPHA_SATURATE ? srcAlpha - GL_SRC_COLOR + 2 + : srcAlpha - GL_CONSTANT_COLOR + 11); + + ctx->blendState.dcf = dstRGB <= GL_ONE ? dstRGB : + (dstRGB <= GL_SRC_ALPHA_SATURATE ? dstRGB - GL_SRC_COLOR + 2 + : dstRGB - GL_CONSTANT_COLOR + 11); + + ctx->blendState.daf = dstAlpha <= GL_ONE ? dstAlpha : + (dstAlpha <= GL_SRC_ALPHA_SATURATE ? 
dstAlpha - GL_SRC_COLOR + 2 + : dstAlpha - GL_CONSTANT_COLOR + 11); + + SetShaderVerifyFunctions(iface); + +} + +static void EnableDisable(GGLInterface * iface, GLenum cap, GLboolean enable) +{ + GGL_GET_CONTEXT(ctx, iface); + bool changed = false; + switch (cap) { + case GL_BLEND: + changed |= ctx->blendState.enable ^ enable; + ctx->blendState.enable = enable; + break; + case GL_CULL_FACE: + changed |= ctx->cullState.enable ^ enable; + ctx->cullState.enable = enable; + break; + case GL_DEPTH_TEST: + changed |= ctx->bufferState.depthTest ^ enable; + ctx->bufferState.depthTest = enable; + break; + case GL_STENCIL_TEST: + changed |= ctx->bufferState.stencilTest ^ enable; + ctx->bufferState.stencilTest = enable; + break; + default: + gglError(GL_INVALID_ENUM); + break; + } + if (changed) + SetShaderVerifyFunctions(iface); +} + +static void InitializeGGLState(GGLInterface * iface) +{ + iface->DepthRangef = DepthRangef; + iface->Viewport = Viewport; + iface->CullFace = CullFace; + iface->FrontFace = FrontFace; + iface->BlendColor = BlendColor; + iface->BlendEquationSeparate = BlendEquationSeparate; + iface->BlendFuncSeparate = BlendFuncSeparate; + iface->EnableDisable = EnableDisable; + + InitializeBufferFunctions(iface); + InitializeRasterFunctions(iface); + InitializeScanLineFunctions(iface); + InitializeShaderFunctions(iface); + InitializeTextureFunctions(iface); + + iface->EnableDisable(iface, GL_DEPTH_TEST, false); + iface->DepthFunc(iface, GL_LESS); + iface->ClearColor(iface, 0, 0, 0, 0); + iface->ClearDepthf(iface, 1.0f); + + iface->EnableDisable(iface, GL_STENCIL_TEST, false); + iface->StencilFuncSeparate(iface, GL_FRONT_AND_BACK, GL_ALWAYS, 0, 0xff); + iface->StencilOpSeparate(iface, GL_FRONT_AND_BACK, GL_KEEP, GL_KEEP, GL_KEEP); + + iface->FrontFace(iface, GL_CCW); + iface->CullFace(iface, GL_BACK); + iface->EnableDisable(iface, GL_CULL_FACE, false); + + iface->EnableDisable(iface, GL_BLEND, false); + iface->BlendColor(iface, 0, 0, 0, 0); + 
iface->BlendEquationSeparate(iface, GL_FUNC_ADD, GL_FUNC_ADD); + iface->BlendFuncSeparate(iface, GL_ONE, GL_ZERO, GL_ONE, GL_ZERO); + + for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) + iface->SetSampler(iface, i, NULL); + + SetShaderVerifyFunctions(iface); +} + +GGLInterface * CreateGGLInterface() +{ + //GGLContext * ctx = (GGLContext *)calloc(1, sizeof(GGLContext) + sizeof(__GLcontextRec)); + GGLContext * ctx = (GGLContext *)calloc(1, sizeof(GGLContext)); + if (!ctx) + return NULL; + assert((void *)ctx == (void *)&ctx->interface); + //ctx->glCtx = (GLcontext *)((char *)ctx + sizeof(GGLContext)); + + //_glapi_set_context(ctx->glCtx); + //_mesa_init_constants(&Const); + + puts("InitializeGGLState"); + InitializeGGLState(&ctx->interface); + return &ctx->interface; +} + +void DestroyGGLInterface(GGLInterface * iface) +{ + GGLContext * ctx = (GGLContext *)iface; + assert((void *)ctx == (void *)iface); + + DestroyShaderFunctions(iface); + + ctx->glCtx = NULL; + + free(ctx); +}
\ No newline at end of file diff --git a/src/pixelflinger2/pixelflinger2.h b/src/pixelflinger2/pixelflinger2.h new file mode 100644 index 0000000..c1259a2 --- /dev/null +++ b/src/pixelflinger2/pixelflinger2.h @@ -0,0 +1,160 @@ +/** + ** + ** Copyright 2010, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#ifndef _PIXELFLINGER2_H_ +#define _PIXELFLINGER2_H_ + +#define USE_LLVM_TEXTURE_SAMPLER 1 +#define USE_LLVM_SCANLINE 1 +#define USE_LLVM_EXECUTIONENGINE 0 // 1 to use llvm::Execution, 0 to use libBCC, requires modifying makefile + +#define debug_printf printf + +#include "pixelflinger2/pixelflinger2_interface.h" + +#ifndef MIN2 +# define MIN2(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef MAX2 +# define MAX2(a, b) ((a) > (b) ? 
(a) : (b)) +#endif + +namespace llvm { + class LLVMContext; +}; + +#if !USE_LLVM_SCANLINE +typedef int BlendComp_t; +#endif + +#define GGL_GET_CONTEXT(context, interface) GGLContext * context = (GGLContext *)interface; +#define GGL_GET_CONST_CONTEXT(context, interface) const GGLContext * context = \ + (const GGLContext *)interface; (void)context; + +struct GGLContext +{ + GGLInterface interface; // must be first member so that GGLContext * == GGLInterface * + + GGLSurface frameSurface; + GGLSurface depthSurface; + GGLSurface stencilSurface; + + struct __GLcontextRec * glCtx; // mesa constants and others used for shader compiling and executing + llvm::LLVMContext * llvmCtx; + + struct + { + int depth; // assuming ieee 754 32 bit float and 32 bit 2's complement int; z_32 + unsigned color; // clear value; rgba_8888 + unsigned stencil; // s_8; repeated to clear 4 pixels at a time + } clearState; + + struct StencilState + { + unsigned char ref, mask; // ref is masked during StencilFuncSeparate + + // GL_NEVER = 0, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, + // GL_ALWAYS; value = GLenum & 0x7 (GLenum is 0x200-0x207) + unsigned char func; // compare function + + // GL_ZERO = 0, GL_KEEP = 1, GL_REPLACE, GL_INCR, GL_DECR, GL_INVERT, GL_INCR_WRAP, + // GL_DECR_WRAP = 7; value = 0 | GLenum - GL_KEEP | GL_INVERT | GLenum - GL_INCR_WRAP + unsigned char sFail, dFail, dPass; // operations + } frontStencil, backStencil; // all affect scanline jit + + mutable struct ActiveStencilState // do not change layout, used in GenerateScanLine + { + unsigned char face; // FRONT = 0, BACK = 1 + unsigned char ref, mask; + } activeStencil; // after primitive assembly, call StencilSelect + + struct BufferState // all affect scanline jit + { + unsigned stencilTest : 1; + unsigned depthTest : 1; + // same as sf/bFunc; GL_NEVER = 0, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, + // GL_GEQUAL, GL_ALWAYS = 7; value = GLenum & 0x7 (GLenum is 0x200-0x207) + unsigned 
depthFunc : 3; + } bufferState; + + struct BlendState // all values affect scanline jit + { +#if USE_LLVM_SCANLINE + unsigned char color[4]; // rgba[0,255] +#else + Vec4<BlendComp_t> color; +#endif + + unsigned scf : 4, saf : 4, dcf : 4, daf : 4; // GL_ZERO = 0, GL_ONE, GL_SRC_COLOR = 2, + // GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, + // GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA, GL_DST_COLOR, GL_ONE_MINUS_DST_COLOR, + // GL_SRC_ALPHA_SATURATE, GL_CONSTANT_COLOR = 11, GL_ONE_MINUS_CONSTANT_COLOR, + // GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA; + // value = 0,1 | GLenum - GL_SRC_COLOR + 2 | GLenum - GL_CONSTANT_COLOR + 11 + + unsigned ce : 3, ae : 3; // GL_FUNC_ADD = 0, GL_FUNC_SUBTRACT = 4, + // GL_FUNC_REVERSE_SUBTRACT = 5; value = GLenum - GL_FUNC_ADD + + unsigned enable : 1; + } blendState; + + struct + { + // format affects vs and fs jit + GGLTexture textures[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; // the active samplers + // array of pointers to texture surface data; used by LLVM generated texture sampler + void * textureData[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS]; + // array of texture dimensions; used by LLVM generated texture sampler + unsigned textureDimensions[GGL_MAXCOMBINEDTEXTUREIMAGEUNITS * 2]; + } textureState; + + // called by ShaderUse to set to proper rendering functions + void (* PickScanLine)(GGLInterface * iface); + void (* PickRaster)(GGLInterface * iface); + + // viewport params are transformed so that Zw = Zd * f + n + // and Xw/Yw = x/y + Xd/Yd * w/h + struct { VectorComp_t x, y, w, h, n, f; } viewport; // should be moved into libAgl2 + + struct // should be moved into libAgl2 + { + unsigned enable : 1; + unsigned frontFace : 1; // GL_CW = 0, GL_CCW, actual value is GLenum - GL_CW + unsigned cullFace : 2; // GL_FRONT = 0, GL_BACK, GL_FRONT_AND_BACK, value = GLenum - GL_FRONT + } cullState; +}; + +#define _PF2_TEXTURE_DATA_NAME_ "gl_PF2TEXTURE_DATA" /* sampler data pointers used by LLVM */ +#define _PF2_TEXTURE_DIMENSIONS_NAME_ 
"gl_PF2TEXTURE_DIMENSIONS" /* sampler dimensions used by LLVM */ + +void gglError(unsigned error); // not implmented, just an assert + +// they just set the function pointers +void InitializeBufferFunctions(GGLInterface * iface); +void InitializeRasterFunctions(GGLInterface * iface); +void InitializeScanLineFunctions(GGLInterface * iface); +void InitializeTextureFunctions(GGLInterface * iface); + +void InitializeShaderFunctions(GGLInterface * iface); // set function pointers and create needed objects +void SetShaderVerifyFunctions(GGLInterface * iface); // called by state change functions +void DestroyShaderFunctions(GGLInterface * iface); // destroy needed objects +// actual gl_shader and gl_shader_program is created and destroyed by ShaderCreate/Free, +// and ShaderProgramCreate/Free. + + + +#endif // #ifndef _PIXELFLINGER2_H_ diff --git a/src/pixelflinger2/raster.cpp b/src/pixelflinger2/raster.cpp new file mode 100644 index 0000000..cc2695c --- /dev/null +++ b/src/pixelflinger2/raster.cpp @@ -0,0 +1,358 @@ +/** + ** + ** Copyright 2010, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ */ + +#include <stdlib.h> +#include <assert.h> +#include <math.h> + +#include "pixelflinger2.h" + +#ifdef SHADER_SOA +static struct tgsi_exec_machine machine; +#endif + +static inline void LerpVector4(const Vector4 * a, const Vector4 * b, + const VectorComp_t x, Vector4 * d) __attribute__((always_inline)); +static inline void LerpVector4(const Vector4 * a, const Vector4 * b, + const VectorComp_t x, Vector4 * d) +{ + assert(a != d && b != d); + //d = (b - a) * x + a; + (*d) = (*b); + (*d) -= (*a); + (*d) *= x; + (*d) += (*a); +} + +static inline void InterpolateVertex(const VertexOutput * a, const VertexOutput * b, const VectorComp_t x, + VertexOutput * v, const unsigned varyingCount) +{ + LerpVector4(&a->position, &b->position, x, &v->position); + for (unsigned i = 0; i < varyingCount; i++) + LerpVector4(a->varyings + i, b->varyings + i, x, v->varyings + i); + LerpVector4(&a->frontFacingPointCoord, &b->frontFacingPointCoord, + x, &v->frontFacingPointCoord); // gl_PointCoord + v->frontFacingPointCoord.y = a->frontFacingPointCoord.y; // gl_FrontFacing not interpolated + +} + +static void ProcessVertex(const GGLInterface * iface, const VertexInput * input, + VertexOutput * output) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + +//#if !USE_LLVM_TEXTURE_SAMPLER +// extern const GGLContext * textureGGLContext; +// textureGGLContext = ctx; +//#endif +// +// const Vector4 * constants = (Vector4 *) +// ctx->glCtx->Shader.CurrentProgram->VertexProgram->Parameters->ParameterValues; +// ctx->glCtx->Shader.CurrentProgram->GLVMVP->function(input, output, constants); +// +//#if !USE_LLVM_TEXTURE_SAMPLER +// textureGGLContext = NULL; +//#endif +} + +static void RasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, + const VertexOutput * tr, const VertexOutput * bl, + const VertexOutput * br) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + + assert(tl->position.x <= tr->position.x && bl->position.x <= br->position.x); + assert(tl->position.y <= bl->position.y && 
tr->position.y <= br->position.y); + assert(fabs(tl->position.y - tr->position.y) < 1 && fabs(bl->position.y - br->position.y) < 1); + + const unsigned width = ctx->frameSurface.width, height = ctx->frameSurface.height; + const unsigned varyingCount = 0;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + + + // tlv-trv and blv-brv are parallel and horizontal + VertexOutput tlv(*tl), trv(*tr), blv(*bl), brv(*br); + VertexOutput tmp; + + // vertically clip + if ((int)tlv.position.y < 0) + { + InterpolateVertex(&tlv, &blv, (0 - tlv.position.y) / (blv.position.y - tlv.position.y), + &tmp, varyingCount); + tlv = tmp; + } + if ((int)trv.position.y < 0) + { + InterpolateVertex(&trv, &brv, (0 - trv.position.y) / (brv.position.y - trv.position.y), + &tmp, varyingCount); + trv = tmp; + } + if ((int)blv.position.y >= (int)height) + { + InterpolateVertex(&tlv, &blv, (height - 1 - tlv.position.y) / (blv.position.y - tlv.position.y), + &tmp, varyingCount); + blv = tmp; + } + if ((int)brv.position.y >= (int)height) + { + InterpolateVertex(&trv, &brv, (height - 1 - trv.position.y) / (brv.position.y - trv.position.y), + &tmp, varyingCount); + brv = tmp; + } + + // horizontally clip + if ((int)tlv.position.x < 0) + { + InterpolateVertex(&tlv, &trv, (0 - tlv.position.x) / (trv.position.x - tlv.position.x), + &tmp, varyingCount); + tlv = tmp; + } + if ((int)blv.position.x < 0) + { + InterpolateVertex(&blv, &brv, (0 - blv.position.x) / (brv.position.x - blv.position.x), + &tmp, varyingCount); + blv = tmp; + } + if ((int)trv.position.x >= (int)width) + { + InterpolateVertex(&tlv, &trv, (width - 1 - tlv.position.x) / (trv.position.x - tlv.position.x), + &tmp, varyingCount); + trv = tmp; + } + if ((int)brv.position.x >= (int)width) + { + InterpolateVertex(&blv, &brv, (width - 1 - blv.position.x) / (brv.position.x - blv.position.x), + &tmp, varyingCount); + brv = tmp; + } + + const unsigned int startY = tlv.position.y; + const unsigned int endY = blv.position.y; + + if (endY < 
startY) + return; + + const VectorComp_t yDistInv = VectorComp_t_CTR(1.0f / (endY - startY)); + + // bV and cV are left and right vertices on a horizontal line in quad + // bDx and cDx are iterators from tlv to blv, trv to brv for bV and cV + + VertexOutput bV(tlv), cV(trv); + VertexOutput bDx(blv), cDx(brv); + + for (unsigned i = 0; i < varyingCount; i++) + { + bDx.varyings[i] -= tlv.varyings[i]; + bDx.varyings[i] *= yDistInv; + + cDx.varyings[i] -= trv.varyings[i]; + cDx.varyings[i] *= yDistInv; + } + + bDx.position -= tlv.position; + bDx.position *= yDistInv; + + cDx.position -= trv.position; + cDx.position *= yDistInv; + + bDx.frontFacingPointCoord -= tlv.frontFacingPointCoord; // gl_PointCoord + bDx.frontFacingPointCoord *= yDistInv; + bDx.frontFacingPointCoord.y = VectorComp_t_Zero; // gl_FrontFacing not interpolated + cDx.frontFacingPointCoord -= trv.frontFacingPointCoord; // gl_PointCoord + cDx.frontFacingPointCoord *= yDistInv; + cDx.frontFacingPointCoord.y = VectorComp_t_Zero; // gl_FrontFacing not interpolated + + for (unsigned y = startY; y <= endY; y++) + { + iface->ScanLine(iface, &bV, &cV); + + for (unsigned i = 0; i < varyingCount; i++) + { + bV.varyings[i] += bDx.varyings[i]; + cV.varyings[i] += cDx.varyings[i]; + } + + bV.position += bDx.position; + cV.position += cDx.position; + + bV.frontFacingPointCoord += bDx.frontFacingPointCoord; + cV.frontFacingPointCoord += cDx.frontFacingPointCoord; + } +} + +static void RasterTriangle(const GGLInterface * iface, const VertexOutput * v1, + const VertexOutput * v2, const VertexOutput * v3) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + const unsigned varyingCount = 0;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + const unsigned height = ctx->frameSurface.height; + const VertexOutput * a = v1, * b = v2, * d = v3; + //abc is a triangle, bcd is another triangle, they share bc as horizontal edge + //c is between a and d, xy is screen coord + + //first sort 3 vertices by MIN y first + if 
(v2->position.y < v1->position.y) + { + a = v2; + b = v1; + } + if (v3->position.y < a->position.y) + { + d = b; + b = a; + a = v3; + } + else if (v3->position.y < b->position.y) + { + d = b; + b = v3; + } + + assert(a->position.y <= b->position.y && b->position.y <= d->position.y); + + VertexOutput cVertex; + const VertexOutput* c = &cVertex; + + const VectorComp_t cLerp = (b->position.y - a->position.y) / + MAX2(VectorComp_t_One, (d->position.y - a->position.y)); + // create 4th vertex, same y as b to form two triangles/trapezoids sharing horizontal edge + InterpolateVertex(a, d, cLerp, &cVertex, varyingCount); + + if (c->position.x < b->position.x) + { + const VertexOutput * tmp = c; + c = b; + b = tmp; + } + + if ((int)a->position.y < (int)height && (int)b->position.y >= 0) + RasterTrapezoid(iface, a, a, b, c); + //b->position.y += VectorComp_t_One; + //c->position.y += VectorComp_t_One; + if ((int)b->position.y < (int)height && (int)d->position.y >= 0) + RasterTrapezoid(iface, b, c, d, d); +} + +static void DrawTriangle(const GGLInterface * iface, const VertexInput * vin1, + const VertexInput * vin2, const VertexInput * vin3) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + + VertexOutput vouts[3]; + VertexOutput * v1 = vouts + 0, * v2 = vouts + 1, * v3 = vouts + 2; + +#ifdef SHADER_SOA + assert(0); // not implemented +#endif + + iface->ProcessVertex(iface, vin1, v1); + iface->ProcessVertex(iface, vin2, v2); + iface->ProcessVertex(iface, vin3, v3); + + v1->position /= v1->position.w; + v2->position /= v2->position.w; + v3->position /= v3->position.w; + + iface->ViewportTransform(iface, &v1->position); + iface->ViewportTransform(iface, &v2->position); + iface->ViewportTransform(iface, &v3->position); + + VectorComp_t area; + area = v1->position.x * v2->position.y - v2->position.x * v1->position.y; + area += v2->position.x * v3->position.y - v3->position.x * v2->position.y; + area += v3->position.x * v1->position.y - v1->position.x * v3->position.y; + area *= 0.5f; + + 
if (GL_CCW == ctx->cullState.frontFace + GL_CW) + (unsigned &)area ^= 0x80000000; + + if (ctx->cullState.enable) + { + switch(ctx->cullState.cullFace + GL_FRONT) + { + case GL_FRONT: + if (!((unsigned &)area & 0x80000000)) // +ve, front facing + return; + break; + case GL_BACK: + if ((unsigned &)area & 0x80000000) // -ve, back facing + return; + break; + case GL_FRONT_AND_BACK: + return; + default: + assert(0); + } + } + + v1->frontFacingPointCoord.y = v2->frontFacingPointCoord.y = + v3->frontFacingPointCoord.y = !((unsigned &)area & 0x80000000) ? + VectorComp_t_One : VectorComp_t_Zero; + + iface->StencilSelect(iface, ((unsigned &)area & 0x80000000) ? GL_BACK : GL_FRONT); + +// if (0) +// { +// GGLContext * ctx =(GGLContext *)iface; +// for (unsigned sampler = 0; sampler < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; sampler++) +// { +// if (!((1 << sampler) & ctx->glCtx->Shader.CurrentProgram->FragmentProgram->SamplersUsed)) +// continue; +// const GGLTexture * texture = ctx->textureState.textures + sampler; +// int level = texture->width * texture->height / (area * 2) - 4; +// assert(texture->levels); +// ctx->textureState.textureData[sampler] = texture->levels[0]; +// ctx->textureState.textureDimensions[sampler * 2] = texture->width; +// ctx->textureState.textureDimensions[sampler * 2 + 1] = texture->height; +// for (unsigned i = 1; i < texture->levelCount && i <= level; i++) +// { +// ctx->textureState.textureData[sampler] = texture->levels[i]; +// ctx->textureState.textureDimensions[sampler * 2] += 1; +// ctx->textureState.textureDimensions[sampler * 2] /= 2; +// ctx->textureState.textureDimensions[sampler * 2 + 1] += 1; +// ctx->textureState.textureDimensions[sampler * 2 + 1] /= 2; +// } +// } +// } + + // TODO DXL view frustum clipping + iface->RasterTriangle(iface, v1, v2, v3); + +} + +static void PickRaster(GGLInterface * iface) +{ + iface->ProcessVertex = ProcessVertex; + iface->DrawTriangle = DrawTriangle; + iface->RasterTriangle = RasterTriangle; + 
iface->RasterTrapezoid = RasterTrapezoid; +} + +static void ViewportTransform(const GGLInterface * iface, Vector4 * v) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + v->x = v->x * ctx->viewport.w + ctx->viewport.x; + v->y = v->y * ctx->viewport.h + ctx->viewport.y; + v->z = v->z * ctx->viewport.f + ctx->viewport.n; +} + + +void InitializeRasterFunctions(GGLInterface * iface) +{ + GGL_GET_CONTEXT(ctx, iface); + ctx->PickRaster = PickRaster; + iface->ViewportTransform = ViewportTransform; +}
\ No newline at end of file diff --git a/src/pixelflinger2/scanline.cpp b/src/pixelflinger2/scanline.cpp new file mode 100644 index 0000000..52ff2d2 --- /dev/null +++ b/src/pixelflinger2/scanline.cpp @@ -0,0 +1,535 @@ +/** + ** + ** Copyright 2010, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#include "src/pixelflinger2/pixelflinger2.h" +#include "src/pixelflinger2/texture.h" + +//#include "src/gallivm/gallivm_p.h" +//#include "src/shader/prog_parameter.h" + +#include <assert.h> +#include <stdio.h> + +#if !USE_LLVM_SCANLINE + +static void Saturate(Vec4<BlendComp_t> * color) +{ + color->r = MIN2(MAX2(color->r, 0), 255); + color->g = MIN2(MAX2(color->g, 0), 255); + color->b = MIN2(MAX2(color->b, 0), 255); + color->a = MIN2(MAX2(color->a, 0), 255); +} + +static inline void RGBAIntToRGBAIntx4(unsigned rgba, Vec4<BlendComp_t> * color) __attribute__((always_inline)); +static inline void RGBAIntToRGBAIntx4(unsigned rgba, Vec4<BlendComp_t> * color) +{ + color->r = rgba & 0xff; + color->g = (rgba >>= 8) & 0xff; + color->b = (rgba >>= 8) & 0xff; + color->a = (rgba >>= 8); +} + +static inline void RGBAFloatx4ToRGBAIntx4(Vector4 * v, Vec4<BlendComp_t> * color) +{ + color->r = v->r * 255; + color->g = v->g * 255; + color->b = v->b * 255; + color->a = v->a * 255; +} + +static inline unsigned RGBAIntx4ToRGBAInt(const Vec4<BlendComp_t> * color); +static inline unsigned RGBAIntx4ToRGBAInt(const Vec4<BlendComp_t> * 
color) +{ + return color->r | (color->g << 8) | (color->b << 16) | (color->a << 24); +} + + + +//static inline Pixel Vector4ToPixelRGBA(const Vector4 * color) __attribute__((always_inline)); +//static inline Pixel Vector4ToPixelRGBA(const Vector4 * color) +//{ +// Pixel pixel; +//#if defined(__ARM_HAVE_NEON) && USE_NEON +// int32x4_t c = vcvtq_s32_f32(vmulq_n_f32(color->f4, 255.0f)); +// c = vminq_s32(c, vdupq_n_s32(255)); +// c = vmaxq_s32(c, vdupq_n_s32(0)); +// pixel.channels[0] = (unsigned char)vgetq_lane_s32(c, 0); +// pixel.channels[1] = (unsigned char)vgetq_lane_s32(c, 1); +// pixel.channels[2] = (unsigned char)vgetq_lane_s32(c, 2); +// pixel.channels[3] = (unsigned char)vgetq_lane_s32(c, 3); +//#else +// pixel.channels[0] = (unsigned char)MIN2(MAX2((short)(color->r * 255), 0), 255); +// pixel.channels[1] = (unsigned char)MIN2(MAX2((short)(color->g * 255), 0), 255); +// pixel.channels[2] = (unsigned char)MIN2(MAX2((short)(color->b * 255), 0), 255); +// pixel.channels[3] = (unsigned char)MIN2(MAX2((short)(color->a * 255), 0), 255); +//#endif //#if USE_FIXED_POINT +// return pixel; +//} + +template<typename T> +static inline void BlendFactor(const unsigned mode, T & factor, const T & src, + const T & dst, const T & constant, const T & one, + const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA, + const BlendComp_t & constantA, const BlendComp_t & sOne) __attribute__((always_inline)); +template<typename T> +static inline void BlendFactor(const unsigned mode, T & factor, const T & src, + const T & dst, const T & constant, const T & one, + const T & zero, const BlendComp_t & srcA, const BlendComp_t & dstA, + const BlendComp_t & constantA, const BlendComp_t & sOne) +{ + switch (mode) + { + case 0: // GL_ZERO + factor = zero; + return; + case 1: // GL_ONE + factor = one; + return; + case 2: // GL_SRC_COLOR: + factor = src; + return; + case 3: // GL_ONE_MINUS_SRC_COLOR: + factor = one; + factor -= src; + return; + case 4: // GL_DST_COLOR: + factor = 
dst; + return; + case 5: // GL_ONE_MINUS_DST_COLOR: + factor = one; + factor -= dst; + return; + case 6: // GL_SRC_ALPHA: + factor = srcA; + return; + case 7: // GL_ONE_MINUS_SRC_ALPHA: + factor = sOne - srcA; + return; + case 8: // GL_DST_ALPHA: + factor = dstA; + return; + case 9: // GL_ONE_MINUS_DST_ALPHA: + factor = sOne - dstA; + return; + case 10: // GL_SRC_ALPHA_SATURATE: // valid only for source color; src alpha = 1 + factor = MIN2(srcA, sOne - dstA); + return; + case 11: // GL_CONSTANT_COLOR: + factor = constant; + return; + case 12: // GL_ONE_MINUS_CONSTANT_COLOR: + factor = one; + factor -= constant; + return; + case 13: // GL_CONSTANT_ALPHA: + factor = constantA; + return; + case 14: // GL_ONE_MINUS_CONSTANT_ALPHA: + factor = sOne - constantA; + return; + default: + assert(0); + return; + } +} +#endif // #if !USE_LLVM_SCANLINE + +unsigned char StencilOp(const unsigned op, unsigned char s, const unsigned char ref) +{ + switch (op) + { + case 0: // GL_ZERO + return 0; + case 1: // GL_KEEP + return s; + case 2: // GL_REPLACE + return ref; + case 3: // GL_INCR + if (s < 255) + return ++s; + return s; + case 4: // GL_DECR + if (s > 0) + return --s; + return 0; + case 5: // GL_INVERT + return ~s; + case 6: // GL_INCR_WRAP + return ++s; + case 7: // GL_DECR_WRAP + return --s; + default: assert(0); return s; + } +} + +template <bool StencilTest, bool DepthTest, bool DepthWrite, bool BlendEnable> +void ScanLine(const GGLInterface * iface, const VertexOutput * v1, const VertexOutput * v2) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + // assert((unsigned)v1->position.y == (unsigned)v2->position.y); + // + // assert(GGL_PIXEL_FORMAT_RGBA_8888 == ctx->frameSurface.format); + // assert(GGL_PIXEL_FORMAT_Z_32 == ctx->depthSurface.format); + // assert(ctx->frameSurface.width == ctx->depthSurface.width); + // assert(ctx->frameSurface.height == ctx->depthSurface.height); + + const unsigned int varyingCount = 0;//ctx->glCtx->Shader.CurrentProgram->Varying->NumParameters; + 
const unsigned y = v1->position.y, startX = v1->position.x, + endX = v2->position.x; + + //assert(ctx->frameSurface.width > startX && ctx->frameSurface.width > endX); + //assert(ctx->frameSurface.height > y); + + unsigned * frame = (unsigned *)ctx->frameSurface.data + + y * ctx->frameSurface.width + startX; + const VectorComp_t div = VectorComp_t_CTR(1 / (float)(endX - startX)); + + VertexOutput vertex(*v1); + VertexOutput vertexDx(*v2); + + vertexDx.position -= v1->position; + vertexDx.position *= div; + //printf("vertexDx.position.z=%.8g \n", vertexDx.position.z); + for (unsigned i = 0; i < varyingCount; i++) + { + vertexDx.varyings[i] -= v1->varyings[i]; + vertexDx.varyings[i] *= div; + } + vertexDx.frontFacingPointCoord -= v1->frontFacingPointCoord; + vertexDx.frontFacingPointCoord *= div; // gl_PointCoord, only zw + vertexDx.frontFacingPointCoord.y = 0; // gl_FrontFacing not interpolated + +#if USE_FORCED_FIXEDPOINT + for (unsigned j = 0; j < 4; j++) + { + for (unsigned i = 0; i < varyingCount; i++) + { + vertex.varyings[i].i[j] = vertex.varyings[i].f[j] * 65536; + vertexDx.varyings[i].i[j] = vertexDx.varyings[i].f[j] * 65536; + } + vertex.position.i[j] = vertex.position.f[j] * 65536; + vertexDx.position.i[j] = vertexDx.position.f[j] * 65536; + vertex.frontFacingPointCoord.i[j] = vertex.frontFacingPointCoord.f[j] * 65536; + } +#endif + + int * depth = (int *)ctx->depthSurface.data + y * ctx->frameSurface.width + startX; + unsigned char * stencil = (unsigned char *)ctx->stencilSurface.data + y * ctx->frameSurface.width + startX; + +#if !USE_LLVM_TEXTURE_SAMPLER + extern const GGLContext * textureGGLContext; + textureGGLContext = ctx; +#endif + + // TODO DXL consider inverting gl_FragCoord.y + +#if USE_LLVM_SCANLINE + typedef void (* ScanLineFunction_t)(VertexOutput * start, VertexOutput * step, + Vector4 * constants, unsigned * frame, + int * depth, unsigned char * stencil, + GGLContext::ActiveStencilState *, + unsigned count); + +// ScanLineFunction_t 
scanLineFunction = (ScanLineFunction_t) +// ctx->glCtx->Shader.CurrentProgram->GLVMFP->function; + if (endX >= startX) + { +// scanLineFunction(&vertex, &vertexDx, (Vector4 *) +// ctx->glCtx->Shader.CurrentProgram->FragmentProgram->Parameters->ParameterValues, +// frame, depth, stencil, &ctx->activeStencil, endX - startX + 1); + } +#else + + int z; + bool sCmp = true; // default passed, unless failed by stencil test + unsigned char s; // masked stored stencil value + const unsigned char sMask = ctx->activeStencil.mask; + const unsigned char sRef = ctx->activeStencil.ref; + const unsigned sFunc = ctx->activeStencil.face ? 0x200 | ctx->backStencil.func : + 0x200 | ctx->frontStencil.func; + const unsigned ssFail = ctx->activeStencil.face ? ctx->backStencil.sFail : + ctx->frontStencil.sFail; + const unsigned sdFail = ctx->activeStencil.face ? ctx->backStencil.dFail : + ctx->frontStencil.dFail; + const unsigned sdPass = ctx->activeStencil.face ? ctx->backStencil.dPass : + ctx->frontStencil.dPass; + + for (unsigned x = startX; x <= endX; x++) + { + //assert(abs((int)(vertex.position.x) - (int)x) < 2); + //assert((unsigned)vertex.position.y == y); + if (StencilTest) + { + s = *stencil & sMask; + switch (sFunc) + { + case GL_NEVER: sCmp = false; break; + case GL_LESS: sCmp = sRef < s; break; + case GL_EQUAL: sCmp = sRef == s; break; + case GL_LEQUAL: sCmp = sRef <= s; break; + case GL_GREATER: sCmp = sRef > s; break; + case GL_NOTEQUAL: sCmp = sRef != s; break; + case GL_GEQUAL: sCmp = sRef >= s; break; + case GL_ALWAYS: sCmp = true; break; + default: assert(0); break; + } + } + + if (!StencilTest || sCmp) + { + z = vertex.position.i[2]; + if (z & 0x80000000) // negative float has leading 1 + z ^= 0x7fffffff; // bigger negative is smaller + bool zCmp = false; + switch (0x200 | ctx->bufferState.depthFunc) + { + case GL_NEVER: zCmp = false; break; + case GL_LESS: zCmp = z < *depth; break; + case GL_EQUAL: zCmp = z == *depth; break; + case GL_LEQUAL: zCmp = z <= *depth; 
break; + case GL_GREATER: zCmp = z > *depth; break; + case GL_NOTEQUAL: zCmp = z != *depth; break; + case GL_GEQUAL: zCmp = z >= *depth; break; + case GL_ALWAYS: zCmp = true; break; + default: assert(0); break; + } + if (!DepthTest || zCmp) + { + ShaderFunction_t function = ctx->glCtx->Shader.CurrentProgram->GLVMFP->function; + const Vector4 * inputs = &vertex.position; + const Vector4 * constants = (Vector4 *)ctx->glCtx->Shader.CurrentProgram->FragmentProgram->Parameters->ParameterValues; + Vector4 * outputs = vertex.fragColor; + function(inputs, outputs, constants); + + const RegDesc & outputRegDesc = ctx->glCtx->Shader.CurrentProgram-> + GLVMFP->outputRegDesc; + if (BlendEnable) + { + BlendComp_t sOne = 255, sZero = 0; + Vec4<BlendComp_t> one = sOne, zero = sZero; + + Vec4<BlendComp_t> src; + if (outputRegDesc.IsInt32Color()) + RGBAIntToRGBAIntx4(vertex.fragColor[0].u[0], &src); + else if (outputRegDesc.IsVectorType(Float)) + RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src); + else if (outputRegDesc.IsVectorType(Fixed8)) + { + src.u[0] = vertex.fragColor[0].u[0]; + src.u[1] = vertex.fragColor[0].u[1]; + src.u[2] = vertex.fragColor[0].u[2]; + src.u[3] = vertex.fragColor[0].u[3]; + } + else + assert(0); + + Vec4<BlendComp_t> dst; + unsigned dc = *frame; + dst.r = dc & 255; + dst.g = (dc >>= 8) & 255; + dst.b = (dc >>= 8) & 255; + dst.a = (dc >>= 8) & 255; + + Vec4<BlendComp_t> sf, df; + + BlendFactor(ctx->blendState.scf, sf, src, dst, + ctx->blendState.color, one, zero, src.a, dst.a, + ctx->blendState.color.a, sOne); + if (ctx->blendState.scf != ctx->blendState.saf) + BlendFactor(ctx->blendState.saf, sf.a, src.a, dst.a, + ctx->blendState.color.a, sOne, sZero, src.a, dst.a, + ctx->blendState.color.a, sOne); + BlendFactor(ctx->blendState.dcf, df, src, dst, + ctx->blendState.color, one, zero, src.a, dst.a, + ctx->blendState.color.a, sOne); + if (ctx->blendState.dcf != ctx->blendState.daf) + BlendFactor(ctx->blendState.daf, df.a, src.a, dst.a, + 
ctx->blendState.color.a, sOne, sZero, src.a, dst.a, + ctx->blendState.color.a, sOne); + + Vec4<BlendComp_t> sfs(sf), dfs(df); + sfs.LShr(7); sf += sfs; + dfs.LShr(7); df += dfs; + + src *= sf; + dst *= df; + Vec4<BlendComp_t> res(src); + switch (ctx->blendState.ce + GL_FUNC_ADD) + { + case GL_FUNC_ADD: + res += dst; + break; + case GL_FUNC_SUBTRACT: + res -= dst; + break; + case GL_FUNC_REVERSE_SUBTRACT: + res = dst; + res -= src; + break; + default: assert(0); break; + } + if (ctx->blendState.ce != ctx->blendState.ae) + switch (ctx->blendState.ce + GL_FUNC_ADD) + { + case GL_FUNC_ADD: + res.a = src.a + dst.a; + break; + case GL_FUNC_SUBTRACT: + res.a = src.a - dst.a; + break; + case GL_FUNC_REVERSE_SUBTRACT: + res.a = dst.a - src.a; + break; + default: assert(0); break; + } + + res.AShr(8); + Saturate(&res); + *frame = RGBAIntx4ToRGBAInt(&res); + } + else + { + if (outputRegDesc.IsInt32Color()) + *frame = vertex.fragColor[0].u[0]; + else if (outputRegDesc.IsVectorType(Float)) + { + Vec4<BlendComp_t> src; + RGBAFloatx4ToRGBAIntx4(&vertex.fragColor[0], &src); + Saturate(&src); + *frame = RGBAIntx4ToRGBAInt(&src); + } + else if (outputRegDesc.IsVectorType(Fixed16)) + { + Vec4<BlendComp_t> & src = (Vec4<BlendComp_t> &)vertex.fragColor[0]; + src.r = (src.r * 255 >> 16); + src.g = (src.g * 255 >> 16); + src.b = (src.b * 255 >> 16); + src.a = (src.a * 255 >> 16); + Saturate(&src); + *frame = RGBAIntx4ToRGBAInt(&src); + } + else if (outputRegDesc.IsVectorType(Fixed8)) + { + Vec4<BlendComp_t> & src = (Vec4<BlendComp_t> &)vertex.fragColor[0]; + Saturate(&src); + *frame = RGBAIntx4ToRGBAInt(&src); + } + else + assert(0); + } + + if (DepthWrite) + *depth = z; + if (StencilTest) + *stencil = StencilOp(sdPass, s, sRef); + } + else if (StencilTest) + *stencil = StencilOp(sdFail, s, sRef); + } + else if (StencilTest) + *stencil = StencilOp(ssFail, s, sRef); + + frame++; + depth++; + stencil++; + +#if USE_FORCED_FIXEDPOINT + for (unsigned j = 0; j < 4; j++) + { + if 
(ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord) + vertex.position.i[j] += vertexDx.position.i[j]; + for (unsigned i = 0; i < varyingCount; i++) + vertex.varyings[i].i[j] += vertexDx.varyings[i].i[j]; + } + vertex.position.i[2] += vertexDx.position.i[2]; + if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) + { + vertex.frontFacingPointCoord.i[2] = vertexDx.frontFacingPointCoord.i[2]; + vertex.frontFacingPointCoord.i[3] = vertexDx.frontFacingPointCoord.i[3]; + } +#else + if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesFragCoord) + vertex.position += vertexDx.position; + else if (ctx->bufferState.depthTest) + vertex.position.z += vertexDx.position.z; + + for (unsigned i = 0; i < varyingCount; i++) + vertex.varyings[i] += vertexDx.varyings[i]; + if (ctx->glCtx->Shader.CurrentProgram->FragmentProgram->UsesPointCoord) + { + vertex.frontFacingPointCoord.z += vertexDx.frontFacingPointCoord.z; + vertex.frontFacingPointCoord.w += vertexDx.frontFacingPointCoord.w; + } +#endif // #if USE_FORCED_FIXEDPOINT + } + +#endif // #if USE_LLVM_SCANLINE + +#if !USE_LLVM_TEXTURE_SAMPLER + textureGGLContext = NULL; +#endif +} + +static void PickScanLine(GGLInterface * iface) +{ + GGL_GET_CONTEXT(ctx, iface); + + ctx->interface.ScanLine = NULL; + const bool DepthWrite = true; + if (ctx->bufferState.stencilTest) + { + if (ctx->bufferState.depthTest) + { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<true, true, DepthWrite, false>; + } + else + { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<true, false, DepthWrite, false>; + } + } + else + { + if (ctx->bufferState.depthTest) + { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<false, true, DepthWrite, 
false>; + } + else + { + if (ctx->blendState.enable) + ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, true>; + else + ctx->interface.ScanLine = ScanLine<false, false, DepthWrite, false>; + } + } + + assert(ctx->interface.ScanLine); +} + +void InitializeScanLineFunctions(GGLInterface * iface) +{ + GGL_GET_CONTEXT(ctx, iface); + ctx->PickScanLine = PickScanLine; +}
\ No newline at end of file diff --git a/src/pixelflinger2/shader.cpp b/src/pixelflinger2/shader.cpp new file mode 100644 index 0000000..f3ae1c5 --- /dev/null +++ b/src/pixelflinger2/shader.cpp @@ -0,0 +1,436 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/pixelflinger2/pixelflinger2.h" + +#include <assert.h> +#include <stdio.h> + +#include <llvm/LLVMContext.h> + +static gl_shader * ShaderCreate(const GGLInterface * iface, GLenum type) +{ + if (GL_VERTEX_SHADER != type && GL_FRAGMENT_SHADER != type) + { + gglError(GL_INVALID_ENUM); + return NULL; + } +// gl_shader * shader = _mesa_new_shader(0, type); +// if(!shader) +// gglError(GL_OUT_OF_MEMORY); +// return shader; + return NULL; +} + +static GLboolean ShaderCompile(const GGLInterface * iface, gl_shader * shader, + const char * glsl, char ** infoLog) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + if (!glsl) + { + gglError(GL_INVALID_VALUE); + return GL_FALSE; + } +// shader->Source = glsl; +// _slang_compile(ctx->glCtx, shader); +// shader->Source = NULL; +// if (infoLog) +// *infoLog = shader->InfoLog; +// return shader->CompileStatus; + return GL_FALSE; +} + +static void ShaderFree(const GGLInterface * iface, gl_shader * shader) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// _mesa_free_shader(ctx->glCtx, shader); +} + +static gl_shader_program * ShaderProgramCreate(const GGLInterface * iface) +{ +// gl_shader_program * program = 
_mesa_new_shader_program(0); +// if (!program) +// gglError(GL_OUT_OF_MEMORY); +// return program; + return NULL; +} + +static GLboolean ShaderProgramLink(const GGLInterface * iface, gl_shader_program * program, + const unsigned count, gl_shader ** shaders, char ** infoLog) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + +// program->NumShaders = count; +// program->Shaders = shaders; +// +// _slang_link(ctx->glCtx, program); +// +// program->NumShaders = 0; +// program->Shaders = NULL; +// +// if (infoLog) +// *infoLog = program->InfoLog; +// +// return program->LinkStatus; + return GL_FALSE; +} + +struct gl_program; +struct ShaderKey; + +static void GetShaderKey(const GGLContext * ctx, const gl_program * shader, ShaderKey * key) +{ +// memset(key, 0, sizeof(*key)); +// if (GL_FRAGMENT_SHADER == shader->Target) +// { +// key->scanLineKey.frontStencil = ctx->frontStencil; +// key->scanLineKey.backStencil = ctx->backStencil; +// key->scanLineKey.bufferState = ctx->bufferState; +// key->scanLineKey.blendState = ctx->blendState; +// } +// +// for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) +// if (shader->SamplersUsed & (1 << i)) +// { +// const GGLTexture & texture = ctx->textureState.textures[i]; +// key->textureFormats[i] = texture.format; +// assert((1 << 2) > texture.wrapS); +// key->textureParameters[i] |= texture.wrapS; +// assert((1 << 2) > texture.wrapT); +// key->textureParameters[i] |= texture.wrapT << 2; +// assert((1 << 3) > texture.minFilter); +// key->textureParameters[i] |= texture.minFilter << (2 + 2); +// assert((1 << 1) > texture.magFilter); +// key->textureParameters[i] |= texture.magFilter << (2 + 2 + 3); +// } +} + +static inline char HexDigit(unsigned char d) +{ + return (d > 9 ? 
d + 'A' - 10 : d + '0'); +} + +static const unsigned SHADER_KEY_STRING_LEN = GGL_MAXCOMBINEDTEXTUREIMAGEUNITS * 4 + 2; + +static void GetShaderKeyString(const GLenum type, const ShaderKey * key, + char * buffer, const unsigned bufferSize) +{ +// assert(1 == sizeof(char)); +// assert(0xff >= GGL_PIXEL_FORMAT_COUNT); +// assert(SHADER_KEY_STRING_LEN <= bufferSize); +// char * str = buffer; +// if (GL_VERTEX_SHADER == type) +// *str++ = 'v'; +// else if (GL_FRAGMENT_SHADER == type) +// *str++ = 'f'; +// else +// assert(0); +// for (unsigned i = 0; i < GGL_MAXCOMBINEDTEXTUREIMAGEUNITS; i++) +// { +// *str++ = HexDigit(key->textureFormats[i] / 16); +// *str++ = HexDigit(key->textureFormats[i] % 16); +// *str++ = HexDigit(key->textureParameters[i] / 16); +// *str++ = HexDigit(key->textureParameters[i] % 16); +// } +// *str++ = '\0'; +} + +//static const unsigned SCANLINE_KEY_STRING_LEN = 2 * sizeof(((ShaderKey *)0)->scanLineKey) + +// 3 + SHADER_KEY_STRING_LEN; + +static char * GetScanlineKeyString(const ShaderKey * key, char * buffer, + const unsigned bufferSize) +{ +// assert(1 == sizeof(char)); +// assert(0xff >= GGL_PIXEL_FORMAT_COUNT); +// assert(SCANLINE_KEY_STRING_LEN <= bufferSize); +// char * str = buffer; +// *str++ = 's'; +// const unsigned char * start = (const unsigned char *)&key->scanLineKey; +// const unsigned char * const end = start + sizeof(key->scanLineKey); +// for (; start < end; start++) +// { +// *str++ = HexDigit(*start / 16); +// *str++ = HexDigit(*start % 16); +// } +// GetShaderKeyString(GL_FRAGMENT_SHADER, key, str, bufferSize - (str - buffer)); +// return buffer; + return NULL; +} + +static void ShaderUse(GGLInterface * iface, gl_shader_program * program) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); + assert(program); + if (!program) + { +// ctx->glCtx->Shader.CurrentProgram = NULL; + // so drawing calls will do nothing until ShaderUse with a program + SetShaderVerifyFunctions(iface); + return; + } + +// if (program->VertexProgram) +// { +// if 
(!program->STVP) +// { +// program->STVP = CALLOC_STRUCT(st_vertex_program); +// program->STVP->Base = *program->VertexProgram; +// st_translate_vertex_program(ctx->glCtx, program->STVP, NULL, NULL, NULL); +// } +// +// _mesa_update_shader_textures_used(program->VertexProgram); +// +// ShaderKey shaderKey; +// GetShaderKey(ctx, program->VertexProgram, &shaderKey); +// ShaderFunction_t function = NULL; +// if (!program->GLVMVP || NULL == (function = program->GLVMVP->functions[shaderKey])) +// { +// char shaderName [SHADER_KEY_STRING_LEN] = {0}; +// GetShaderKeyString(GL_VERTEX_SHADER, &shaderKey, shaderName, Elements(shaderName)); +// create_program(program->STVP->state.tokens, GALLIVM_VS, &program->GLVMVP, +// &ctx->glCtx->Shader.cpu, ctx, program->VertexProgram, +// shaderName, NULL); +// program->GLVMVP->functions[shaderKey] = program->GLVMVP->function; +// debug_printf("jit new vertex shader %p \n", program->GLVMVP->function); //getchar(); +// } +// else +// { +// program->GLVMVP->function = function; +// //debug_printf("use cached vertex shader %p \n", function); +// } +// ctx->PickRaster(iface); +// } +// if (program->FragmentProgram) +// { +// if (!program->STFP) +// { +// program->STFP = CALLOC_STRUCT(st_fragment_program); +// program->STFP->Base = *program->FragmentProgram; +// st_translate_fragment_program(ctx->glCtx, program->STFP, NULL); +// } +// +// _mesa_update_shader_textures_used(program->FragmentProgram); +// +// ShaderKey shaderKey; +// GetShaderKey(ctx, program->FragmentProgram, &shaderKey); +// ShaderFunction_t function = NULL; +// if (!program->GLVMFP || NULL == (function = program->GLVMFP->functions[shaderKey])) +// { +// char shaderName [SHADER_KEY_STRING_LEN] = {0}; +// GetShaderKeyString(GL_FRAGMENT_SHADER, &shaderKey, shaderName, Elements(shaderName)); +// +// char scanlineName [SCANLINE_KEY_STRING_LEN] = {0}; +// GetScanlineKeyString(&shaderKey, scanlineName, Elements(scanlineName)); +// create_program(program->STFP->state.tokens, 
GALLIVM_FS, &program->GLVMFP, +// &ctx->glCtx->Shader.cpu, ctx, program->FragmentProgram, +// shaderName, scanlineName); +// program->GLVMFP->functions[shaderKey] = program->GLVMFP->function; +// debug_printf("jit new fragment shader %p \n", program->GLVMFP->function); +// } +// else +// { +// program->GLVMFP->function = function; +// //debug_printf("use cached fragment shader %p \n", function); +// } +// ctx->PickScanLine(iface); +// } +// ctx->glCtx->Shader.CurrentProgram = program; +} + +static void ShaderProgramFree(const GGLInterface * iface, gl_shader_program * program) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// if (ctx->glCtx->Shader.CurrentProgram == program) +// { +// ctx->glCtx->Shader.CurrentProgram = NULL; +// SetShaderVerifyFunctions(const_cast<GGLInterface *>(iface)); +// } +// assert(program); +// if (program->GLVMVP) +// gallivm_prog_delete(ctx->glCtx->Shader.cpu, program->GLVMVP); +// program->GLVMVP = NULL; +// if (program->GLVMFP) +// gallivm_prog_delete(ctx->glCtx->Shader.cpu, program->GLVMFP); +// program->GLVMFP = NULL; +// SAFE_FREE(program->STVP); +// SAFE_FREE(program->STFP); +// _mesa_free_shader_program(ctx->glCtx, program); +} + +static void ShaderAttributeBind(const GGLInterface * iface, const gl_shader_program * program, + GLuint index, const GLchar * name) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// _mesa_bind_attrib_location(ctx->glCtx, program, index, name, DEFAULTP); +} + +static GLint ShaderAttributeLocation(const GGLInterface * iface, const gl_shader_program * program, + const char * name) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// return _mesa_get_attrib_location(ctx->glCtx, program, name); + return -2; +} + +static GLint ShaderUniformLocation(const GGLInterface * iface, const gl_shader_program * program, + const char * name) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// return _mesa_get_shader_uniform_location(ctx->glCtx, program, name); + return -2; +} + +static void ShaderUniformGetfv(const GGLInterface * iface, 
gl_shader_program * program, + GLint location, GLfloat * params) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// _mesa_get_uniformfv(ctx->glCtx, program, location, params); +} + +static void ShaderUniformGetiv(const GGLInterface * iface, gl_shader_program * program, + GLint location, GLint * params) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// _mesa_get_uniformiv(ctx->glCtx, program, location, params); +} + +static GLint ShaderUniform(const GGLInterface * iface, gl_shader_program * program, + GLint location, GLsizei count, const GLvoid *values, GLenum type) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// if (!program) +// { +// gglError(GL_INVALID_OPERATION); +// return -2; +// } +// return _mesa_uniform(ctx->glCtx, program, location, count, values, type); + return -2; +} + +static void ShaderUniformMatrix(const GGLInterface * iface, gl_shader_program * program, + GLint cols, GLint rows, GLint location, GLsizei count, + GLboolean transpose, const GLfloat *values) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// if (!program) +// return gglError(GL_INVALID_OPERATION); +// _mesa_uniform_matrix(ctx->glCtx, program, cols, rows, location, count, transpose, values); +} + +static void ShaderVerifyProcessVertex(const GGLInterface * iface, const VertexInput * input, + VertexOutput * output) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// if (ctx->glCtx->Shader.CurrentProgram) +// { +// ShaderUse(const_cast<GGLInterface *>(iface), ctx->glCtx->Shader.CurrentProgram); +// if (ShaderVerifyProcessVertex != iface->ProcessVertex) +// iface->ProcessVertex(iface, input, output); +// } +} + +static void ShaderVerifyDrawTriangle(const GGLInterface * iface, const VertexInput * v0, + const VertexInput * v1, const VertexInput * v2) +{ +// GGL_GET_CONST_CONTEXT(ctx, iface); +// if (ctx->glCtx->Shader.CurrentProgram) +// { +// ShaderUse(const_cast<GGLInterface *>(iface), ctx->glCtx->Shader.CurrentProgram); +// if (ShaderVerifyDrawTriangle != iface->DrawTriangle) +// iface->DrawTriangle(iface, v0, v1, v2); 
+// } +} + +static void ShaderVerifyRasterTriangle(const GGLInterface * iface, const VertexOutput * v1, + const VertexOutput * v2, const VertexOutput * v3) +{ +// GGL_GET_CONST_CONTEXT(ctx, iface); +// if (ctx->glCtx->Shader.CurrentProgram) +// { +// ShaderUse(const_cast<GGLInterface *>(iface), ctx->glCtx->Shader.CurrentProgram); +// if (ShaderVerifyRasterTriangle != iface->RasterTriangle) +// iface->RasterTriangle(iface, v1, v2, v3); +// } +} + +static void ShaderVerifyRasterTrapezoid(const GGLInterface * iface, const VertexOutput * tl, + const VertexOutput * tr, const VertexOutput * bl, + const VertexOutput * br) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// if (ctx->glCtx->Shader.CurrentProgram) +// { +// ShaderUse(const_cast<GGLInterface *>(iface), ctx->glCtx->Shader.CurrentProgram); +// if (ShaderVerifyRasterTrapezoid != iface->RasterTrapezoid) +// iface->RasterTrapezoid(iface, tl, tr, bl, br); +// } +} + +static void ShaderVerifyScanLine(const GGLInterface * iface, const VertexOutput * v1, + const VertexOutput * v2) +{ + GGL_GET_CONST_CONTEXT(ctx, iface); +// if (ctx->glCtx->Shader.CurrentProgram) +// { +// ShaderUse(const_cast<GGLInterface *>(iface), ctx->glCtx->Shader.CurrentProgram); +// if (ShaderVerifyScanLine != iface->ScanLine) +// iface->ScanLine(iface, v1, v2); +// } +} + +// called after state changes so that drawing calls will trigger JIT +void SetShaderVerifyFunctions(struct GGLInterface * iface) +{ + iface->ProcessVertex = ShaderVerifyProcessVertex; + iface->DrawTriangle = ShaderVerifyDrawTriangle; + iface->RasterTriangle = ShaderVerifyRasterTriangle; + iface->RasterTrapezoid = ShaderVerifyRasterTrapezoid; + iface->ScanLine = ShaderVerifyScanLine; +} + +void InitializeShaderFunctions(struct GGLInterface * iface) +{ + GGL_GET_CONTEXT(ctx, iface); + puts("ctx->llvmCtx = new llvm::LLVMContext"); + ctx->llvmCtx = new llvm::LLVMContext(); + printf("ctx->llvmCtx=%p \n", ctx->llvmCtx); + + iface->ShaderCreate = ShaderCreate; + iface->ShaderCompile = 
ShaderCompile; + iface->ShaderFree = ShaderFree; + iface->ShaderProgramCreate = ShaderProgramCreate; + iface->ShaderProgramLink = ShaderProgramLink; + iface->ShaderUse = ShaderUse; + iface->ShaderProgramFree = ShaderProgramFree; + iface->ShaderAttributeBind = ShaderAttributeBind; + iface->ShaderAttributeLocation = ShaderAttributeLocation; + iface->ShaderUniformLocation = ShaderUniformLocation; + iface->ShaderUniformGetfv = ShaderUniformGetfv; + iface->ShaderUniformGetiv = ShaderUniformGetiv; + iface->ShaderUniform = ShaderUniform; + iface->ShaderUniformMatrix = ShaderUniformMatrix; +} + +void DestroyShaderFunctions(GGLInterface * iface) +{ + GGL_GET_CONTEXT(ctx, iface); +// if (ctx->glCtx->Shader.cpu) +// { +// gallivm_cpu_engine_delete(ctx->glCtx->Shader.cpu); +// ctx->glCtx->Shader.cpu = NULL; +// } +// SAFE_DELETE(ctx->llvmCtx); +}
\ No newline at end of file diff --git a/src/pixelflinger2/texture.cpp b/src/pixelflinger2/texture.cpp new file mode 100644 index 0000000..6d72ed2 --- /dev/null +++ b/src/pixelflinger2/texture.cpp @@ -0,0 +1,426 @@ +/** + ** + ** Copyright 2010, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#include "texture.h" + +#include <assert.h> +#include <string.h> +#include <math.h> + +#include "pixelflinger2.h" + +#if USE_LLVM_EXECUTIONENGINE +#include <llvm/Module.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/DerivedTypes.h> +#endif + +#if !USE_LLVM_TEXTURE_SAMPLER + +const struct GGLContext * textureGGLContext; + +union Pixel { unsigned char channels[4]; unsigned int val; }; + +static inline void PixelRGBAToVector4 (const Pixel *pixel, Vector4 * color) __attribute__((always_inline)); +static inline void PixelRGBAToVector4 (const Pixel *pixel, Vector4 * color) +{ +#if defined(__ARM_HAVE_NEON) && USE_NEON + int32x4_t c; + c = vsetq_lane_s32(pixel->channels[0], c, 0); + c = vsetq_lane_s32(pixel->channels[1], c, 1); + c = vsetq_lane_s32(pixel->channels[2], c, 2); + c = vsetq_lane_s32(pixel->channels[3], c, 3); + color->f4 = vcvtq_f32_s32(c); + color->f4 = vmulq_n_f32(color->f4, 1 / 255.0f); +#else + color->r = (float)pixel->channels[0] / 255; + color->g = (float)pixel->channels[1] / 255; + color->b = (float)pixel->channels[2] / 255; + color->a = (float)pixel->channels[3] / 255; +#endif +} + +static 
inline void RGBAToVector4(const unsigned int rgba, Vector4 * color) +{ + PixelRGBAToVector4((const Pixel *)&rgba, color); +} + +static inline void Lerp(Vec4<int> * a, Vec4<int> * b, int x, Vec4<int> * d) +{ + for (unsigned i = 0; i < 4; i++) + { + int r = b->i[i] - a->i[i], s = a->i[i]; + d->i[i] = (r * x >> 16) + s; + } +} + +static inline void ToIntVec(Vec4<int> * a) +{ + a->u[3] = a->u[0] >> 24; + a->u[2] = (a->u[0] >> 16) & 0xff; + a->u[1] = (a->u[0] >> 8) & 0xff; + a->u[0] &= 0xff; +} + +template<GGLPixelFormat format> +static void PointSample(unsigned sample[4], const unsigned * data, const unsigned index) +{ + if (GGL_PIXEL_FORMAT_RGBA_8888 == format) + *sample = *(data + index); + else if (GGL_PIXEL_FORMAT_RGBX_8888 == format) + { + *sample = *(data + index); + *sample |= 0xff000000; + } + else if (GGL_PIXEL_FORMAT_RGB_565 == format) + { + sample[0] = *((const unsigned short *)data + index); + sample[1] = (sample[0] & 0x7e0) << 5; + sample[2] = (sample[0] & 0xf800) << 8; + sample[0] = (sample[0] & 0x1f) << 3; + + sample[0] |= sample[0] >> 5; + sample[1] = (sample[1] | (sample[1] >> 6)) & 0xff00; + sample[2] = (sample[2] | (sample[2] >> 5)) & 0xff0000; + + sample[0] |= sample[1]; + sample[0] |= sample[2]; + sample[0] |= 0xff000000; + } + else if (GGL_PIXEL_FORMAT_UNKNOWN == format) + sample[0] = 0xff00ffff; + else + assert(0); +} + +static unsigned texcoordWrap(const unsigned wrap, float r, const unsigned size, + unsigned * lerp) +{ + const unsigned shift = 16; + unsigned odd = 0; + int tc; + + tc = r * (1 << shift); + + odd = tc & (1 << shift); + if (0 == wrap || 2 == wrap) // REPEAT or MIRRORED + tc &= (1 << shift) - 1; // only take mantissa + tc *= size - 1; + // TODO DXL linear filtering needs to be fixed for texcoord outside of [0,1] + *lerp = tc & ((1 << shift) - 1); + tc >>= shift; + + if (0 == wrap) // GL_REPEAT + { } + else if (1 == wrap) // GL_CLAMP_TO_EDGE + tc = MIN2(size - 1, MAX2(0, tc)); + else if (2 == wrap) + tc = odd ? 
size - 1 - tc : tc; + else + assert(0); + return tc; +} + +template<GGLPixelFormat format, ChannelType output, unsigned minMag, unsigned wrapS, unsigned wrapT> +static void tex2d(unsigned sample[4], const float tex_coord[4], const unsigned sampler) +{ + const unsigned * data = (const unsigned *)textureGGLContext->textureState.textureData[sampler]; + const unsigned width = textureGGLContext->textureState.textureDimensions[sampler * 2]; + const unsigned height = textureGGLContext->textureState.textureDimensions[sampler * 2 + 1]; + unsigned xLerp = 0, yLerp = 0; + const unsigned x0 = texcoordWrap(wrapS, tex_coord[0], width, &xLerp); + const unsigned y0 = texcoordWrap(wrapT, tex_coord[1], height, &yLerp); + + if (0 == minMag) + { + PointSample<format>(sample, data, y0 * width + x0); + sample[1] = (sample[0] & 0xff00) >> 8; + sample[2] = (sample[0] & 0xff0000) >> 16; + sample[3] = (sample[0] & 0xff000000) >> 24; + sample[0] &= 0xff; + } + else if (1 == minMag) + { + const unsigned x1 = MIN2(width - 1, x0 + 1), y1 = MIN2(height - 1, y0 + 1); + Vec4<int> samples[4] = {0}; + PointSample<format>((unsigned *)(samples + 0), data, y0 * width + x0); + ToIntVec(samples + 0); + PointSample<format>((unsigned *)(samples + 1), data, y0 * width + x1); + ToIntVec(samples + 1); + PointSample<format>((unsigned *)(samples + 2), data, y1 * width + x1); + ToIntVec(samples + 2); + PointSample<format>((unsigned *)(samples + 3), data, y1 * width + x0); + ToIntVec(samples + 3); + + Lerp(samples + 0, samples + 1, xLerp, samples + 0); + Lerp(samples + 3, samples + 2, xLerp, samples + 3); + Lerp(samples + 0, samples + 3, yLerp, (Vec4<int> *)sample); + } + else + assert(0); + + if (Fixed0 == output) // i32 non vector + sample[0] = (sample[3] << 24) | (sample[2] << 16) | (sample[1] << 8) | sample[0]; + else if (Fixed8 == output) // 4 x i32 + ; // do nothing + else if (Fixed16 == output) // 4 x i32 + { + sample[0] <<= 8; sample[1] <<= 8; sample[2] <<= 8; sample[3] <<= 8; + } + else if (Float == 
output) // 4 x float + { + float * fsample = (float *)sample; + fsample[0] = sample[0] / 255.0f; fsample[1] = sample[1] / 255.0f; + fsample[2] = sample[2] / 255.0f; fsample[3] = sample[3] / 255.0f; + } +} + +template<GGLPixelFormat format, ChannelType output, unsigned minMag, unsigned wrapS, unsigned wrapT> +void texcube(unsigned sample[4], const float tex_coord[4], const unsigned sampler) +{ + float mx = fabs(tex_coord[0]), my = fabs(tex_coord[1]), mz = fabs(tex_coord[2]); + float s = 0, t = 0, ma = 0; + unsigned face = 0; + if (mx > my && mx > mz) + { + if (tex_coord[0] >= 0) + { + s = -tex_coord[2]; + t = -tex_coord[1]; + face = 0; + } + else + { + s = tex_coord[2]; + t = -tex_coord[1]; + face = 1; + } + ma = mx; + } + else if (my > mx && my > mz) + { + if (tex_coord[1] >= 0) + { + s = tex_coord[0]; + t = tex_coord[2]; + face = 2; + } + else + { + s = tex_coord[0]; + t = -tex_coord[2]; + face = 3; + } + ma = my; + } + else + { + if (tex_coord[2] >= 0) + { + s = tex_coord[0]; + t = -tex_coord[1]; + face = 4; + } + else + { + s = -tex_coord[0]; + t = -tex_coord[2]; + face = 5; + } + ma = mz; + } + + s = (s / ma + 1) * 0.5f; + t = (t / ma + 1) * 0.5f; + + const unsigned * data = (const unsigned *)textureGGLContext->textureState.textureData[sampler]; + const unsigned width = textureGGLContext->textureState.textureDimensions[sampler * 2]; + const unsigned height = textureGGLContext->textureState.textureDimensions[sampler * 2 + 1]; + unsigned xLerp = 0, yLerp = 0; + const unsigned x0 = texcoordWrap(wrapS, s, width, &xLerp); + const unsigned y0 = texcoordWrap(wrapT, t, height, &yLerp); + + if (0 == minMag) + { + PointSample<format>(sample, data, y0 * width + x0); + sample[1] = (sample[0] & 0xff00) >> 8; + sample[2] = (sample[0] & 0xff0000) >> 16; + sample[3] = (sample[0] & 0xff000000) >> 24; + sample[0] &= 0xff; + } + else if (1 == minMag) + { + const unsigned x1 = MIN2(width - 1, x0 + 1), y1 = MIN2(height - 1, y0 + 1); + Vec4<int> samples[4] = {0}; + 
PointSample<format>((unsigned *)(samples + 0), data, face * width * height + y0 * width + x0); + ToIntVec(samples + 0); + PointSample<format>((unsigned *)(samples + 1), data, face * width * height + y0 * width + x1); + ToIntVec(samples + 1); + PointSample<format>((unsigned *)(samples + 2), data, face * width * height + y1 * width + x1); + ToIntVec(samples + 2); + PointSample<format>((unsigned *)(samples + 3), data, face * width * height + y1 * width + x0); + ToIntVec(samples + 3); + + Lerp(samples + 0, samples + 1, xLerp, samples + 0); + Lerp(samples + 3, samples + 2, xLerp, samples + 3); + Lerp(samples + 0, samples + 3, yLerp, (Vec4<int> *)sample); + } + else + assert(0); + + if (Fixed0 == output) // i32 non vector + sample[0] = (sample[3] << 24) | (sample[2] << 16) | (sample[1] << 8) | sample[0]; + else if (Fixed8 == output) // 4 x i32 + ; // do nothing + else if (Fixed16 == output) // 4 x i32 + { + sample[0] <<= 8; sample[1] <<= 8; sample[2] <<= 8; sample[3] <<= 8; + } + else if (Float == output) // 4 x float + { + float * fsample = (float *)sample; + fsample[0] = sample[0] / 255.0f; fsample[1] = sample[1] / 255.0f; + fsample[2] = sample[2] / 255.0f; fsample[3] = sample[3] / 255.0f; + } + +} + +#define TEXTURE_FUNCTION_ENTRY(target,format,output,filter,wrapS,wrapT) \ +{ #target"_"#format"_"#output"_"#filter"_"#wrapS"_"#wrapT, \ +target<GGL_PIXEL_FORMAT_##format, output, filter, wrapS, wrapT> }, + +#define TEXTURE_FUNCTION_ENTRY_WRAPT(target,format,output,minMag,wrapS) \ +TEXTURE_FUNCTION_ENTRY(target,format,output,minMag,wrapS,0) \ +TEXTURE_FUNCTION_ENTRY(target,format,output,minMag,wrapS,1) \ +TEXTURE_FUNCTION_ENTRY(target,format,output,minMag,wrapS,2) + +#define TEXTURE_FUNCTION_ENTRY_WRAPS(target,format,output,minMag) \ +TEXTURE_FUNCTION_ENTRY_WRAPT(target,format,output,minMag,0) \ +TEXTURE_FUNCTION_ENTRY_WRAPT(target,format,output,minMag,1) \ +TEXTURE_FUNCTION_ENTRY_WRAPT(target,format,output,minMag,2) + +#define 
TEXTURE_FUNCTION_ENTRY_FILTER(target,format,output) \ +TEXTURE_FUNCTION_ENTRY_WRAPS(target,format,output,0) \ +TEXTURE_FUNCTION_ENTRY_WRAPS(target,format,output,1) + +#define TEXTURE_FUNCTION_ENTRY_OUTPUT(target,format) \ +TEXTURE_FUNCTION_ENTRY_FILTER(target,format,Float) \ +TEXTURE_FUNCTION_ENTRY_FILTER(target,format,Fixed16) \ +TEXTURE_FUNCTION_ENTRY_FILTER(target,format,Fixed8) \ +TEXTURE_FUNCTION_ENTRY_FILTER(target,format,Fixed0) + +#define TEXTURE_FUNCTION_ENTRY_FORMAT(target) \ +TEXTURE_FUNCTION_ENTRY_OUTPUT(target,RGBA_8888) \ +TEXTURE_FUNCTION_ENTRY_OUTPUT(target,RGBX_8888) \ +TEXTURE_FUNCTION_ENTRY_OUTPUT(target,RGB_565) \ +TEXTURE_FUNCTION_ENTRY_OUTPUT(target,UNKNOWN) + +#define TEXTURE_FUNCTION_ENTRIES \ +TEXTURE_FUNCTION_ENTRY_FORMAT(tex2d) \ +TEXTURE_FUNCTION_ENTRY_FORMAT(texcube) + +static struct TextureFunctionMapping +{ + const char * name; + void (* function)(unsigned sample[4], const float tex_coord[4], const unsigned int tex_id); +} textureFunctionMapping [] = { TEXTURE_FUNCTION_ENTRIES }; + + +#undef TEXTURE_FUNCTION_ENTRY + +#endif //#if !USE_LLVM_TEXTURE_SAMPLER + +#if USE_LLVM_EXECUTIONENGINE && !USE_LLVM_TEXTURE_SAMPLER + +void DeclareTextureFunctions(llvm::Module * mod) +{ + llvm::LLVMContext & llvm_ctx = mod->getContext(); + + std::vector<const llvm::Type*> funcArgs; + llvm::VectorType *vectorType = llvm::VectorType::get(llvm::Type::getFloatTy(llvm_ctx), 4); + llvm::PointerType * vectorPtr = llvm::PointerType::get(vectorType, 0); + + funcArgs.push_back(vectorPtr); + funcArgs.push_back(vectorPtr); + funcArgs.push_back(llvm::Type::getInt32Ty(llvm_ctx)); + // void function(float[4], const float[4], unsigned) + + llvm::FunctionType *functionType = llvm::FunctionType::get(llvm::Type::getVoidTy(llvm_ctx), + funcArgs, + false); + + for (unsigned i = 0; i < sizeof(textureFunctionMapping) / sizeof(*textureFunctionMapping); i++) + { + llvm::Function * func = llvm::cast<llvm::Function>( + mod->getOrInsertFunction(textureFunctionMapping[i].name, 
functionType)); + func->setLinkage(llvm::GlobalValue::ExternalLinkage); + func->setCallingConv(llvm::CallingConv::C); + } +} + +void AddTextureFunctionMappings(llvm::Module * mod, llvm::ExecutionEngine * ee) +{ + if (mod->getFunction("tex2d_soa")) + assert(0);//ee->addGlobalMapping(func, (void *)tex2d_soa); + + for (unsigned i = 0; i < sizeof(textureFunctionMapping) / sizeof(*textureFunctionMapping); i++) + { + llvm::Function * function = mod->getFunction(textureFunctionMapping[i].name); + if (function) + ee->updateGlobalMapping(function, (void *)textureFunctionMapping[i].function); + } +} +#endif // #if USE_LLVM_EXECUTIONENGINE && !USE_LLVM_TEXTURE_SAMPLER + +static void SetSampler(GGLInterface * iface, const unsigned sampler, GGLTexture * texture) +{ + assert(GGL_MAXCOMBINEDTEXTUREIMAGEUNITS > sampler); + GGL_GET_CONTEXT(ctx, iface); + if (!texture) + SetShaderVerifyFunctions(iface); + else if (ctx->textureState.textures[sampler].format != texture->format) + SetShaderVerifyFunctions(iface); + else if (ctx->textureState.textures[sampler].wrapS != texture->wrapS) + SetShaderVerifyFunctions(iface); + else if (ctx->textureState.textures[sampler].wrapT != texture->wrapT) + SetShaderVerifyFunctions(iface); + else if (ctx->textureState.textures[sampler].minFilter != texture->minFilter) + SetShaderVerifyFunctions(iface); + else if (ctx->textureState.textures[sampler].magFilter != texture->magFilter) + SetShaderVerifyFunctions(iface); + + if (texture) + { + ctx->textureState.textures[sampler] = *texture; // shallow copy, data pointed to must remain valid + //ctx->textureState.textureData[sampler] = texture->levels[0]; + ctx->textureState.textureData[sampler] = texture->levels; + ctx->textureState.textureDimensions[sampler * 2] = texture->width; + ctx->textureState.textureDimensions[sampler * 2 + 1] = texture->height; + } + else + { + memset(ctx->textureState.textures + sampler, 0, sizeof(ctx->textureState.textures[sampler])); + ctx->textureState.textureData[sampler] = 
NULL; + ctx->textureState.textureDimensions[sampler * 2] = 0; + ctx->textureState.textureDimensions[sampler * 2 + 1] = 0; + } +} + +void InitializeTextureFunctions(GGLInterface * iface) +{ + iface->SetSampler = SetSampler; +}
\ No newline at end of file diff --git a/src/pixelflinger2/texture.h b/src/pixelflinger2/texture.h new file mode 100644 index 0000000..a2b5407 --- /dev/null +++ b/src/pixelflinger2/texture.h @@ -0,0 +1,43 @@ +/** + ** + ** Copyright 2010, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#ifndef _TEXTURE_H_ +#define _TEXTURE_H_ + +#if !USE_LLVM_TEXTURE_SAMPLER + +namespace llvm +{ + class Module; + class ExecutionEngine; +}; + +extern const struct GGLContext * textureGGLContext; // for getting wrap mode, dimensions, data + +void DeclareTextureFunctions(llvm::Module * mod); +void AddTextureFunctionMappings(llvm::Module * mod, llvm::ExecutionEngine * ee); + +#include "pixelflinger2/pixelflinger2_format.h" + +template<GGLPixelFormat format> +void tex2d_int32(unsigned sample[4], const float tex_coord[4], const unsigned sampler); + +#endif // #if !USE_LLVM_TEXTURE_SAMPLER + +void InitializeTextureFunctions(struct GGLInterface * iface); + +#endif // #ifndef _TEXTURE_H_
\ No newline at end of file diff --git a/src/talloc/hieralloc.c b/src/talloc/hieralloc.c index 35ade04..f63351a 100644 --- a/src/talloc/hieralloc.c +++ b/src/talloc/hieralloc.c @@ -22,14 +22,17 @@ typedef struct hieralloc_header #define BEGIN_MAGIC() (13377331) #define END_MAGIC(header) ((unsigned)((const hieralloc_header_t *)header + 1) % 0x10000 | 0x13370000) -static hieralloc_header_t global_header = {0, 0, 0, 0, 0, "hieralloc_global_header", 0, 0 ,1, 0, 0}; +static hieralloc_header_t global_hieralloc_header = {BEGIN_MAGIC(), 0, 0, 0, 0, "hieralloc_global_hieralloc_header", 0, 0 ,1, 0, 0x13370000}; // Returns 1 if it's a valid header static inline int check_header(const hieralloc_header_t * header) { - if (&global_header == header) - return 1; assert(BEGIN_MAGIC() == header->beginMagic); + if (&global_hieralloc_header == header) + { + assert(0x13370000 == header->endMagic); + return 1; + } assert(END_MAGIC(header) == header->endMagic); return 1; } @@ -60,7 +63,7 @@ static void add_to_parent(hieralloc_header_t * parent, hieralloc_header_t * head // detach from parent and siblings static void remove_from_parent(hieralloc_header_t * header) { - hieralloc_header_t * parent = header->parent; + hieralloc_header_t * parent = header->parent; hieralloc_header_t * sibling = header->prevSibling; if (sibling) { @@ -105,7 +108,7 @@ void * hieralloc_allocate(const void * context, unsigned size, const char * name hieralloc_header_t * parent = NULL; if (!context) - parent = &global_header; + parent = &global_hieralloc_header; else parent = get_header(context); check_header(parent); @@ -123,7 +126,7 @@ void * hieralloc_reallocate(const void * context, void * ptr, unsigned size, con int reparented = 0; if (NULL == context) { - context = &global_header + 1; + context = &global_hieralloc_header + 1; reparented = 1; } @@ -187,7 +190,7 @@ int hieralloc_free(void * ptr) child = child->nextSibling; if (hieralloc_free(current + 1)) { - ret = 1; + ret = -1; remove_from_parent(current); 
add_to_parent(header->parent, current); } @@ -195,7 +198,7 @@ int hieralloc_free(void * ptr) //*/ if (ret) - return 1; + return -1; remove_from_parent(header); memset(header, 0xfe, header->size + sizeof(*header)); @@ -238,7 +241,7 @@ void * hieralloc_init(const char * name) // returns global context void * hieralloc_autofree_context() { - return &global_header + 1; + return &global_hieralloc_header + 1; } // sets destructor to be called before freeing; dctor return -1 aborts free @@ -414,7 +417,7 @@ static void _hieralloc_report(const hieralloc_header_t * header, FILE * file, un void hieralloc_report(const void * ptr, FILE * file) { if (NULL == ptr) - ptr = &global_header + 1; + ptr = &global_hieralloc_header + 1; fputs("hieralloc_report: \n", file); _hieralloc_report(get_header(ptr), file, 0); } @@ -436,9 +439,20 @@ static void _hieralloc_report_brief(const hieralloc_header_t * header, FILE * fi void hieralloc_report_brief(const void * ptr, FILE * file) { if (NULL == ptr) - ptr = &global_header + 1; + ptr = &global_hieralloc_header + 1; unsigned data [4] = {0}; _hieralloc_report_brief(get_header(ptr), file, data); fprintf(file, "hieralloc_report total: count=%d size=%d child=%d ref=%d \n", data[0], data[1], data[2], data[3]); } + +void hieralloc_report_lineage(const void * ptr, FILE * file, int tab) +{ + const hieralloc_header_t * header = get_header(ptr); + if (header->parent) + hieralloc_report_lineage(header->parent + 1, file, tab + 2); + for (tab; tab >=0; tab--) + fputc(' ', file); + fprintf(file, "hieralloc_report_lineage %p: size=%d child=%d ref=%d name='%s' parent=%p \n", + ptr, header->size, header->child, header->refCount, header->name, header->parent + 1); +} diff --git a/src/talloc/hieralloc.h b/src/talloc/hieralloc.h index 929ea42..fd1c7d3 100644 --- a/src/talloc/hieralloc.h +++ b/src/talloc/hieralloc.h @@ -88,4 +88,6 @@ void hieralloc_report(const void * ptr, FILE * file); void hieralloc_report_brief(const void * ptr, FILE * file); +void 
hieralloc_report_lineage(const void * ptr, FILE * file, int tab); + #endif |