diff options
author | Marek Olšák <marek.olsak@amd.com> | 2017-04-30 16:57:05 +0200 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2017-05-05 19:38:02 +0200 |
commit | c12816f7b3154c87c94244db01c5a79bca1d38a9 (patch) | |
tree | b112d82d65bf233f8659b47b6cf07b6f08f1e105 /tests | |
parent | 54a78c475abde7627a7377787c6c6d6df64d009e (diff) |
drawoverhead: new microbenchmark
Based on a benchmark from mesa/demos, but rewritten and extended.
It's a benchmark expected to be run separately, not a piglit test.
So why piglit? Because it's a good framework for writing apps like this.
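mesa_glthread won't show an improvement here, because there is no app
overhead: the benchmark does nothing but issue GL calls, so there is no
application work for a separate GL thread to overlap with.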
This is what the output looks like. The percentage is relative to
the first test of the given draw call (1 VBO, 0 UBOs, 0 textures,
no state change), which is reported as 100%.
The obvious takeaway is that enabling more vertex attribs decreases
Mesa performance even if there are no state changes (DrawElements
drops from 100.0% with 1 VBO to 63.9% with 16 VBOs).
Using Core profile.
Draw calls per second:
DrawElements ( 1 VBOs, 0 UBOs, 0 Tex) w/ no state change: 5.71 million (100.0%)
DrawElements ( 4 VBOs, 0 UBOs, 0 Tex) w/ no state change: 5.18 million (90.8%)
DrawElements (16 VBOs, 0 UBOs, 0 Tex) w/ no state change: 3.65 million (63.9%)
DrawElements ( 1 VBOs, 0 UBOs, 16 Tex) w/ no state change: 5.71 million (100.0%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ no state change: 5.78 million (101.2%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ shader program change: 220.11 thousand (3.9%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ vertex attrib change: 1.06 million (18.5%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ 1 texture change: 483.27 thousand (8.5%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ 8 textures change: 291.20 thousand (5.1%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ 1 UBO change: 1.84 million (32.3%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ 4 UBOs change: 1.12 million (19.7%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ few uniforms / 1 change: 2.27 million (39.8%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ many uniforms / 1 change: 966.00 thousand (16.9%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ blend enable change: 1.37 million (24.0%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ depth enable change: 1.86 million (32.6%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ stencil enable change: 1.66 million (29.0%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ scissor enable change: 1.09 million (19.1%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ MSAA enable change: 1.94 million (34.0%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ cull face enable change: 1.56 million (27.3%)
DrawElements ( 1 VBOs, 4 UBOs, 8 Tex) w/ FB sRGB enable change: 200.81 thousand (3.5%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ shader program change: 186.92 thousand (3.3%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ vertex attrib change: 638.49 thousand (11.2%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ 1 texture change: 452.39 thousand (7.9%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ 8 textures change: 278.79 thousand (4.9%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ 1 UBO change: 1.47 million (25.7%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ 4 UBOs change: 974.30 thousand (17.1%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ few uniforms / 1 change: 1.79 million (31.3%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ many uniforms / 1 change: 853.07 thousand (14.9%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ blend enable change: 1.16 million (20.3%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ depth enable change: 1.49 million (26.2%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ stencil enable change: 1.35 million (23.7%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ scissor enable change: 946.45 thousand (16.6%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ MSAA enable change: 1.62 million (28.3%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ cull face enable change: 1.29 million (22.5%)
DrawElements (16 VBOs, 4 UBOs, 8 Tex) w/ FB sRGB enable change: 126.44 thousand (2.2%)
DrawArrays ( 1 VBOs, 0 UBOs, 0 Tex) w/ no state change: 8.02 million (100.0%)
DrawArrays ( 4 VBOs, 0 UBOs, 0 Tex) w/ no state change: 7.14 million (89.0%)
DrawArrays (16 VBOs, 0 UBOs, 0 Tex) w/ no state change: 4.26 million (53.0%)
DrawArrays ( 1 VBOs, 0 UBOs, 16 Tex) w/ no state change: 7.89 million (98.4%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ no state change: 8.01 million (99.9%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ shader program change: 221.09 thousand (2.8%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ vertex attrib change: 1.13 million (14.1%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ 1 texture change: 500.25 thousand (6.2%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ 8 textures change: 294.30 thousand (3.7%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ 1 UBO change: 2.02 million (25.2%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ 4 UBOs change: 1.18 million (14.7%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ few uniforms / 1 change: 2.28 million (28.4%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ many uniforms / 1 change: 617.79 thousand (7.7%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ blend enable change: 1.59 million (19.8%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ depth enable change: 2.09 million (26.0%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ stencil enable change: 2.02 million (25.2%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ scissor enable change: 1.18 million (14.7%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ MSAA enable change: 2.27 million (28.3%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ cull face enable change: 1.77 million (22.1%)
DrawArrays ( 1 VBOs, 4 UBOs, 8 Tex) w/ FB sRGB enable change: 204.60 thousand (2.6%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ shader program change: 191.50 thousand (2.4%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ vertex attrib change: 679.98 thousand (8.5%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ 1 texture change: 472.00 thousand (5.9%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ 8 textures change: 286.70 thousand (3.6%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ 1 UBO change: 1.69 million (21.0%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ 4 UBOs change: 1.04 million (13.0%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ few uniforms / 1 change: 2.04 million (25.5%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ many uniforms / 1 change: 620.41 thousand (7.7%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ blend enable change: 1.30 million (16.2%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ depth enable change: 1.69 million (21.0%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ stencil enable change: 1.55 million (19.3%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ scissor enable change: 1.04 million (13.0%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ MSAA enable change: 1.82 million (22.7%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ cull face enable change: 1.47 million (18.3%)
DrawArrays (16 VBOs, 4 UBOs, 8 Tex) w/ FB sRGB enable change: 129.25 thousand (1.6%)
Diffstat (limited to 'tests')
-rw-r--r-- | tests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | tests/perf/CMakeLists.gl.txt | 14 | ||||
-rw-r--r-- | tests/perf/CMakeLists.txt | 1 | ||||
-rw-r--r-- | tests/perf/common.c | 117 | ||||
-rw-r--r-- | tests/perf/common.h | 34 | ||||
-rw-r--r-- | tests/perf/drawoverhead.c | 495 |
6 files changed, 662 insertions, 0 deletions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index af842fc30..7aab33fe7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,6 +16,7 @@ add_subdirectory (shaders) add_subdirectory (texturing) add_subdirectory (spec) add_subdirectory (fast_color_clear) +add_subdirectory (perf) if (NOT APPLE) # glean relies on AGL which is deprecated/broken on recent Mac OS X diff --git a/tests/perf/CMakeLists.gl.txt b/tests/perf/CMakeLists.gl.txt new file mode 100644 index 000000000..f9d311525 --- /dev/null +++ b/tests/perf/CMakeLists.gl.txt @@ -0,0 +1,14 @@ + +include_directories( + ${GLEXT_INCLUDE_DIR} + ${OPENGL_INCLUDE_PATH} +) + +link_libraries ( + piglitutil_${piglit_target_api} + ${OPENGL_gl_LIBRARY} +) + +piglit_add_executable (drawoverhead drawoverhead.c common.c) + +# vim: ft=cmake: diff --git a/tests/perf/CMakeLists.txt b/tests/perf/CMakeLists.txt new file mode 100644 index 000000000..144a306f4 --- /dev/null +++ b/tests/perf/CMakeLists.txt @@ -0,0 +1 @@ +piglit_include_target_api() diff --git a/tests/perf/common.c b/tests/perf/common.c new file mode 100644 index 000000000..e157d82c5 --- /dev/null +++ b/tests/perf/common.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Common perf code. This should be re-usable with other tests. + */ + +#include "piglit-util-gl.h" +#include "common.h" + +/** Return time in seconds */ +static double +perf_get_time(void) +{ + return piglit_time_get_nano() * 0.000000001; +} + +/** + * Run function 'f' for enough iterations to reach a steady state. + * Return the rate (iterations/second). + */ +double +perf_measure_rate(perf_rate_func f) +{ + const double minDuration = 0.5; + double rate = 0.0, prevRate = 0.0; + unsigned subiters; + + /* Compute initial number of iterations to try. + * If the test function is pretty slow this helps to avoid + * extraordinarily long run times. 
+ */ + subiters = 2; + { + const double t0 = perf_get_time(); + double t1; + do { + f(subiters); /* call the rendering function */ + glFinish(); + t1 = perf_get_time(); + subiters *= 2; + } while (t1 - t0 < 0.1 * minDuration); + } + /*perf_printf("initial subIters = %u\n", subiters);*/ + + while (1) { + const double t0 = perf_get_time(); + unsigned iters = 0; + double t1; + + do { + f(subiters); /* call the rendering function */ + glFinish(); + t1 = perf_get_time(); + iters += subiters; + } while (t1 - t0 < minDuration); + + rate = iters / (t1 - t0); + + if (0) + printf("prevRate %f rate %f ratio %f iters %u\n", + prevRate, rate, rate/prevRate, iters); + + /* Try and speed the search up by skipping a few steps: */ + if (rate > prevRate * 1.6) + subiters *= 8; + else if (rate > prevRate * 1.2) + subiters *= 4; + else if (rate > prevRate * 1.05) + subiters *= 2; + else + break; + + prevRate = rate; + } + + if (0) + printf("%s returning iters %u rate %f\n", __FUNCTION__, subiters, rate); + return rate; +} + +/* Note static buffer, can only use once per printf. + */ +const char * +perf_human_float( double d ) +{ + static char buf[80]; + + if (d > 1000000000.0) + snprintf(buf, sizeof(buf), "%.2f billion", d / 1000000000.0); + else if (d > 1000000.0) + snprintf(buf, sizeof(buf), "%.2f million", d / 1000000.0); + else if (d > 1000.0) + snprintf(buf, sizeof(buf), "%.2f thousand", d / 1000.0); + else + snprintf(buf, sizeof(buf), "%.2f", d); + + return buf; +} diff --git a/tests/perf/common.h b/tests/perf/common.h new file mode 100644 index 000000000..daa0d42ab --- /dev/null +++ b/tests/perf/common.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef COMMON_H +#define COMMON_H + +typedef void (*perf_rate_func)(unsigned count); + +double +perf_measure_rate(perf_rate_func f); + +const char * +perf_human_float( double d ); + +#endif /* COMMON_H */ + diff --git a/tests/perf/drawoverhead.c b/tests/perf/drawoverhead.c new file mode 100644 index 000000000..9dd110f2c --- /dev/null +++ b/tests/perf/drawoverhead.c @@ -0,0 +1,495 @@ +/* + * Copyright (C) 2009 VMware, Inc. + * Copyright (C) 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "common.h" +#include <stdbool.h> +#include "piglit-util-gl.h" + +PIGLIT_GL_TEST_CONFIG_BEGIN + + config.supports_gl_compat_version = 0; + config.supports_gl_core_version = 32; + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-compat")) { + config.supports_gl_compat_version = 10; + config.supports_gl_core_version = 0; + break; + } + } + puts(config.supports_gl_core_version ? "Using Core profile." 
: + "Using Compatibility profile."); + puts("Draw calls per second:"); + + config.window_visual = PIGLIT_GL_VISUAL_RGBA | PIGLIT_GL_VISUAL_DOUBLE | + PIGLIT_GL_VISUAL_DEPTH | PIGLIT_GL_VISUAL_STENCIL; + +PIGLIT_GL_TEST_CONFIG_END + +static GLuint prog[2], uniform_loc, tex[8], ubo[4]; +static bool indexed; +static GLenum enable_enum; + +void +piglit_init(int argc, char **argv) +{ + static const unsigned indices[4] = {0, 1, 2, 3}; + GLuint vao, ebo; + + piglit_require_gl_version(30); + + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + + glGenBuffers(1, &ebo); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, + sizeof(indices), indices, GL_STATIC_DRAW); +} + +static void +get_vs_text(char *s, unsigned num_vbos, bool is_second) +{ + unsigned i; + + strcpy(s, "#version 130\n" + "#extension GL_ARB_explicit_attrib_location : require\n"); + for (i = 0; i < num_vbos; i++) { + sprintf(s + strlen(s), + "layout (location = %u) in vec4 v%u;\n", i, i); + } + strcat(s, "void main() {\n" + " gl_Position = vec4(0.0)"); + for (i = 0; i < num_vbos; i++) + sprintf(s + strlen(s), " + v%u", i); + if (is_second) + strcat(s, " + vec4(0.5)"); + strcat(s, ";\n}\n"); +} + +static void +get_fs_text(char *s, unsigned num_ubos, unsigned num_textures, bool is_second) +{ + unsigned i; + + strcpy(s, "#version 130\n" + "#extension GL_ARB_uniform_buffer_object : require\n" + "uniform int index = 0;"); + sprintf(s + strlen(s), "uniform vec4 u[%u];\n", is_second ? 
240 : 1); + + for (i = 0; i < num_textures; i++) + sprintf(s + strlen(s), "uniform sampler2D s%u;\n", i); + for (i = 0; i < num_ubos; i++) + sprintf(s + strlen(s), "uniform ub%u { vec4 ubu%u[10]; };\n", i, i); + + strcat(s, "void main() {\n"); + strcat(s, " gl_FragData[0] = u[index]"); + for (i = 0; i < num_textures; i++) + sprintf(s + strlen(s), " + texture(s%u, u[0].xy)", i); + for (i = 0; i < num_ubos; i++) + sprintf(s + strlen(s), " + ubu%u[index]", i); + if (is_second) + strcat(s, " + vec4(0.5)"); + strcat(s, ";\n}\n"); +} + +static void +setup_shaders_and_resources(unsigned num_vbos, + unsigned num_ubos, + unsigned num_textures) +{ + const unsigned max = 16; + char vs[4096], fs[4096]; + unsigned p, i; + + assert(num_vbos <= max); + assert(num_ubos <= max); + assert(num_textures <= max); + + for (i = 0; i < max; i++) + glDisableVertexAttribArray(i); + + /* Create two programs in case we want to test program changes. */ + for (p = 0; p < 2; p++) { + get_vs_text(vs, num_vbos, p); + get_fs_text(fs, num_ubos, num_textures, p); + prog[p] = piglit_build_simple_program(vs, fs); + + /* Assign texture units to samplers. */ + glUseProgram(prog[p]); + for (i = 0; i < num_textures; i++) { + char sampler[20]; + int loc; + + snprintf(sampler, sizeof(sampler), "s%u", i); + loc = glGetUniformLocation(prog[p], sampler); + assert(loc >= 0); + glUniform1i(loc, i); + } + /* Assign UBO slots to uniform blocks. 
*/ + for (i = 0; i < num_ubos; i++) { + char block[20]; + int index; + + snprintf(block, sizeof(block), "ub%u", i); + index = glGetUniformBlockIndex(prog[p], block); + assert(index != GL_INVALID_INDEX); + glUniformBlockBinding(prog[p], index, i); + } + } + glUseProgram(prog[0]); + + for (i = 0; i < num_ubos; i++) { + static const float data[10*4]; + GLuint ub; + + glGenBuffers(1, &ub); + glBindBuffer(GL_UNIFORM_BUFFER, ub); + glBufferData(GL_UNIFORM_BUFFER, sizeof(data), data, + GL_STATIC_DRAW); + + glBindBufferBase(GL_UNIFORM_BUFFER, i, ub); + /* Save the last UBOs for testing UBO changes. */ + ubo[i % 4] = ub; + } + /* setup VBO w/ vertex data, we need a different buffer in each attrib */ + for (i = 0; i < num_vbos; i++) { + /* Vertex positions are all zeroed - we want all primitives + * to be culled. + */ + static const float vertices[4][3]; + GLuint vbo; + + glGenBuffers(1, &vbo); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, + GL_STATIC_DRAW); + + glVertexAttribPointer(i, 2, GL_FLOAT, GL_FALSE, + 3 * sizeof(float), NULL); + glEnableVertexAttribArray(i); + } + for (i = 0; i < num_textures; i++) { + glActiveTexture(GL_TEXTURE0 + i); + /* Save the last texture IDs for testing texture changes. 
*/ + tex[i % 8] = piglit_rgbw_texture(GL_RGBA8, 4, 4, false, true, + GL_UNSIGNED_BYTE); + } + glActiveTexture(GL_TEXTURE0); +} + +static void +draw(unsigned count) +{ + unsigned i; + if (indexed) { + for (i = 0; i < count; i++) + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } else { + for (i = 0; i < count; i++) + glDrawArrays(GL_POINTS, 0, 4); + } +} + +static void +draw_shader_change(unsigned count) +{ + unsigned i; + if (indexed) { + for (i = 0; i < count; i++) { + glUseProgram(prog[i & 1]); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + glUseProgram(prog[i & 1]); + glDrawArrays(GL_POINTS, 0, 4); + } + } + glUseProgram(prog[0]); +} + +static void +draw_uniform_change(unsigned count) +{ + unsigned i; + if (indexed) { + for (i = 0; i < count; i++) { + glUniform4f(uniform_loc, i & 1, 0, 0, 0); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + glUniform4f(uniform_loc, i & 1, 0, 0, 0); + glDrawArrays(GL_POINTS, 0, 4); + } + } +} + +static void +draw_one_texture_change(unsigned count) +{ + unsigned i; + if (indexed) { + for (i = 0; i < count; i++) { + glBindTexture(GL_TEXTURE_2D, tex[i & 1]); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + glBindTexture(GL_TEXTURE_2D, tex[i & 1]); + glDrawArrays(GL_POINTS, 0, 4); + } + } +} + +static void +draw_many_texture_change(unsigned count) +{ + unsigned i,j; + if (indexed) { + for (i = 0; i < count; i++) { + for (j = 0; j < 8; j++) { + glActiveTexture(GL_TEXTURE0 + j); + glBindTexture(GL_TEXTURE_2D, tex[(i + j) % 8]); + } + glActiveTexture(GL_TEXTURE0); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + for (j = 0; j < 8; j++) { + glActiveTexture(GL_TEXTURE0 + j); + glBindTexture(GL_TEXTURE_2D, tex[(i + j) % 8]); + } + glActiveTexture(GL_TEXTURE0); + glDrawArrays(GL_POINTS, 0, 4); + } + } +} + +static 
void +draw_one_ubo_change(unsigned count) +{ + unsigned i; + if (indexed) { + for (i = 0; i < count; i++) { + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo[i & 1]); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo[i & 1]); + glDrawArrays(GL_POINTS, 0, 4); + } + } +} + +static void +draw_many_ubo_change(unsigned count) +{ + unsigned i,j; + if (indexed) { + for (i = 0; i < count; i++) { + for (j = 0; j < 4; j++) + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo[(i + j) % 4]); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + for (j = 0; j < 4; j++) + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo[(i + j) % 4]); + glDrawArrays(GL_POINTS, 0, 4); + } + } +} + +static void +draw_state_change(unsigned count) +{ + unsigned i; + if (indexed) { + for (i = 0; i < count; i++) { + if (i & 1) + glEnable(enable_enum); + else + glDisable(enable_enum); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + if (i & 1) + glEnable(enable_enum); + else + glDisable(enable_enum); + glDrawArrays(GL_POINTS, 0, 4); + } + } + glDisable(enable_enum); +} + +static void +draw_vertex_attrib_change(unsigned count) +{ + unsigned i; + if (indexed) { + for (i = 0; i < count; i++) { + if (i & 1) + glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, + 3 * sizeof(float), NULL); + else + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, + 3 * sizeof(float), NULL); + glDrawElements(GL_POINTS, 4, GL_UNSIGNED_INT, NULL); + } + } else { + for (i = 0; i < count; i++) { + if (i & 1) + glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, + 3 * sizeof(float), NULL); + else + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, + 3 * sizeof(float), NULL); + glDrawArrays(GL_POINTS, 0, 4); + } + } +} + +#define COLOR_RESET "\033[0m" +#define COLOR_RED "\033[31m" +#define COLOR_GREEN "\033[1;32m" +#define COLOR_YELLOW "\033[1;33m" +#define 
COLOR_CYAN "\033[1;36m" + +static double +perf_run(const char *call, unsigned num_vbos, unsigned num_ubos, + unsigned num_textures, const char *change, perf_rate_func f, + double base_rate) +{ + double rate = perf_measure_rate(f); + double ratio = base_rate ? rate / base_rate : 1; + + printf(" %s (%2u VBOs, %u UBOs, %2u Tex) w/ %s change:%*s" + COLOR_CYAN "%s" COLOR_RESET " %s(%.1f%%)" COLOR_RESET "\n", + call, num_vbos, num_ubos, num_textures, change, + MAX2(18 - (int)strlen(change), 0), "", + perf_human_float(rate), + base_rate == 0 ? COLOR_RESET : + ratio > 0.7 ? COLOR_GREEN : + ratio > 0.4 ? COLOR_YELLOW : COLOR_RED, + 100 * ratio); + return rate; +} + +struct enable_state_t { + GLenum enable; + const char *name; +}; + +static struct enable_state_t enable_states[] = { + {GL_BLEND, "blend enable"}, + {GL_DEPTH_TEST, "depth enable"}, + {GL_STENCIL_TEST, "stencil enable"}, + {GL_SCISSOR_TEST, "scissor enable"}, + {GL_MULTISAMPLE, "MSAA enable"}, + {GL_CULL_FACE, "cull face enable"}, + {GL_FRAMEBUFFER_SRGB, "FB sRGB enable"}, +}; + +static void +perf_draw_variant(const char *call, bool is_indexed) +{ + double base_rate = 0; + unsigned num_vbos, num_ubos, num_textures; + + indexed = is_indexed; + + /* Test different shader resource usage without state changes. */ + num_ubos = 0; + num_textures = 0; + for (num_vbos = 1; num_vbos <= 16; num_vbos *= 4) { + setup_shaders_and_resources(num_vbos, num_ubos, num_textures); + + double rate = perf_run(call, num_vbos, num_ubos, num_textures, "no state", + draw, base_rate); + if (num_vbos == 1) + base_rate = rate; + } + + num_vbos = 1; + num_ubos = 0; + num_textures = 16; + setup_shaders_and_resources(num_vbos, num_ubos, num_textures); + perf_run(call, num_vbos, num_ubos, num_textures, "no state", + draw, base_rate); + + /* Test state changes. 
*/ + num_ubos = 4; + num_textures = 8; + for (num_vbos = 1; num_vbos <= 16; num_vbos *= 16) { + setup_shaders_and_resources(num_vbos, num_ubos, num_textures); + + perf_run(call, num_vbos, num_ubos, num_textures, "no state", + draw, base_rate); + perf_run(call, num_vbos, num_ubos, num_textures, "shader program", + draw_shader_change, base_rate); + perf_run(call, num_vbos, num_ubos, num_textures, "vertex attrib", + draw_vertex_attrib_change, base_rate); + perf_run(call, num_vbos, num_ubos, num_textures, "1 texture", + draw_one_texture_change, base_rate); + perf_run(call, num_vbos, num_ubos, num_textures, "8 textures", + draw_many_texture_change, base_rate); + perf_run(call, num_vbos, num_ubos, num_textures, "1 UBO", + draw_one_ubo_change, base_rate); + perf_run(call, num_vbos, num_ubos, num_textures, "4 UBOs", + draw_many_ubo_change, base_rate); + + glUseProgram(prog[0]); + uniform_loc = glGetUniformLocation(prog[0], "u"); + perf_run(call, num_vbos, num_ubos, num_textures, "few uniforms / 1", + draw_uniform_change, base_rate); + + glUseProgram(prog[1]); + uniform_loc = glGetUniformLocation(prog[1], "u"); + perf_run(call, num_vbos, num_ubos, num_textures, "many uniforms / 1", + draw_uniform_change, base_rate); + glUseProgram(prog[0]); + + for (int state = 0; state < ARRAY_SIZE(enable_states); state++) { + enable_enum = enable_states[state].enable; + perf_run(call, num_vbos, num_ubos, num_textures, + enable_states[state].name, + draw_state_change, base_rate); + } + } +} + +/** Called from test harness/main */ +enum piglit_result +piglit_display(void) +{ + perf_draw_variant("DrawElements", true); + perf_draw_variant("DrawArrays", false); + + exit(0); + return PIGLIT_SKIP; +} |