diff options
author | Marek Olšák <marek.olsak@amd.com> | 2018-08-10 16:21:21 -0400 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2019-06-03 16:02:31 -0400 |
commit | 6e1bb1e769d22c524b2066933804f9fcfdcb7c95 (patch) | |
tree | f465fe37bf799079625e28bef06d55bca9827082 | |
parent | e26e7ea5fd31a13014a54ed579d7436d3b86656b (diff) |
Add a test that measures primitive rate
The output looks like this (taken from the initial version; the output of the current version is slightly different):
Measuring GPrims/second, , Number of primitives
Draw Call , Cull Method , 2K, 4K, 8K, 16K, 32K, 64K, 256K
--------------,----------------------,------,------,------,------,------,------,------
glDrawElements, none , 2.80, 2.69, 2.91, 2.89, 2.91, 2.92, 2.42
glDrawElements, rasterizer discard , 4.80, 4.77, 4.95, 4.84, 4.91, 4.85, 4.93
glDrawElements, 100% back faces , 3.27, 3.19, 3.29, 3.21, 3.26, 3.32, 3.33
glDrawElements, 75% back faces , 3.27, 3.47, 3.29, 3.54, 3.53, 3.60, 3.49
glDrawElements, 50% back faces , 3.92, 3.83, 3.34, 3.58, 3.68, 3.63, 2.76
glDrawElements, 25% back faces , 3.66, 3.52, 3.12, 3.18, 3.00, 2.78, 3.45
glDrawElements, 100% culled by view , 4.85, 4.75, 4.94, 4.68, 4.91, 4.80, 4.94
glDrawElements, 75% culled by view , 4.82, 4.68, 4.77, 4.76, 4.80, 4.65, 3.20
glDrawElements, 50% culled by view , 4.73, 4.65, 4.46, 3.40, 4.86, 4.04, 2.99
glDrawElements, 25% culled by view , 3.67, 3.48, 3.26, 2.70, 2.76, 2.60, 2.46
glDrawElements, 100% degenerate prims, 1.67, 1.66, 1.68, 1.66, 1.68, 1.68, 1.68
glDrawElements, 75% degenerate prims, 1.65, 1.90, 1.67, 1.96, 1.86, 2.01, 1.83
glDrawElements, 50% degenerate prims, 2.43, 2.37, 1.66, 2.44, 1.90, 2.24, 1.98
glDrawElements, 25% degenerate prims, 2.49, 2.94, 1.67, 2.03, 2.76, 2.79, 2.15
glDrawElements, 98 small prims/pixel , 4.82, 4.65, 4.85, 4.80, 4.90, 4.77, 4.30
glDrawElements, 32 small prims/pixel , 4.86, 4.71, 4.80, 4.69, 4.81, 4.16, 4.95
glDrawElements, 8 small prims/pixel , 4.73, 4.67, 4.92, 4.85, 4.91, 4.86, 3.20
glDrawArrays , none , 1.67, 1.66, 1.65, 1.47, 1.52, 1.24, 1.60
etc.
-rw-r--r-- | tests/perf/CMakeLists.gl.txt | 1 | ||||
-rw-r--r-- | tests/perf/common.c | 5 | ||||
-rw-r--r-- | tests/perf/common.h | 2 | ||||
-rw-r--r-- | tests/perf/draw-prim-rate.c | 519 | ||||
-rw-r--r-- | tests/perf/drawoverhead.c | 2 |
5 files changed, 524 insertions, 5 deletions
diff --git a/tests/perf/CMakeLists.gl.txt b/tests/perf/CMakeLists.gl.txt index f9d311525..6f9c2c565 100644 --- a/tests/perf/CMakeLists.gl.txt +++ b/tests/perf/CMakeLists.gl.txt @@ -10,5 +10,6 @@ link_libraries ( ) piglit_add_executable (drawoverhead drawoverhead.c common.c) +piglit_add_executable (draw-prim-rate draw-prim-rate.c common.c) # vim: ft=cmake: diff --git a/tests/perf/common.c b/tests/perf/common.c index 791b7f943..d61dfdca3 100644 --- a/tests/perf/common.c +++ b/tests/perf/common.c @@ -38,9 +38,8 @@ perf_get_time(void) * Return the rate (iterations/second). */ double -perf_measure_rate(perf_rate_func f) +perf_measure_rate(perf_rate_func f, double minDuration) { - const double minDuration = 0.5; double rate = 0.0, prevRate = 0.0; unsigned subiters; @@ -59,7 +58,7 @@ perf_measure_rate(perf_rate_func f) subiters *= 2; } while (t1 - t0 < 0.1 * minDuration); } - /*perf_printf("initial subIters = %u\n", subiters);*/ + /*printf("initial subIters = %u\n", subiters);*/ while (1) { const double t0 = perf_get_time(); diff --git a/tests/perf/common.h b/tests/perf/common.h index 0da3b7b5f..7b7856b4a 100644 --- a/tests/perf/common.h +++ b/tests/perf/common.h @@ -25,7 +25,7 @@ typedef void (*perf_rate_func)(unsigned count); double -perf_measure_rate(perf_rate_func f); +perf_measure_rate(perf_rate_func f, double minDuration); #endif /* COMMON_H */ diff --git a/tests/perf/draw-prim-rate.c b/tests/perf/draw-prim-rate.c new file mode 100644 index 000000000..85792b00e --- /dev/null +++ b/tests/perf/draw-prim-rate.c @@ -0,0 +1,519 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Measure primitive rate under various circumstances. 
+ * + * Culling methods: + * - none + * - rasterizer discard + * - face culling + * - view culling + * - degenerate primitives + * - subpixel primitives + */ + +#include "common.h" +#include <stdbool.h> +#undef NDEBUG +#include <assert.h> +#include "piglit-util-gl.h" + +/* this must be a power of two to prevent precision issues */ +#define WINDOW_SIZE 1024 + +PIGLIT_GL_TEST_CONFIG_BEGIN + + config.supports_gl_compat_version = 10; + config.window_width = WINDOW_SIZE; + config.window_height = WINDOW_SIZE; + config.window_visual = PIGLIT_GL_VISUAL_RGBA | PIGLIT_GL_VISUAL_DOUBLE; + +PIGLIT_GL_TEST_CONFIG_END + +static unsigned gpu_freq_mhz; +static GLint progs[3]; + +void +piglit_init(int argc, char **argv) +{ + for (unsigned i = 1; i < argc; i++) { + if (strncmp(argv[i], "-freq=", 6) == 0) + sscanf(argv[i] + 6, "%u", &gpu_freq_mhz); + } + + piglit_require_gl_version(32); + + progs[0] = piglit_build_simple_program( + "#version 120 \n" + "void main() { \n" + " gl_Position = gl_Vertex; \n" + "}", + + "#version 120 \n" + "void main() { \n" + " gl_FragColor = vec4(1.0); \n" + "}"); + + progs[1] = piglit_build_simple_program( + "#version 150 compatibility \n" + "varying vec4 v[4]; \n" + "attribute vec4 a[4]; \n" + "void main() { \n" + " for (int i = 0; i < 4; i++) v[i] = a[i]; \n" + " gl_Position = gl_Vertex; \n" + "}", + + "#version 150 compatibility \n" + "varying vec4 v[4]; \n" + "void main() { \n" + " gl_FragColor = vec4(dot(v[0] + v[1] + v[2] + v[3], vec4(1.0)) == 1.0 ? 0.0 : 1.0); \n" + "}"); + + progs[2] = piglit_build_simple_program( + "#version 150 compatibility \n" + "varying vec4 v[8]; \n" + "attribute vec4 a[8]; \n" + "void main() { \n" + " for (int i = 0; i < 8; i++) v[i] = a[i]; \n" + " gl_Position = gl_Vertex; \n" + "}", + + "#version 150 compatibility \n" + "varying vec4 v[8]; \n" + "void main() { \n" + " gl_FragColor = vec4(dot(v[0] + v[1] + v[2] + v[3] + v[4] + v[5] + v[6] + v[7], vec4(1.0)) == 1.0 ? 
0.0 : 1.0); \n" + "}"); + + glEnableClientState(GL_VERTEX_ARRAY); + glEnable(GL_CULL_FACE); +} + +static void +gen_triangle_tile(unsigned num_quads_per_dim, double prim_size_in_pixels, + unsigned cull_percentage, + bool back_face_culling, bool view_culling, bool degenerate_prims, + unsigned max_vertices, unsigned *num_vertices, float *vertices, + unsigned max_indices, unsigned *num_indices, unsigned *indices) +{ + /* clip space coordinates in both X and Y directions: */ + const double first = -1; + const double max_length = 2; + const double d = prim_size_in_pixels * 2.0 / WINDOW_SIZE; + + assert(d * num_quads_per_dim <= max_length); + assert(*num_vertices == 0); + + /* the vertex ordering is counter-clockwise */ + for (unsigned ty = 0; ty < num_quads_per_dim; ty++) { + bool cull; + + if (cull_percentage == 0) + cull = false; + else if (cull_percentage == 25) + cull = ty % 4 == 0; + else if (cull_percentage == 50) + cull = ty % 2 == 0; + else if (cull_percentage == 75) + cull = ty % 4 != 0; + else if (cull_percentage == 100) + cull = true; + else + assert(!"wrong cull_percentage"); + + for (unsigned tx = 0; tx < num_quads_per_dim; tx++) { + unsigned x = tx; + unsigned y = ty; + + /* view culling in different directions */ + double xoffset = 0, yoffset = 0, zoffset = 0; + + if (cull && view_culling) { + unsigned side = (ty / 2) % 4; + + if (side == 0) xoffset = -2; + else if (side == 1) xoffset = 2; + else if (side == 2) yoffset = -2; + else if (side == 3) yoffset = 2; + } + + if (indices) { + unsigned elem = *num_vertices * 3; + + /* generate horizontal stripes with maximum reuse */ + if (x == 0) { + *num_vertices += 2; + assert(*num_vertices <= max_vertices); + + vertices[elem++] = xoffset + first + d * x; + vertices[elem++] = yoffset + first + d * y; + vertices[elem++] = zoffset; + + vertices[elem++] = xoffset + first + d * x; + vertices[elem++] = yoffset + first + d * (y + 1); + vertices[elem++] = zoffset; + } + + int base_index = *num_vertices; + + 
*num_vertices += 2; + assert(*num_vertices <= max_vertices); + + vertices[elem++] = xoffset + first + d * (x + 1); + vertices[elem++] = yoffset + first + d * y; + vertices[elem++] = zoffset; + + vertices[elem++] = xoffset + first + d * (x + 1); + vertices[elem++] = yoffset + first + d * (y + 1); + vertices[elem++] = zoffset; + + /* generate indices */ + unsigned idx = *num_indices; + *num_indices += 6; + assert(*num_indices <= max_indices); + + indices[idx++] = base_index - 2; + indices[idx++] = base_index; + indices[idx++] = base_index - 1; + + indices[idx++] = base_index - 1; + indices[idx++] = base_index; + indices[idx++] = base_index + 1; + + if (cull && back_face_culling) { + /* switch the winding order */ + unsigned tmp = indices[idx - 6]; + indices[idx - 6] = indices[idx - 5]; + indices[idx - 5] = tmp; + + tmp = indices[idx - 3]; + indices[idx - 3] = indices[idx - 2]; + indices[idx - 2] = tmp; + } + + if (cull && degenerate_prims) { + indices[idx - 5] = indices[idx - 4]; + indices[idx - 2] = indices[idx - 1]; + } + } else { + unsigned elem = *num_vertices * 3; + *num_vertices += 6; + assert(*num_vertices <= max_vertices); + + vertices[elem++] = xoffset + first + d * x; + vertices[elem++] = yoffset + first + d * y; + vertices[elem++] = zoffset; + + vertices[elem++] = xoffset + first + d * (x + 1); + vertices[elem++] = yoffset + first + d * y; + vertices[elem++] = zoffset; + + vertices[elem++] = xoffset + first + d * x; + vertices[elem++] = yoffset + first + d * (y + 1); + vertices[elem++] = zoffset; + + vertices[elem++] = xoffset + first + d * x; + vertices[elem++] = yoffset + first + d * (y + 1); + vertices[elem++] = zoffset; + + vertices[elem++] = xoffset + first + d * (x + 1); + vertices[elem++] = yoffset + first + d * y; + vertices[elem++] = zoffset; + + vertices[elem++] = xoffset + first + d * (x + 1); + vertices[elem++] = yoffset + first + d * (y + 1); + vertices[elem++] = zoffset; + + if (cull && back_face_culling) { + /* switch the winding order */ + 
float old[6*3]; + memcpy(old, vertices + elem - 6*3, 6*3*4); + + for (unsigned i = 0; i < 6; i++) { + vertices[elem - 6*3 + i*3 + 0] = old[(5 - i)*3 + 0]; + vertices[elem - 6*3 + i*3 + 1] = old[(5 - i)*3 + 1]; + vertices[elem - 6*3 + i*3 + 2] = old[(5 - i)*3 + 2]; + } + } + + if (cull && degenerate_prims) { + /* use any previously generated vertices */ + unsigned v0 = rand() % *num_vertices; + unsigned v1 = rand() % *num_vertices; + + memcpy(&vertices[elem - 5*3], &vertices[v0*3], 12); + memcpy(&vertices[elem - 4*3], &vertices[v0*3], 12); + + memcpy(&vertices[elem - 2*3], &vertices[v1*3], 12); + memcpy(&vertices[elem - 1*3], &vertices[v1*3], 12); + } + } + } + } +} + +static bool is_indexed; +static unsigned count; +static unsigned num_duplicates; +static unsigned duplicate_index; +static unsigned vb_size, ib_size; + +static void +run_draw(unsigned iterations) +{ + for (unsigned i = 0; i < iterations; i++) { + glVertexPointer(3, GL_FLOAT, 0, + (void*)(long)(vb_size * duplicate_index)); + + if (is_indexed) { + glDrawElements(GL_TRIANGLES, count, + GL_UNSIGNED_INT, + (void*)(long)(ib_size * duplicate_index)); + } else { + glDrawArrays(GL_TRIANGLES, 0, count); + } + + duplicate_index = (duplicate_index + 1) % num_duplicates; + } +} + +enum cull_method { + NONE, + BACK_FACE_CULLING, + VIEW_CULLING, + SUBPIXEL_PRIMS, + RASTERIZER_DISCARD, + DEGENERATE_PRIMS, + NUM_CULL_METHODS, +}; + +static double +run_test(unsigned debug_num_iterations, bool indexed, enum cull_method cull_method, + unsigned num_quads_per_dim, double quad_size_in_pixels, + unsigned cull_percentage) +{ + const unsigned max_indices = 8100000 * 3; + const unsigned max_vertices = max_indices; + + while (num_quads_per_dim * quad_size_in_pixels >= WINDOW_SIZE) + quad_size_in_pixels *= 0.5; + + /* Generate vertices. 
*/ + float *vertices = (float*)malloc(max_vertices * 12); + unsigned *indices = NULL; + + if (indexed) + indices = (unsigned*)malloc(max_indices * 4); + + unsigned num_vertices = 0, num_indices = 0; + gen_triangle_tile(num_quads_per_dim, quad_size_in_pixels, + cull_percentage, + cull_method == BACK_FACE_CULLING, + cull_method == VIEW_CULLING, + cull_method == DEGENERATE_PRIMS, + max_vertices, &num_vertices, vertices, + max_indices, &num_indices, indices); + + vb_size = num_vertices * 12; + ib_size = num_indices * 4; + + /* Duplicate buffers and switch between them, so that no data is cached + * between draws. 32 MB should be greater than any cache. + */ + num_duplicates = MAX2(1, 32*1024*1024 / vb_size); + + /* Create buffers. */ + GLuint vb, ib; + glGenBuffers(1, &vb); + glBindBuffer(GL_ARRAY_BUFFER, vb); + glBufferData(GL_ARRAY_BUFFER, + vb_size * num_duplicates, NULL, GL_STATIC_DRAW); + for (unsigned i = 0; i < num_duplicates; i++) + glBufferSubData(GL_ARRAY_BUFFER, vb_size * i, vb_size, vertices); + free(vertices); + + if (indexed) { + glGenBuffers(1, &ib); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, + ib_size * num_duplicates, NULL, + GL_STATIC_DRAW); + for (unsigned i = 0; i < num_duplicates; i++) { + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, ib_size * i, + ib_size, indices); + } + free(indices); + } + /* Make sure all uploads are finished. */ + glFinish(); + + /* Test */ + if (cull_method == RASTERIZER_DISCARD) + glEnable(GL_RASTERIZER_DISCARD); + + glBindBuffer(GL_ARRAY_BUFFER, vb); + if (indexed) + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib); + + is_indexed = indexed; + count = indexed ? num_indices : num_vertices; + duplicate_index = 0; + + double rate = 0; + + if (debug_num_iterations) + run_draw(debug_num_iterations); + else + rate = perf_measure_rate(run_draw, 0.15); + + if (cull_method == RASTERIZER_DISCARD) + glDisable(GL_RASTERIZER_DISCARD); + + /* Cleanup. 
*/ + glDeleteBuffers(1, &vb); + if (indexed) + glDeleteBuffers(1, &ib); + return rate; +} + +enum piglit_result +piglit_display(void) +{ + double rate; + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + /* for debugging */ + if (getenv("ONE")) { + glUseProgram(progs[0]); + run_test(100, true, BACK_FACE_CULLING, ceil(sqrt(0.5 * 512000)), 2, 50); + piglit_swap_buffers(); + return PIGLIT_PASS; + } + + static const unsigned num_quads_per_dim[] = { + /* The second number is the approx. number of primitives. */ + ceil(sqrt(0.5 * 1000)), + ceil(sqrt(0.5 * 2000)), + ceil(sqrt(0.5 * 4000)), + ceil(sqrt(0.5 * 6000)), + ceil(sqrt(0.5 * 8000)), + ceil(sqrt(0.5 * 16000)), + ceil(sqrt(0.5 * 32000)), + ceil(sqrt(0.5 * 128000)), + ceil(sqrt(0.5 * 512000)), + /* 512000 is the maximum number when everything fits into the window */ + /* After that, the prim size decreases, so you'll get subpixel prims. */ + ceil(sqrt(0.5 * 2000000)), + ceil(sqrt(0.5 * 8000000)), + }; + + unsigned num_prims[ARRAY_SIZE(num_quads_per_dim)]; + for (int i = 0; i < ARRAY_SIZE(num_quads_per_dim); i++) + num_prims[i] = num_quads_per_dim[i] * num_quads_per_dim[i] * 2; + + printf(" Measuring %-27s, 0 Varying 4 Varyings 8 Varyings\n", + gpu_freq_mhz ? 
"Prims/clock," : "GPrims/second,"); + printf(" Draw Call , Cull Method "); + + for (unsigned prog = 0; prog < ARRAY_SIZE(progs); prog++) { + if (prog) + printf(" "); + for (int i = 0; i < ARRAY_SIZE(num_prims); i++) + printf(", %4uK", num_prims[i] / 1000); + } + printf("\n"); + + for (int indexed = 1; indexed >= 0; indexed--) { + for (int cull_method = 0; cull_method < NUM_CULL_METHODS; cull_method++) { + unsigned num_subtests = 1; + static unsigned cull_percentages[] = {100, 75, 50, 25}; + static double quad_sizes_in_pixels[] = {1.0 / 7, 0.25, 0.5}; + + if (cull_method == BACK_FACE_CULLING || + cull_method == VIEW_CULLING || + cull_method == DEGENERATE_PRIMS) { + num_subtests = ARRAY_SIZE(cull_percentages); + } else if (cull_method == SUBPIXEL_PRIMS) { + num_subtests = ARRAY_SIZE(quad_sizes_in_pixels); + } + + for (unsigned subtest = 0; subtest < num_subtests; subtest++) { + /* 2 is the maximum prim size when everything fits into the window */ + double quad_size_in_pixels; + unsigned cull_percentage; + + if (cull_method == SUBPIXEL_PRIMS) { + quad_size_in_pixels = quad_sizes_in_pixels[subtest]; + cull_percentage = 0; + } else { + quad_size_in_pixels = 2; + cull_percentage = cull_percentages[subtest]; + } + + printf(" %-14s, ", indexed ? "glDrawElements" : "glDrawArrays"); + + if (cull_method == NONE || + cull_method == RASTERIZER_DISCARD) { + printf("%-21s", + cull_method == NONE ? "none" : "rasterizer discard"); + } else if (cull_method == SUBPIXEL_PRIMS) { + printf("%2u small prims/pixel ", + (unsigned)((1.0 / quad_size_in_pixels) * + (1.0 / quad_size_in_pixels) * 2)); + } else { + printf("%3u%% %-16s", cull_percentage, + cull_method == BACK_FACE_CULLING ? "back faces" : + cull_method == VIEW_CULLING ? "culled by view" : + cull_method == DEGENERATE_PRIMS ? 
"degenerate prims" : + "(error)"); + } + fflush(stdout); + + for (unsigned prog = 0; prog < ARRAY_SIZE(progs); prog++) { + glUseProgram(progs[prog]); + + if (prog) + printf(" "); + + for (int i = 0; i < ARRAY_SIZE(num_prims); i++) { + rate = run_test(false, indexed, cull_method, num_quads_per_dim[i], + quad_size_in_pixels, cull_percentage); + rate *= num_prims[i]; + + if (gpu_freq_mhz) { + rate /= gpu_freq_mhz * 1000000.0; + printf(",%6.2f", rate); + } else { + printf(",%6.2f", rate / 1000000000); + } + fflush(stdout); + } + } + printf("\n"); + } + } + } + + exit(0); + return PIGLIT_SKIP; +} diff --git a/tests/perf/drawoverhead.c b/tests/perf/drawoverhead.c index b30d7d46a..d1fa8d3c1 100644 --- a/tests/perf/drawoverhead.c +++ b/tests/perf/drawoverhead.c @@ -641,7 +641,7 @@ perf_run(const char *call, unsigned num_vbos, unsigned num_ubos, static unsigned test_index; test_index++; - double rate = perf_measure_rate(f); + double rate = perf_measure_rate(f, 0.5); double ratio = base_rate ? rate / base_rate : 1; printf(" %3u, %s (%2u VBO| %u UBO| %2u %s) w/ %s change,%*s" |